Backed out 4 changesets (bug 1854912) for causing failures in sub-sample-scheduling.html

Backed out changeset 6d23b44c6fc0 (bug 1854912)
Backed out changeset 3719cb3c102a (bug 1854912)
Backed out changeset 7fe68331b4e9 (bug 1854912)
Backed out changeset 1715d7aafa06 (bug 1854912)
2024-11-23 12:51:06 +00:00 · 2023-10-02 21:19:40 +03:00 · 2023-10-02 21:19:40 +03:00 · 99f938e4bd
commit 99f938e4bd
parent 26e0b0be0a
14 changed files with 374 additions and 382 deletions
--- a/media/libspeex_resampler/02_simd-detect-runtime.patch
+++ b/media/libspeex_resampler/02_simd-detect-runtime.patch
@ -1,37 +1,22 @@
 diff --git a/src/resample.c b/src/resample.c
 --- a/src/resample.c
 +++ b/src/resample.c
-@@ -91,23 +91,17 @@ static void speex_free(void *ptr) {free(
- #ifndef NULL
- #define NULL 0
+@@ -94,13 +94,7 @@ static void speex_free (void *ptr) {free(ptr);}
+ #define UINT32_MAX 4294967296U
 #endif
 
- #ifndef UINT32_MAX
- #define UINT32_MAX 4294967295U
- #endif
- 
-#ifdef USE_SSE
+-#ifdef _USE_SSE
 -#include "resample_sse.h"
 -#endif
 -
-#ifdef USE_NEON
+-#ifdef _USE_NEON
 -#include "resample_neon.h"
 -#endif
 +#include "simd_detect.h"
 
- /* Number of elements to allocate on the stack */
+ /* Numer of elements to allocate on the stack */
 #ifdef VAR_ARRAYS
- #define FIXED_STACK_ALLOC 8192
- #else
- #define FIXED_STACK_ALLOC 1024
- #endif
- 
-@@ -341,17 +335,19 @@ static int resampler_basic_direct_single
-    const spx_uint32_t den_rate = st->den_rate;
-    spx_word32_t sum;
- 
-    while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
-    {
+@@ -346,7 +340,9 @@ static int resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t c
       const spx_word16_t *sinct = & sinc_table[samp_frac_num*N];
       const spx_word16_t *iptr = & in[last_sample];
 
@ -42,17 +27,7 @@ diff --git a/src/resample.c b/src/resample.c
       int j;
       sum = 0;
       for(j=0;j<N;j++) sum += MULT16_16(sinct[j], iptr[j]);
- 
- /*    This code is slower on most DSPs which have only 2 accumulators.
-       Plus this this forces truncation to 32 bits and you lose the HW guard bits.
-       I think we can trust the compiler and let it vectorize and/or unroll itself.
-       spx_word32_t accum[4] = {0,0,0,0};
-@@ -359,18 +355,20 @@ static int resampler_basic_direct_single
-         accum[0] += MULT16_16(sinct[j], iptr[j]);
-         accum[1] += MULT16_16(sinct[j+1], iptr[j+1]);
-         accum[2] += MULT16_16(sinct[j+2], iptr[j+2]);
-         accum[3] += MULT16_16(sinct[j+3], iptr[j+3]);
-       }
+@@ -364,8 +360,10 @@ static int resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t c
       sum = accum[0] + accum[1] + accum[2] + accum[3];
 */
       sum = SATURATE32PSHR(sum, 15, 32767);
@ -64,17 +39,7 @@ diff --git a/src/resample.c b/src/resample.c
 #endif
 
       out[out_stride * out_sample++] = sum;
-       last_sample += int_advance;
-       samp_frac_num += frac_advance;
-       if (samp_frac_num >= den_rate)
-       {
-          samp_frac_num -= den_rate;
-@@ -399,29 +397,33 @@ static int resampler_basic_direct_double
-    const spx_uint32_t den_rate = st->den_rate;
-    double sum;
- 
-    while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
-    {
+@@ -404,7 +402,9 @@ static int resampler_basic_direct_double(SpeexResamplerState *st, spx_uint32_t c
       const spx_word16_t *sinct = & sinc_table[samp_frac_num*N];
       const spx_word16_t *iptr = & in[last_sample];
 
@ -85,10 +50,7 @@ diff --git a/src/resample.c b/src/resample.c
       int j;
       double accum[4] = {0,0,0,0};
 
-       for(j=0;j<N;j+=4) {
-         accum[0] += sinct[j]*iptr[j];
-         accum[1] += sinct[j+1]*iptr[j+1];
-         accum[2] += sinct[j+2]*iptr[j+2];
+@@ -415,8 +415,10 @@ static int resampler_basic_direct_double(SpeexResamplerState *st, spx_uint32_t c
         accum[3] += sinct[j+3]*iptr[j+3];
       }
       sum = accum[0] + accum[1] + accum[2] + accum[3];
@ -100,17 +62,7 @@ diff --git a/src/resample.c b/src/resample.c
 #endif
 
       out[out_stride * out_sample++] = PSHR32(sum, 15);
-       last_sample += int_advance;
-       samp_frac_num += frac_advance;
-       if (samp_frac_num >= den_rate)
-       {
-          samp_frac_num -= den_rate;
-@@ -455,34 +457,38 @@ static int resampler_basic_interpolate_s
- #ifdef FIXED_POINT
-       const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate);
- #else
-       const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate;
- #endif
+@@ -460,7 +462,9 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3
       spx_word16_t interp[4];
 
 
@ -121,16 +73,9 @@ diff --git a/src/resample.c b/src/resample.c
       int j;
       spx_word32_t accum[4] = {0,0,0,0};
 
-       for(j=0;j<N;j++) {
-         const spx_word16_t curr_in=iptr[j];
-         accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]);
-         accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]);
-         accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]);
-         accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]);
-       }
- 
+@@ -475,9 +479,11 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3
       cubic_coef(frac, interp);
-       sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]);
+       sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1));
       sum = SATURATE32PSHR(sum, 15, 32767);
 -#else
 +#ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
@ -141,17 +86,7 @@ diff --git a/src/resample.c b/src/resample.c
 #endif
 
       out[out_stride * out_sample++] = sum;
-       last_sample += int_advance;
-       samp_frac_num += frac_advance;
-       if (samp_frac_num >= den_rate)
-       {
-          samp_frac_num -= den_rate;
-@@ -518,33 +524,37 @@ static int resampler_basic_interpolate_d
- #ifdef FIXED_POINT
-       const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate);
- #else
-       const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate;
- #endif
+@@ -523,7 +529,9 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3
       spx_word16_t interp[4];
 
 
@ -162,13 +97,7 @@ diff --git a/src/resample.c b/src/resample.c
       int j;
       double accum[4] = {0,0,0,0};
 
-       for(j=0;j<N;j++) {
-         const double curr_in=iptr[j];
-         accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]);
-         accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]);
-         accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]);
-         accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]);
-       }
+@@ -537,9 +545,11 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3
 
       cubic_coef(frac, interp);
       sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]);
@ -181,15 +110,11 @@ diff --git a/src/resample.c b/src/resample.c
 #endif
 
       out[out_stride * out_sample++] = PSHR32(sum,15);
-       last_sample += int_advance;
-       samp_frac_num += frac_advance;
-       if (samp_frac_num >= den_rate)
-       {
-          samp_frac_num -= den_rate;
 diff --git a/src/resample_neon.c b/src/resample_neon.c
 --- a/src/resample_neon.c
 +++ b/src/resample_neon.c
-@@ -32,16 +32,17 @@
+@@ -31,16 +31,18 @@
+    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
@ -197,46 +122,27 @@ diff --git a/src/resample_neon.c b/src/resample_neon.c
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
- #include <stdint.h>
 +#include "simd_detect.h"
+
+ #include <arm_neon.h>
 
 #ifdef FIXED_POINT
- #if defined(__aarch64__)
+ #ifdef __thumb2__
 static inline int32_t saturate_32bit_to_16bit(int32_t a) {
     int32_t ret;
-     asm ("fmov s0, %w[a]\n"
-          "sqxtn h0, s0\n"
-          "sxtl v0.4s, v0.4h\n"
-@@ -73,17 +74,17 @@
+     asm ("ssat %[ret], #16, %[a]"
+          : [ret] "=&r" (ret)
+@@ -60,17 +62,17 @@ static inline int32_t saturate_32bit_to_
+     return ret;
 }
 #endif
 #undef WORD2INT
 #define WORD2INT(x) (saturate_32bit_to_16bit(x))
 
 #define OVERRIDE_INNER_PRODUCT_SINGLE
- /* Only works when len % 4 == 0 and len >= 4 */
- #if defined(__aarch64__)
+ /* Only works when len % 4 == 0 */
 -static inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
-+inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
- {
-     int32_t ret;
-     uint32_t remainder = len % 16;
-     len = len - remainder;
- 
-     asm volatile ("	 cmp %w[len], #0\n"
- 		  "	 b.ne 1f\n"
- 		  "	 ld1 {v16.4h}, [%[b]], #8\n"
-@@ -128,17 +129,17 @@
- 		  : [ret] "=r" (ret), [a] "+r" (a), [b] "+r" (b),
- 		    [len] "+r" (len), [remainder] "+r" (remainder)
- 		  :
- 		  : "cc", "v0",
- 		    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23");
-     return ret;
- }
- #else
-static inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
-+inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
+int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
 {
     int32_t ret;
     uint32_t remainder = len % 16;
@ -245,36 +151,17 @@ diff --git a/src/resample_neon.c b/src/resample_neon.c
     asm volatile ("	 cmp %[len], #0\n"
 		  "	 bne 1f\n"
 		  "	 vld1.16 {d16}, [%[b]]!\n"
-@@ -218,17 +219,17 @@
- #endif
- 
+@@ -134,17 +136,17 @@ static inline int32_t saturate_float_to_
+          : "q0");
+     return ret;
+ }
 #undef WORD2INT
 #define WORD2INT(x) (saturate_float_to_16bit(x))
 
 #define OVERRIDE_INNER_PRODUCT_SINGLE
- /* Only works when len % 4 == 0 and len >= 4 */
- #if defined(__aarch64__)
+ /* Only works when len % 4 == 0 */
 -static inline float inner_product_single(const float *a, const float *b, unsigned int len)
-+inline float inner_product_single(const float *a, const float *b, unsigned int len)
- {
-     float ret;
-     uint32_t remainder = len % 16;
-     len = len - remainder;
- 
-     asm volatile ("	 cmp %w[len], #0\n"
- 		  "	 b.ne 1f\n"
- 		  "	 ld1 {v16.4s}, [%[b]], #16\n"
-@@ -273,17 +274,17 @@
- 		  : [ret] "=w" (ret), [a] "+r" (a), [b] "+r" (b),
- 		    [len] "+r" (len), [remainder] "+r" (remainder)
- 		  :
- 		  : "cc", "v1", "v2", "v3", "v4",
- 		    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23");
-     return ret;
- }
- #else
-static inline float inner_product_single(const float *a, const float *b, unsigned int len)
-+inline float inner_product_single(const float *a, const float *b, unsigned int len)
+float inner_product_single(const float *a, const float *b, unsigned int len)
 {
     float ret;
     uint32_t remainder = len % 16;
@ -333,7 +220,7 @@ diff --git a/src/resample_sse.c b/src/resample_sse.c
    return ret;
 }
 
- #ifdef USE_SSE2
+ #ifdef _USE_SSE2
 #include <emmintrin.h>
 #define OVERRIDE_INNER_PRODUCT_DOUBLE
 
--- a/media/libspeex_resampler/04_hugemem.patch
+++ b/media/libspeex_resampler/04_hugemem.patch
@ -14,13 +14,13 @@ diff --git a/src/resample.c b/src/resample.c
 +
 #ifdef OUTSIDE_SPEEX
 #include <stdlib.h>
- static void *speex_alloc(int size) {return calloc(size,1);}
- static void *speex_realloc(void *ptr, int size) {return realloc(ptr, size);}
- static void speex_free(void *ptr) {free(ptr);}
- #ifndef EXPORT
- #define EXPORT
- #endif
-@@ -633,25 +645,26 @@ static int update_filter(SpeexResamplerS
+ static void *speex_alloc (int size) {return calloc(size,1);}
+ static void *speex_realloc (void *ptr, int size) {return realloc(ptr, size);}
+ static void speex_free (void *ptr) {free(ptr);}
+ #include "speex_resampler.h"
+ #include "arch.h"
+ #else /* OUTSIDE_SPEEX */
+@@ -643,25 +645,26 @@ static int update_filter(SpeexResamplerS
          st->oversample >>= 1;
       if (st->oversample < 1)
          st->oversample = 1;
@ -29,24 +29,23 @@ diff --git a/src/resample.c b/src/resample.c
       st->cutoff = quality_map[st->quality].upsample_bandwidth;
    }
 
+-   /* Choose the resampling type that requires the least amount of memory */
 -#ifdef RESAMPLE_FULL_SINC_TABLE
 -   use_direct = 1;
 -   if (INT_MAX/sizeof(spx_word16_t)/st->den_rate < st->filt_len)
 -      goto fail;
-#else
-   /* Choose the resampling type that requires the least amount of memory */
-   use_direct = st->filt_len*st->den_rate <= st->filt_len*st->oversample+8
-                && INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len;
 +   use_direct =
 +#ifdef RESAMPLE_HUGEMEM
 +      /* Choose the direct resampler, even with higher initialization costs,
 +         when resampling any multiple of 100 to 44100. */
 +      st->den_rate <= 441
-+#else
+ #else
+-   use_direct = st->filt_len*st->den_rate <= st->filt_len*st->oversample+8
 +      /* Choose the resampling type that requires the least amount of memory */
 +      st->filt_len*st->den_rate <= st->filt_len*st->oversample+8
- #endif
-+                && INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len;
+#endif
+                 && INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len;
+-#endif
    if (use_direct)
    {
       min_sinc_table_length = st->filt_len*st->den_rate;
@ -55,3 +54,4 @@ diff --git a/src/resample.c b/src/resample.c
          goto fail;
 
       min_sinc_table_length = st->filt_len*st->oversample+8;
+
--- a/media/libspeex_resampler/05_remove-empty-asm-clobber.patch
+++ b/media/libspeex_resampler/05_remove-empty-asm-clobber.patch
@ -0,0 +1,33 @@
+https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Extended-Asm says
+
+  asm [volatile] ( AssemblerTemplate : [OutputOperands] [ : [InputOperands] [ : [Clobbers] ] ] )
+
+which implies that Clobbers is optional even after the third colon, but 
+the gcc used for b2g_try_emulator_dep builds says
+
+resample_neon.c: In function 'saturate_32bit_to_16bit':
+resample_neon.c:50: error: expected string literal before ')' token
+
+diff --git a/src/resample_neon.c b/src/resample_neon.c
+--- a/src/resample_neon.c
+++ b/src/resample_neon.c
+@@ -41,18 +41,17 @@
+ #include <arm_neon.h>
+ 
+ #ifdef FIXED_POINT
+ #ifdef __thumb2__
+ static inline int32_t saturate_32bit_to_16bit(int32_t a) {
+     int32_t ret;
+     asm ("ssat %[ret], #16, %[a]"
+          : [ret] "=&r" (ret)
+-         : [a] "r" (a)
+-         : );
+         : [a] "r" (a));
+     return ret;
+ }
+ #else
+ static inline int32_t saturate_32bit_to_16bit(int32_t a) {
+     int32_t ret;
+     asm ("vmov.s32 d0[0], %[a]\n"
+          "vqmovn.s32 d0, q0\n"
+          "vmov.s16 %[ret], d0[0]\n"
--- a/media/libspeex_resampler/06_set-rate-overflow-no-return.patch
+++ b/media/libspeex_resampler/06_set-rate-overflow-no-return.patch
@ -1,9 +1,7 @@
-This is a fix for https://bugzilla.mozilla.org/show_bug.cgi?id=1274083
-
 diff --git a/src/resample.c b/src/resample.c
 --- a/src/resample.c
 +++ b/src/resample.c
-@@ -1129,18 +1129,19 @@ EXPORT int speex_resampler_set_rate_frac
+@@ -1141,18 +1141,19 @@ EXPORT int speex_resampler_set_rate_frac
 
    st->num_rate /= fact;
    st->den_rate /= fact;
@ -12,10 +10,10 @@ diff --git a/src/resample.c b/src/resample.c
    {
       for (i=0;i<st->nb_channels;i++)
       {
-         if (multiply_frac(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS)
+-         if (_muldiv(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS)
 -            return RESAMPLER_ERR_OVERFLOW;
-+         if (multiply_frac(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS) {
-+            st->samp_frac_num[i] = st->den_rate-1;
+         if (_muldiv(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS) {
+           st->samp_frac_num[i] = st->den_rate-1;
 +         }
          /* Safety net */
          if (st->samp_frac_num[i] >= st->den_rate)
--- a/media/libspeex_resampler/07_integer-halving.patch
+++ b/media/libspeex_resampler/07_integer-halving.patch
@ -0,0 +1,95 @@
+diff --git a/src/arch.h b/src/arch.h
+--- a/src/arch.h
+++ b/src/arch.h
+@@ -172,26 +172,23 @@ typedef float spx_word32_t;
+ #define SHL(a,shift)       (a)
+ #define SATURATE(x,a) (x)
+ 
+ #define ADD16(a,b) ((a)+(b))
+ #define SUB16(a,b) ((a)-(b))
+ #define ADD32(a,b) ((a)+(b))
+ #define SUB32(a,b) ((a)-(b))
+ #define MULT16_16_16(a,b)     ((a)*(b))
+#define MULT16_32_32(a,b)     ((a)*(b))
+ #define MULT16_16(a,b)     ((spx_word32_t)(a)*(spx_word32_t)(b))
+ #define MAC16_16(c,a,b)     ((c)+(spx_word32_t)(a)*(spx_word32_t)(b))
+ 
+-#define MULT16_32_Q11(a,b)     ((a)*(b))
+-#define MULT16_32_Q13(a,b)     ((a)*(b))
+-#define MULT16_32_Q14(a,b)     ((a)*(b))
+ #define MULT16_32_Q15(a,b)     ((a)*(b))
+ #define MULT16_32_P15(a,b)     ((a)*(b))
+ 
+-#define MAC16_32_Q11(c,a,b)     ((c)+(a)*(b))
+ #define MAC16_32_Q15(c,a,b)     ((c)+(a)*(b))
+ 
+ #define MAC16_16_Q11(c,a,b)     ((c)+(a)*(b))
+ #define MAC16_16_Q13(c,a,b)     ((c)+(a)*(b))
+ #define MAC16_16_P13(c,a,b)     ((c)+(a)*(b))
+ #define MULT16_16_Q11_32(a,b)     ((a)*(b))
+ #define MULT16_16_Q13(a,b)     ((a)*(b))
+ #define MULT16_16_Q14(a,b)     ((a)*(b))
+diff --git a/src/fixed_generic.h b/src/fixed_generic.h
+--- a/src/fixed_generic.h
+++ b/src/fixed_generic.h
+@@ -64,32 +64,27 @@
+ 
+ #define ADD16(a,b) ((spx_word16_t)((spx_word16_t)(a)+(spx_word16_t)(b)))
+ #define SUB16(a,b) ((spx_word16_t)(a)-(spx_word16_t)(b))
+ #define ADD32(a,b) ((spx_word32_t)(a)+(spx_word32_t)(b))
+ #define SUB32(a,b) ((spx_word32_t)(a)-(spx_word32_t)(b))
+ 
+ 
+ /* result fits in 16 bits */
+-#define MULT16_16_16(a,b)     ((((spx_word16_t)(a))*((spx_word16_t)(b))))
+#define MULT16_16_16(a,b)     (((spx_word16_t)(a))*((spx_word16_t)(b)))
+/* result fits in 32 bits */
+#define MULT16_32_32(a,b)     (((spx_word16_t)(a))*((spx_word32_t)(b)))
+ 
+ /* (spx_word32_t)(spx_word16_t) gives TI compiler a hint that it's 16x16->32 multiply */
+ #define MULT16_16(a,b)     (((spx_word32_t)(spx_word16_t)(a))*((spx_word32_t)(spx_word16_t)(b)))
+ 
+ #define MAC16_16(c,a,b) (ADD32((c),MULT16_16((a),(b))))
+-#define MULT16_32_Q12(a,b) ADD32(MULT16_16((a),SHR((b),12)), SHR(MULT16_16((a),((b)&0x00000fff)),12))
+-#define MULT16_32_Q13(a,b) ADD32(MULT16_16((a),SHR((b),13)), SHR(MULT16_16((a),((b)&0x00001fff)),13))
+-#define MULT16_32_Q14(a,b) ADD32(MULT16_16((a),SHR((b),14)), SHR(MULT16_16((a),((b)&0x00003fff)),14))
+-
+-#define MULT16_32_Q11(a,b) ADD32(MULT16_16((a),SHR((b),11)), SHR(MULT16_16((a),((b)&0x000007ff)),11))
+-#define MAC16_32_Q11(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),11)), SHR(MULT16_16((a),((b)&0x000007ff)),11)))
+-
+-#define MULT16_32_P15(a,b) ADD32(MULT16_16((a),SHR((b),15)), PSHR(MULT16_16((a),((b)&0x00007fff)),15))
+-#define MULT16_32_Q15(a,b) ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))
+-#define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
+#define MULT16_32_P15(a,b) ADD32(MULT16_32_32(a,SHR((b),15)), PSHR(MULT16_16((a),((b)&0x00007fff)),15))
+#define MULT16_32_Q15(a,b) ADD32(MULT16_32_32(a,SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))
+#define MAC16_32_Q15(c,a,b) ADD32(c,MULT16_32_Q15(a,b))
+ 
+ 
+ #define MAC16_16_Q11(c,a,b)     (ADD32((c),SHR(MULT16_16((a),(b)),11)))
+ #define MAC16_16_Q13(c,a,b)     (ADD32((c),SHR(MULT16_16((a),(b)),13)))
+ #define MAC16_16_P13(c,a,b)     (ADD32((c),SHR(ADD32(4096,MULT16_16((a),(b))),13)))
+ 
+ #define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11))
+ #define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13))
+diff --git a/src/resample.c b/src/resample.c
+--- a/src/resample.c
+++ b/src/resample.c
+@@ -474,17 +474,17 @@ static int resampler_basic_interpolate_s
+         const spx_word16_t curr_in=iptr[j];
+         accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]);
+         accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]);
+         accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]);
+         accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]);
+       }
+ 
+       cubic_coef(frac, interp);
+-      sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1));
+      sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]);
+       sum = SATURATE32PSHR(sum, 15, 32767);
+ #ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
+       } else {
+       cubic_coef(frac, interp);
+       sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
+       }
+ #endif
+ 
--- a/media/libspeex_resampler/COPYING
+++ b/media/libspeex_resampler/COPYING
@ -1,7 +1,7 @@
 Copyright 2002-2008 	Xiph.org Foundation
 Copyright 2002-2008 	Jean-Marc Valin
 Copyright 2005-2007	Analog Devices Inc.
-Copyright 2005-2008	Commonwealth Scientific and Industrial Research
+Copyright 2005-2008	Commonwealth Scientific and Industrial Research 
                        Organisation (CSIRO)
 Copyright 1993, 2002, 2006 David Rowe
 Copyright 2003 		EpicGames
--- a/media/libspeex_resampler/moz.yaml
+++ b/media/libspeex_resampler/moz.yaml
@ -10,8 +10,8 @@ origin:

  url: https://speex.org/

-  release: 738e17905e1ca2a1fa932ddd9c2a85d089f4e845 (2023-02-28T16:25:59.000-05:00).
-  revision: 738e17905e1ca2a1fa932ddd9c2a85d089f4e845
+  release: 79822c8fb79d202cbe7e899f7406acf17beb247c (2017-01-22T14:16:02.000-05:00).
+  revision: 79822c8fb79d202cbe7e899f7406acf17beb247c

  license: BSD-3-Clause
  license-file: COPYING
@ -35,6 +35,7 @@ vendoring:
    - libspeexdsp/resample_sse.h
    - libspeexdsp/resample_neon.h
    - libspeexdsp/arch.h
+    - libspeexdsp/stack_alloc.h
    - libspeexdsp/fixed_generic.h
    - include/speex/speex_resampler.h
    - AUTHORS
@ -57,6 +58,9 @@ vendoring:
    - action: move-file
      from: '{vendor_dir}/libspeexdsp/arch.h'
      to: '{vendor_dir}/src/arch.h'
+    - action: move-file
+      from: '{vendor_dir}/libspeexdsp/stack_alloc.h'
+      to: '{vendor_dir}/src/stack_alloc.h'
    - action: move-file
      from: '{vendor_dir}/libspeexdsp/fixed_generic.h'
      to: '{vendor_dir}/src/fixed_generic.h'
@ -69,4 +73,6 @@ vendoring:
    - 02_simd-detect-runtime.patch
    - 03_set-skip-frac.patch
    - 04_hugemem.patch
-    - 05_set-rate-overflow-no-return.patch
+    - 05_remove-empty-asm-clobber.patch
+    - 06_set-rate-overflow-no-return.patch
+    - 07_integer-halving.patch
--- a/media/libspeex_resampler/src/arch.h
+++ b/media/libspeex_resampler/src/arch.h
@ -41,10 +41,10 @@
 #ifdef FLOATING_POINT
 #error You cannot compile as floating point and fixed point at the same time
 #endif
-#ifdef USE_SSE
+#ifdef _USE_SSE
 #error SSE is only for floating-point
 #endif
-#if defined(ARM4_ASM) + defined(ARM5E_ASM) + defined(BFIN_ASM) > 1
+#if ((defined (ARM4_ASM)||defined (ARM4_ASM)) && defined(BFIN_ASM)) || (defined (ARM4_ASM)&&defined(ARM5E_ASM))
 #error Make up your mind. What CPU do you have?
 #endif
 #ifdef VORBIS_PSYCHO
@ -56,10 +56,10 @@
 #ifndef FLOATING_POINT
 #error You now need to define either FIXED_POINT or FLOATING_POINT
 #endif
-#if defined(ARM4_ASM) || defined(ARM5E_ASM) || defined(BFIN_ASM)
+#if defined (ARM4_ASM) || defined(ARM5E_ASM) || defined(BFIN_ASM)
 #error I suppose you can have a [ARM4/ARM5E/Blackfin] that has float instructions?
 #endif
-#ifdef FIXED_DEBUG
+#ifdef FIXED_POINT_DEBUG
 #error "Don't you think enabling fixed-point is a good thing to do if you want to debug that?"
 #endif

@ -117,9 +117,9 @@ typedef spx_word32_t spx_sig_t;

 #ifdef ARM5E_ASM
 #include "fixed_arm5e.h"
-#elif defined(ARM4_ASM)
+#elif defined (ARM4_ASM)
 #include "fixed_arm4.h"
-#elif defined(BFIN_ASM)
+#elif defined (BFIN_ASM)
 #include "fixed_bfin.h"
 #endif

@ -207,7 +207,7 @@ typedef float spx_word32_t;
 #endif


-#if defined(CONFIG_TI_C54X) || defined(CONFIG_TI_C55X)
+#if defined (CONFIG_TI_C54X) || defined (CONFIG_TI_C55X)

 /* 2 on TI C5x DSP */
 #define BYTES_PER_CHAR 2
--- a/media/libspeex_resampler/src/fixed_generic.h
+++ b/media/libspeex_resampler/src/fixed_generic.h
@ -77,7 +77,6 @@
 #define MULT16_16(a,b)     (((spx_word32_t)(spx_word16_t)(a))*((spx_word32_t)(spx_word16_t)(b)))

 #define MAC16_16(c,a,b) (ADD32((c),MULT16_16((a),(b))))
-
 #define MULT16_32_P15(a,b) ADD32(MULT16_32_32(a,SHR((b),15)), PSHR(MULT16_16((a),((b)&0x00007fff)),15))
 #define MULT16_32_Q15(a,b) ADD32(MULT16_32_32(a,SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))
 #define MAC16_32_Q15(c,a,b) ADD32(c,MULT16_32_Q15(a,b))
--- a/media/libspeex_resampler/src/moz.build
+++ b/media/libspeex_resampler/src/moz.build
@ -28,15 +28,15 @@ DEFINES['FLOATING_POINT'] = True

 # Only use SSE code when using floating point samples, and on x86
 if CONFIG['INTEL_ARCHITECTURE']:
-    DEFINES['USE_SSE'] = True
-    DEFINES['USE_SSE2'] = True
+    DEFINES['_USE_SSE'] = True
+    DEFINES['_USE_SSE2'] = True
    SOURCES += [
        'resample_sse.c'
    ]
    SOURCES['resample_sse.c'].flags += CONFIG['SSE2_FLAGS']

 if CONFIG['CPU_ARCH'] == 'arm' and CONFIG['BUILD_ARM_NEON']:
-    DEFINES['USE_NEON'] = True
+    DEFINES['_USE_NEON'] = True
    SOURCES += [
        'resample_neon.c'
    ]
--- a/media/libspeex_resampler/src/resample.c
+++ b/media/libspeex_resampler/src/resample.c
@ -46,7 +46,7 @@
   Smith, Julius O. Digital Audio Resampling Home Page
   Center for Computer Research in Music and Acoustics (CCRMA),
   Stanford University, 2007.
-   Web published at https://ccrma.stanford.edu/~jos/resample/.
+   Web published at http://ccrma.stanford.edu/~jos/resample/.

   There is one main difference, though. This resampler uses cubic
   interpolation instead of linear interpolation in the above paper. This
@ -65,12 +65,9 @@

 #ifdef OUTSIDE_SPEEX
 #include <stdlib.h>
-static void *speex_alloc(int size) {return calloc(size,1);}
-static void *speex_realloc(void *ptr, int size) {return realloc(ptr, size);}
-static void speex_free(void *ptr) {free(ptr);}
-#ifndef EXPORT
-#define EXPORT
-#endif
+static void *speex_alloc (int size) {return calloc(size,1);}
+static void *speex_realloc (void *ptr, int size) {return realloc(ptr, size);}
+static void speex_free (void *ptr) {free(ptr);}
 #include "speex_resampler.h"
 #include "arch.h"
 #else /* OUTSIDE_SPEEX */
@ -80,6 +77,7 @@ static void speex_free(void *ptr) {free(ptr);}
 #include "os_support.h"
 #endif /* OUTSIDE_SPEEX */

+#include "stack_alloc.h"
 #include <math.h>
 #include <limits.h>

@ -95,12 +93,12 @@ static void speex_free(void *ptr) {free(ptr);}
 #endif

 #ifndef UINT32_MAX
-#define UINT32_MAX 4294967295U
+#define UINT32_MAX 4294967296U
 #endif

 #include "simd_detect.h"

-/* Number of elements to allocate on the stack */
+/* Numer of elements to allocate on the stack */
 #ifdef VAR_ARRAYS
 #define FIXED_STACK_ALLOC 8192
 #else
@ -192,14 +190,16 @@ struct FuncDef {
   int oversample;
 };

-static const struct FuncDef kaiser12_funcdef = {kaiser12_table, 64};
-#define KAISER12 (&kaiser12_funcdef)
-static const struct FuncDef kaiser10_funcdef = {kaiser10_table, 32};
-#define KAISER10 (&kaiser10_funcdef)
-static const struct FuncDef kaiser8_funcdef = {kaiser8_table, 32};
-#define KAISER8 (&kaiser8_funcdef)
-static const struct FuncDef kaiser6_funcdef = {kaiser6_table, 32};
-#define KAISER6 (&kaiser6_funcdef)
+static const struct FuncDef _KAISER12 = {kaiser12_table, 64};
+#define KAISER12 (&_KAISER12)
+/*static struct FuncDef _KAISER12 = {kaiser12_table, 32};
+#define KAISER12 (&_KAISER12)*/
+static const struct FuncDef _KAISER10 = {kaiser10_table, 32};
+#define KAISER10 (&_KAISER10)
+static const struct FuncDef _KAISER8 = {kaiser8_table, 32};
+#define KAISER8 (&_KAISER8)
+static const struct FuncDef _KAISER6 = {kaiser6_table, 32};
+#define KAISER6 (&_KAISER6)

 struct QualityMapping {
   int base_length;
@ -584,7 +584,6 @@ static int resampler_basic_zero(SpeexResamplerState *st, spx_uint32_t channel_in
   const int frac_advance = st->frac_advance;
   const spx_uint32_t den_rate = st->den_rate;

-   (void)in;
   while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
   {
      out[out_stride * out_sample++] = 0;
@ -602,15 +601,16 @@ static int resampler_basic_zero(SpeexResamplerState *st, spx_uint32_t channel_in
   return out_sample;
 }

-static int multiply_frac(spx_uint32_t *result, spx_uint32_t value, spx_uint32_t num, spx_uint32_t den)
+static int _muldiv(spx_uint32_t *result, spx_uint32_t value, spx_uint32_t mul, spx_uint32_t div)
 {
-   spx_uint32_t major = value / den;
-   spx_uint32_t remain = value % den;
+   speex_assert(result);
+   spx_uint32_t major = value / div;
+   spx_uint32_t remainder = value % div;
   /* TODO: Could use 64 bits operation to check for overflow. But only guaranteed in C99+ */
-   if (remain > UINT32_MAX / num || major > UINT32_MAX / num
-       || major * num > UINT32_MAX - remain * num / den)
+   if (remainder > UINT32_MAX / mul || major > UINT32_MAX / mul
+       || major * mul > UINT32_MAX - remainder * mul / div)
      return RESAMPLER_ERR_OVERFLOW;
-   *result = remain * num / den + major * num;
+   *result = remainder * mul / div + major * mul;
   return RESAMPLER_ERR_SUCCESS;
 }

@ -631,7 +631,7 @@ static int update_filter(SpeexResamplerState *st)
   {
      /* down-sampling */
      st->cutoff = quality_map[st->quality].downsample_bandwidth * st->den_rate / st->num_rate;
-      if (multiply_frac(&st->filt_len,st->filt_len,st->num_rate,st->den_rate) != RESAMPLER_ERR_SUCCESS)
+      if (_muldiv(&st->filt_len,st->filt_len,st->num_rate,st->den_rate) != RESAMPLER_ERR_SUCCESS)
         goto fail;
      /* Round up to make sure we have a multiple of 8 for SSE */
      st->filt_len = ((st->filt_len-1)&(~0x7))+8;
@ -746,18 +746,16 @@ static int update_filter(SpeexResamplerState *st)
      {
         spx_uint32_t j;
         spx_uint32_t olen = old_length;
-         spx_uint32_t start = i*st->mem_alloc_size;
-         spx_uint32_t magic_samples = st->magic_samples[i];
         /*if (st->magic_samples[i])*/
         {
            /* Try and remove the magic samples as if nothing had happened */

            /* FIXME: This is wrong but for now we need it to avoid going over the array bounds */
-            olen = old_length + 2*magic_samples;
-            for (j=old_length-1+magic_samples;j--;)
-               st->mem[start+j+magic_samples] = st->mem[i*old_alloc_size+j];
-            for (j=0;j<magic_samples;j++)
-               st->mem[start+j] = 0;
+            olen = old_length + 2*st->magic_samples[i];
+            for (j=old_length-1+st->magic_samples[i];j--;)
+               st->mem[i*st->mem_alloc_size+j+st->magic_samples[i]] = st->mem[i*old_alloc_size+j];
+            for (j=0;j<st->magic_samples[i];j++)
+               st->mem[i*st->mem_alloc_size+j] = 0;
            st->magic_samples[i] = 0;
         }
         if (st->filt_len > olen)
@ -765,18 +763,17 @@ static int update_filter(SpeexResamplerState *st)
            /* If the new filter length is still bigger than the "augmented" length */
            /* Copy data going backward */
            for (j=0;j<olen-1;j++)
-               st->mem[start+(st->filt_len-2-j)] = st->mem[start+(olen-2-j)];
+               st->mem[i*st->mem_alloc_size+(st->filt_len-2-j)] = st->mem[i*st->mem_alloc_size+(olen-2-j)];
            /* Then put zeros for lack of anything better */
            for (;j<st->filt_len-1;j++)
-               st->mem[start+(st->filt_len-2-j)] = 0;
+               st->mem[i*st->mem_alloc_size+(st->filt_len-2-j)] = 0;
            /* Adjust last_sample */
            st->last_sample[i] += (st->filt_len - olen)/2;
         } else {
            /* Put back some of the magic! */
-            magic_samples = (olen - st->filt_len)/2;
-            for (j=0;j<st->filt_len-1+magic_samples;j++)
-               st->mem[start+j] = st->mem[start+j+magic_samples];
-            st->magic_samples[i] = magic_samples;
+            st->magic_samples[i] = (olen - st->filt_len)/2;
+            for (j=0;j<st->filt_len-1+st->magic_samples[i];j++)
+               st->mem[i*st->mem_alloc_size+j] = st->mem[i*st->mem_alloc_size+j+st->magic_samples[i]];
         }
      }
   } else if (st->filt_len < old_length)
@ -814,6 +811,7 @@ EXPORT SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels, spx_u

 EXPORT SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int *err)
 {
+   spx_uint32_t i;
   SpeexResamplerState *st;
   int filter_err;

@ -993,7 +991,8 @@ EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t cha
   const spx_uint32_t xlen = st->mem_alloc_size - (st->filt_len - 1);
 #ifdef VAR_ARRAYS
   const unsigned int ylen = (olen < FIXED_STACK_ALLOC) ? olen : FIXED_STACK_ALLOC;
-   spx_word16_t ystack[ylen];
+   VARDECL(spx_word16_t *ystack);
+   ALLOC(ystack, ylen, spx_word16_t);
 #else
   const unsigned int ylen = FIXED_STACK_ALLOC;
   spx_word16_t ystack[FIXED_STACK_ALLOC];
@ -1108,7 +1107,7 @@ EXPORT void speex_resampler_get_rate(SpeexResamplerState *st, spx_uint32_t *in_r
   *out_rate = st->out_rate;
 }

-static inline spx_uint32_t compute_gcd(spx_uint32_t a, spx_uint32_t b)
+static inline spx_uint32_t _gcd(spx_uint32_t a, spx_uint32_t b)
 {
   while (b != 0)
   {
@ -1138,7 +1137,7 @@ EXPORT int speex_resampler_set_rate_frac(SpeexResamplerState *st, spx_uint32_t r
   st->num_rate = ratio_num;
   st->den_rate = ratio_den;

-   fact = compute_gcd(st->num_rate, st->den_rate);
+   fact = _gcd (st->num_rate, st->den_rate);

   st->num_rate /= fact;
   st->den_rate /= fact;
@ -1147,8 +1146,8 @@ EXPORT int speex_resampler_set_rate_frac(SpeexResamplerState *st, spx_uint32_t r
   {
      for (i=0;i<st->nb_channels;i++)
      {
-         if (multiply_frac(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS) {
-            st->samp_frac_num[i] = st->den_rate-1;
+         if (_muldiv(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS) {
+           st->samp_frac_num[i] = st->den_rate-1;
         }
         /* Safety net */
         if (st->samp_frac_num[i] >= st->den_rate)
--- a/media/libspeex_resampler/src/resample_neon.c
+++ b/media/libspeex_resampler/src/resample_neon.c
@ -36,29 +36,17 @@
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

-#include <stdint.h>
 #include "simd_detect.h"

+#include <arm_neon.h>
+
 #ifdef FIXED_POINT
-#if defined(__aarch64__)
-static inline int32_t saturate_32bit_to_16bit(int32_t a) {
-    int32_t ret;
-    asm ("fmov s0, %w[a]\n"
-         "sqxtn h0, s0\n"
-         "sxtl v0.4s, v0.4h\n"
-         "fmov %w[ret], s0\n"
-         : [ret] "=r" (ret)
-         : [a] "r" (a)
-         : "v0" );
-    return ret;
-}
-#elif defined(__thumb2__)
+#ifdef __thumb2__
 static inline int32_t saturate_32bit_to_16bit(int32_t a) {
    int32_t ret;
    asm ("ssat %[ret], #16, %[a]"
-         : [ret] "=r" (ret)
-         : [a] "r" (a)
-         : );
+         : [ret] "=&r" (ret)
+         : [a] "r" (a));
    return ret;
 }
 #else
@ -67,7 +55,7 @@ static inline int32_t saturate_32bit_to_16bit(int32_t a) {
    asm ("vmov.s32 d0[0], %[a]\n"
         "vqmovn.s32 d0, q0\n"
         "vmov.s16 %[ret], d0[0]\n"
-         : [ret] "=r" (ret)
+         : [ret] "=&r" (ret)
         : [a] "r" (a)
         : "q0");
    return ret;
@ -77,64 +65,8 @@ static inline int32_t saturate_32bit_to_16bit(int32_t a) {
 #define WORD2INT(x) (saturate_32bit_to_16bit(x))

 #define OVERRIDE_INNER_PRODUCT_SINGLE
-/* Only works when len % 4 == 0 and len >= 4 */
-#if defined(__aarch64__)
-inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
-{
-    int32_t ret;
-    uint32_t remainder = len % 16;
-    len = len - remainder;
-
-    asm volatile ("	 cmp %w[len], #0\n"
-		  "	 b.ne 1f\n"
-		  "	 ld1 {v16.4h}, [%[b]], #8\n"
-		  "	 ld1 {v20.4h}, [%[a]], #8\n"
-		  "	 subs %w[remainder], %w[remainder], #4\n"
-		  "	 smull v0.4s, v16.4h, v20.4h\n"
-		  "      b.ne 4f\n"
-		  "	 b 5f\n"
-		  "1:"
-		  "	 ld1 {v16.4h, v17.4h, v18.4h, v19.4h}, [%[b]], #32\n"
-		  "	 ld1 {v20.4h, v21.4h, v22.4h, v23.4h}, [%[a]], #32\n"
-		  "	 subs %w[len], %w[len], #16\n"
-		  "	 smull v0.4s, v16.4h, v20.4h\n"
-		  "	 smlal v0.4s, v17.4h, v21.4h\n"
-		  "	 smlal v0.4s, v18.4h, v22.4h\n"
-		  "	 smlal v0.4s, v19.4h, v23.4h\n"
-		  "	 b.eq 3f\n"
-		  "2:"
-		  "	 ld1 {v16.4h, v17.4h, v18.4h, v19.4h}, [%[b]], #32\n"
-		  "	 ld1 {v20.4h, v21.4h, v22.4h, v23.4h}, [%[a]], #32\n"
-		  "	 subs %w[len], %w[len], #16\n"
-		  "	 smlal v0.4s, v16.4h, v20.4h\n"
-		  "	 smlal v0.4s, v17.4h, v21.4h\n"
-		  "	 smlal v0.4s, v18.4h, v22.4h\n"
-		  "	 smlal v0.4s, v19.4h, v23.4h\n"
-		  "	 b.ne 2b\n"
-		  "3:"
-		  "	 cmp %w[remainder], #0\n"
-		  "	 b.eq 5f\n"
-		  "4:"
-		  "	 ld1 {v18.4h}, [%[b]], #8\n"
-		  "	 ld1 {v22.4h}, [%[a]], #8\n"
-		  "	 subs %w[remainder], %w[remainder], #4\n"
-		  "	 smlal v0.4s, v18.4h, v22.4h\n"
-		  "	 b.ne 4b\n"
-		  "5:"
-		  "	 saddlv d0, v0.4s\n"
-		  "	 sqxtn s0, d0\n"
-		  "	 sqrshrn h0, s0, #15\n"
-		  "	 sxtl v0.4s, v0.4h\n"
-		  "	 fmov %w[ret], s0\n"
-		  : [ret] "=r" (ret), [a] "+r" (a), [b] "+r" (b),
-		    [len] "+r" (len), [remainder] "+r" (remainder)
-		  :
-		  : "cc", "v0",
-		    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23");
-    return ret;
-}
-#else
-inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
+/* Only works when len % 4 == 0 */
+int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
 {
    int32_t ret;
    uint32_t remainder = len % 16;
@ -181,105 +113,34 @@ inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned
 		  "	 vqmovn.s64 d0, q0\n"
 		  "	 vqrshrn.s32 d0, q0, #15\n"
 		  "	 vmov.s16 %[ret], d0[0]\n"
-		  : [ret] "=r" (ret), [a] "+r" (a), [b] "+r" (b),
+		  : [ret] "=&r" (ret), [a] "+r" (a), [b] "+r" (b),
 		    [len] "+r" (len), [remainder] "+r" (remainder)
 		  :
 		  : "cc", "q0",
-		    "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23");
+		    "d16", "d17", "d18", "d19",
+		    "d20", "d21", "d22", "d23");

    return ret;
 }
-#endif  // !defined(__aarch64__)
-
 #elif defined(FLOATING_POINT)
-#if defined(__aarch64__)
-static inline int32_t saturate_float_to_16bit(float a) {
-    int32_t ret;
-    asm ("fcvtas s1, %s[a]\n"
-         "sqxtn h1, s1\n"
-         "sxtl v1.4s, v1.4h\n"
-         "fmov %w[ret], s1\n"
-         : [ret] "=r" (ret)
-         : [a] "w" (a)
-         : "v1");
-    return ret;
-}
-#else
+
 static inline int32_t saturate_float_to_16bit(float a) {
    int32_t ret;
    asm ("vmov.f32 d0[0], %[a]\n"
         "vcvt.s32.f32 d0, d0, #15\n"
         "vqrshrn.s32 d0, q0, #15\n"
         "vmov.s16 %[ret], d0[0]\n"
-         : [ret] "=r" (ret)
+         : [ret] "=&r" (ret)
         : [a] "r" (a)
         : "q0");
    return ret;
 }
-#endif
-
 #undef WORD2INT
 #define WORD2INT(x) (saturate_float_to_16bit(x))

 #define OVERRIDE_INNER_PRODUCT_SINGLE
-/* Only works when len % 4 == 0 and len >= 4 */
-#if defined(__aarch64__)
-inline float inner_product_single(const float *a, const float *b, unsigned int len)
-{
-    float ret;
-    uint32_t remainder = len % 16;
-    len = len - remainder;
-
-    asm volatile ("	 cmp %w[len], #0\n"
-		  "	 b.ne 1f\n"
-		  "	 ld1 {v16.4s}, [%[b]], #16\n"
-		  "	 ld1 {v20.4s}, [%[a]], #16\n"
-		  "	 subs %w[remainder], %w[remainder], #4\n"
-		  "	 fmul v1.4s, v16.4s, v20.4s\n"
-		  "      b.ne 4f\n"
-		  "	 b 5f\n"
-		  "1:"
-		  "	 ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[b]], #64\n"
-		  "	 ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [%[a]], #64\n"
-		  "	 subs %w[len], %w[len], #16\n"
-		  "	 fmul v1.4s, v16.4s, v20.4s\n"
-		  "	 fmul v2.4s, v17.4s, v21.4s\n"
-		  "	 fmul v3.4s, v18.4s, v22.4s\n"
-		  "	 fmul v4.4s, v19.4s, v23.4s\n"
-		  "	 b.eq 3f\n"
-		  "2:"
-		  "	 ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[b]], #64\n"
-		  "	 ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [%[a]], #64\n"
-		  "	 subs %w[len], %w[len], #16\n"
-		  "	 fmla v1.4s, v16.4s, v20.4s\n"
-		  "	 fmla v2.4s, v17.4s, v21.4s\n"
-		  "	 fmla v3.4s, v18.4s, v22.4s\n"
-		  "	 fmla v4.4s, v19.4s, v23.4s\n"
-		  "	 b.ne 2b\n"
-		  "3:"
-		  "	 fadd v16.4s, v1.4s, v2.4s\n"
-		  "	 fadd v17.4s, v3.4s, v4.4s\n"
-		  "	 cmp %w[remainder], #0\n"
-		  "	 fadd v1.4s, v16.4s, v17.4s\n"
-		  "	 b.eq 5f\n"
-		  "4:"
-		  "	 ld1 {v18.4s}, [%[b]], #16\n"
-		  "	 ld1 {v22.4s}, [%[a]], #16\n"
-		  "	 subs %w[remainder], %w[remainder], #4\n"
-		  "	 fmla v1.4s, v18.4s, v22.4s\n"
-		  "	 b.ne 4b\n"
-		  "5:"
-		  "	 faddp v1.4s, v1.4s, v1.4s\n"
-		  "	 faddp %[ret].4s, v1.4s, v1.4s\n"
-		  : [ret] "=w" (ret), [a] "+r" (a), [b] "+r" (b),
-		    [len] "+r" (len), [remainder] "+r" (remainder)
-		  :
-		  : "cc", "v1", "v2", "v3", "v4",
-		    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23");
-    return ret;
-}
-#else
-inline float inner_product_single(const float *a, const float *b, unsigned int len)
+/* Only works when len % 4 == 0 */
+float inner_product_single(const float *a, const float *b, unsigned int len)
 {
    float ret;
    uint32_t remainder = len % 16;
@ -331,12 +192,11 @@ inline float inner_product_single(const float *a, const float *b, unsigned int l
 		  "	 vadd.f32 d0, d0, d1\n"
 		  "	 vpadd.f32 d0, d0, d0\n"
 		  "	 vmov.f32 %[ret], d0[0]\n"
-		  : [ret] "=r" (ret), [a] "+r" (a), [b] "+r" (b),
+		  : [ret] "=&r" (ret), [a] "+r" (a), [b] "+r" (b),
 		    [len] "+l" (len), [remainder] "+l" (remainder)
 		  :
-		  : "cc", "q0", "q1", "q2", "q3",
-		    "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11");
+		  : "cc", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8",
+                    "q9", "q10", "q11");
    return ret;
 }
-#endif  // defined(__aarch64__)
 #endif
--- a/media/libspeex_resampler/src/resample_sse.c
+++ b/media/libspeex_resampler/src/resample_sse.c
@ -73,7 +73,7 @@ float interpolate_product_single(const float *a, const float *b, unsigned int le
   return ret;
 }

-#ifdef USE_SSE2
+#ifdef _USE_SSE2
 #include <emmintrin.h>
 #define OVERRIDE_INNER_PRODUCT_DOUBLE

--- a/media/libspeex_resampler/src/stack_alloc.h
+++ b/media/libspeex_resampler/src/stack_alloc.h
@ -0,0 +1,115 @@
+/* Copyright (C) 2002 Jean-Marc Valin */
+/**
+   @file stack_alloc.h
+   @brief Temporary memory allocation on stack
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   - Neither the name of the Xiph.org Foundation nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef STACK_ALLOC_H
+#define STACK_ALLOC_H
+
+#ifdef USE_ALLOCA
+# ifdef WIN32
+#  include <malloc.h>
+# else
+#  ifdef HAVE_ALLOCA_H
+#   include <alloca.h>
+#  else
+#   include <stdlib.h>
+#  endif
+# endif
+#endif
+
+/**
+ * @def ALIGN(stack, size)
+ *
+ * Aligns the stack to a 'size' boundary
+ *
+ * Modifies 'stack' in place: it is advanced by
+ * ((size) - address) & ((size) - 1), i.e. moved forward to the next
+ * multiple of 'size'. Because the rounding is done with the mask
+ * (size) - 1, the result is only a real alignment when 'size' is a
+ * power of two. Both arguments are evaluated more than once, so they
+ * must not have side effects.
+ *
+ * @param stack Stack
+ * @param size  New size boundary
+ */
+
+/**
+ * @def PUSH(stack, size, type)
+ *
+ * Allocates 'size' elements of type 'type' on the stack
+ *
+ * The expansion first aligns 'stack' to sizeof(type) with ALIGN, then
+ * advances 'stack' by size*sizeof(type) and evaluates to a 'type *'
+ * pointing at the start of the region that was just skipped over.
+ * When ENABLE_VALGRIND is defined it additionally marks 1000 bytes at
+ * 'stack' as no-access before aligning and marks the reserved region
+ * as writable. 'stack' is updated in place and every argument is
+ * evaluated several times.
+ * NOTE(review): the offsets are in bytes, so 'stack' is presumably a
+ * char pointer -- confirm against the callers.
+ *
+ * @param stack Stack
+ * @param size  Number of elements
+ * @param type  Type of element
+ */
+
+/**
+ * @def VARDECL(var)
+ *
+ * Declare variable on stack
+ *
+ * Expands to nothing when VAR_ARRAYS is defined (ALLOC then performs
+ * the declaration itself); in the other two configurations it expands
+ * to 'var' unchanged, so the caller passes the full declarator,
+ * including its type.
+ *
+ * @param var Variable to declare
+ */
+
+/**
+ * @def ALLOC(var, size, type)
+ *
+ * Allocate 'size' elements of 'type' on stack
+ *
+ * Three variants are selected at compile time:
+ *  - VAR_ARRAYS: declares 'type var[size]'.
+ *  - USE_ALLOCA: assigns alloca(sizeof(type)*(size)) to 'var'.
+ *  - otherwise:  assigns PUSH(stack, size, type) to 'var', which
+ *    requires a variable literally named 'stack' to be in scope at
+ *    the point of use.
+ *
+ * @param var  Name of variable to allocate
+ * @param size Number of elements
+ * @param type Type of element
+ */
+
+#ifdef ENABLE_VALGRIND
+
+#include <valgrind/memcheck.h>
+
+#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1))
+
+#define PUSH(stack, size, type) (VALGRIND_MAKE_NOACCESS(stack, 1000),ALIGN((stack),sizeof(type)),VALGRIND_MAKE_WRITABLE(stack, ((size)*sizeof(type))),(stack)+=((size)*sizeof(type)),(type*)((stack)-((size)*sizeof(type))))
+
+#else
+
+#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1))
+
+#define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)),(stack)+=((size)*sizeof(type)),(type*)((stack)-((size)*sizeof(type))))
+
+#endif
+
+#if defined(VAR_ARRAYS)
+#define VARDECL(var)
+#define ALLOC(var, size, type) type var[size]
+#elif defined(USE_ALLOCA)
+#define VARDECL(var) var
+#define ALLOC(var, size, type) var = alloca(sizeof(type)*(size))
+#else
+#define VARDECL(var) var
+#define ALLOC(var, size, type) var = PUSH(stack, size, type)
+#endif
+
+
+#endif