b=1042504 update speex resampler to speexdsp 305e54ea r=padenot

Upstream SATURATE32PSHR changes conflict with Gecko's SSE runtime detection patch. That patch is updated to reduce deviation from upstream. --HG-- extra : rebase_source : fdaf355f802b944fcbe286e076ca19dc6bc33f80
2024-11-29 07:42:04 +00:00 · 2014-07-23 16:58:45 +12:00 · 2014-07-23 16:58:45 +12:00 · a3e81ff91a
commit a3e81ff91a
parent 3f4977b36d
6 changed files with 133 additions and 157 deletions
--- a/media/libspeex_resampler/README_MOZILLA
+++ b/media/libspeex_resampler/README_MOZILLA
@ -1,5 +1,5 @@
 This source is from the Speex DSP library
-(http://git.xiph.org/?p=speexdsp.git), from commit bbe7e099.
+(http://git.xiph.org/?p=speexdsp.git), from commit 305e54ea.

 It consists in the audio resampling code (resampler.c) and its header files
 dependancies, imported into the tree using the update.sh script.
--- a/media/libspeex_resampler/src/arch.h
+++ b/media/libspeex_resampler/src/arch.h
@ -163,6 +163,7 @@ typedef float spx_word32_t;
 #define VSHR32(a,shift) (a)
 #define SATURATE16(x,a) (x)
 #define SATURATE32(x,a) (x)
+#define SATURATE32PSHR(x,shift,a) (x)

 #define PSHR(a,shift)       (a)
 #define SHR(a,shift)       (a)
--- a/media/libspeex_resampler/src/fixed_generic.h
+++ b/media/libspeex_resampler/src/fixed_generic.h
@ -52,6 +52,10 @@
 #define SATURATE16(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
 #define SATURATE32(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))

+#define SATURATE32PSHR(x,shift,a) (((x)>=(SHL32(a,shift))) ? (a) : \
+                                   (x)<=-(SHL32(a,shift)) ? -(a) : \
+                                   (PSHR32(x, shift)))
+
 #define SHR(a,shift) ((a) >> (shift))
 #define SHL(a,shift) ((spx_word32_t)(a) << (shift))
 #define PSHR(a,shift) (SHR((a)+((EXTEND32(1)<<((shift))>>1)),shift))
--- a/media/libspeex_resampler/src/resample.c
+++ b/media/libspeex_resampler/src/resample.c
@ -79,7 +79,7 @@ static void speex_free (void *ptr) {free(ptr);}
 #include <math.h>

 #ifndef M_PI
-#define M_PI 3.14159263
+#define M_PI 3.14159265358979323846
 #endif

 #ifdef FIXED_POINT
@ -109,6 +109,10 @@ static void speex_free (void *ptr) {free(ptr);}
 #endif
 #endif

+#ifdef _USE_NEON
+#include "resample_neon.h"
+#endif
+
 /* Numer of elements to allocate on the stack */
 #ifdef VAR_ARRAYS
 #define FIXED_STACK_ALLOC 8192
@ -354,9 +358,7 @@ static int resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t c
      const spx_word16_t *iptr = & in[last_sample];

 #ifdef OVERRIDE_INNER_PRODUCT_SINGLE
-    if (moz_has_sse()) {
-      sum = inner_product_single(sinct, iptr, N);
-    } else {
+      if (!moz_has_sse()) {
 #endif
      int j;
      sum = 0;
@ -374,11 +376,14 @@ static int resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t c
      }
      sum = accum[0] + accum[1] + accum[2] + accum[3];
 */
+      sum = SATURATE32PSHR(sum, 15, 32767);
 #ifdef OVERRIDE_INNER_PRODUCT_SINGLE
-    }
+      } else {
+      sum = inner_product_single(sinct, iptr, N);
+      }
 #endif

-      out[out_stride * out_sample++] = SATURATE32(PSHR32(sum, 15), 32767);
+      out[out_stride * out_sample++] = sum;
      last_sample += int_advance;
      samp_frac_num += frac_advance;
      if (samp_frac_num >= den_rate)
@ -416,20 +421,20 @@ static int resampler_basic_direct_double(SpeexResamplerState *st, spx_uint32_t c

 #ifdef OVERRIDE_INNER_PRODUCT_DOUBLE
-      if(moz_has_sse2()) {
-        sum = inner_product_double(sinct, iptr, N);
-      } else {
+      if(!moz_has_sse2()) {
 #endif
-        int j;
-        double accum[4] = {0,0,0,0};
+      int j;
+      double accum[4] = {0,0,0,0};

-        for(j=0;j<N;j+=4) {
-          accum[0] += sinct[j]*iptr[j];
-          accum[1] += sinct[j+1]*iptr[j+1];
-          accum[2] += sinct[j+2]*iptr[j+2];
-          accum[3] += sinct[j+3]*iptr[j+3];
-        }
-        sum = accum[0] + accum[1] + accum[2] + accum[3];
+      for(j=0;j<N;j+=4) {
+        accum[0] += sinct[j]*iptr[j];
+        accum[1] += sinct[j+1]*iptr[j+1];
+        accum[2] += sinct[j+2]*iptr[j+2];
+        accum[3] += sinct[j+3]*iptr[j+3];
+      }
+      sum = accum[0] + accum[1] + accum[2] + accum[3];
 #ifdef OVERRIDE_INNER_PRODUCT_DOUBLE
+      } else {
+      sum = inner_product_double(sinct, iptr, N);
      }
 #endif

@ -475,28 +480,30 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3


 #ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
-      if (moz_has_sse()) {
-        cubic_coef(frac, interp);
-        sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
-      } else {
+      if (!moz_has_sse()) {
 #endif
-        int j;
-        spx_word32_t accum[4] = {0,0,0,0};
+      int j;
+      spx_word32_t accum[4] = {0,0,0,0};

-        for(j=0;j<N;j++) {
-          const spx_word16_t curr_in=iptr[j];
-          accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]);
-          accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]);
-          accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]);
-          accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]);
-        }
-        cubic_coef(frac, interp);
-        sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1));
+      for(j=0;j<N;j++) {
+        const spx_word16_t curr_in=iptr[j];
+        accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]);
+        accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]);
+        accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]);
+        accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]);
+      }
+
+      cubic_coef(frac, interp);
+      sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1));
+      sum = SATURATE32PSHR(sum, 15, 32767);
 #ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
+      } else {
+      cubic_coef(frac, interp);
+      sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
      }
 #endif
-
-      out[out_stride * out_sample++] = SATURATE32(PSHR32(sum, 14), 32767);
+      
+      out[out_stride * out_sample++] = sum;
      last_sample += int_advance;
      samp_frac_num += frac_advance;
      if (samp_frac_num >= den_rate)
@ -540,10 +547,7 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3


 #ifdef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
-      if (moz_has_sse2()) {
-        cubic_coef(frac, interp);
-        sum = interpolate_product_double(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
-      } else {
+      if (!moz_has_sse2()) {
 #endif
      int j;
      double accum[4] = {0,0,0,0};
@ -558,9 +562,13 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3

      cubic_coef(frac, interp);
      sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]);
-#ifdef OVERRIDE_INNER_PRODUCT_DOUBLE
+#ifdef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
+      } else {
+      cubic_coef(frac, interp);
+      sum = interpolate_product_double(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
      }
 #endif
+      
      out[out_stride * out_sample++] = PSHR32(sum,15);
      last_sample += int_advance;
      samp_frac_num += frac_advance;
@ -579,9 +587,10 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3

 static void update_filter(SpeexResamplerState *st)
 {
-   spx_uint32_t old_length;
-   
-   old_length = st->filt_len;
+   spx_uint32_t old_length = st->filt_len;
+   spx_uint32_t old_alloc_size = st->mem_alloc_size;
+   spx_uint32_t min_alloc_size;
+
   st->oversample = quality_map[st->quality].oversample;
   st->filt_len = quality_map[st->quality].base_length;
   
@ -609,12 +618,14 @@ static void update_filter(SpeexResamplerState *st)
   }
   
   /* Choose the resampling type that requires the least amount of memory */
+#ifdef RESAMPLE_FULL_SINC_TABLE
+   if (1)
+#else
   if (st->filt_len*st->den_rate <= st->filt_len*st->oversample+8)
+#endif
   {
      spx_uint32_t i;
-      if (!st->sinc_table)
-         st->sinc_table = (spx_word16_t *)speex_alloc(st->filt_len*st->den_rate*sizeof(spx_word16_t));
-      else if (st->sinc_table_length < st->filt_len*st->den_rate)
+      if (st->sinc_table_length < st->filt_len*st->den_rate)
      {
         st->sinc_table = (spx_word16_t *)speex_realloc(st->sinc_table,st->filt_len*st->den_rate*sizeof(spx_word16_t));
         st->sinc_table_length = st->filt_len*st->den_rate;
@ -638,9 +649,7 @@ static void update_filter(SpeexResamplerState *st)
      /*fprintf (stderr, "resampler uses direct sinc table and normalised cutoff %f\n", cutoff);*/
   } else {
      spx_int32_t i;
-      if (!st->sinc_table)
-         st->sinc_table = (spx_word16_t *)speex_alloc((st->filt_len*st->oversample+8)*sizeof(spx_word16_t));
-      else if (st->sinc_table_length < st->filt_len*st->oversample+8)
+      if (st->sinc_table_length < st->filt_len*st->oversample+8)
      {
         st->sinc_table = (spx_word16_t *)speex_realloc(st->sinc_table,(st->filt_len*st->oversample+8)*sizeof(spx_word16_t));
         st->sinc_table_length = st->filt_len*st->oversample+8;
@ -664,36 +673,26 @@ static void update_filter(SpeexResamplerState *st)
   /* Here's the place where we update the filter memory to take into account
      the change in filter length. It's probably the messiest part of the code
      due to handling of lots of corner cases. */
-   if (!st->mem)
+   min_alloc_size = st->filt_len-1 + st->buffer_size;
+   if (min_alloc_size > st->mem_alloc_size)
+   {
+      st->mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*min_alloc_size * sizeof(spx_word16_t));
+      st->mem_alloc_size = min_alloc_size;
+   }
+   if (!st->started)
   {
      spx_uint32_t i;
-      st->mem_alloc_size = st->filt_len-1 + st->buffer_size;
-      st->mem = (spx_word16_t*)speex_alloc(st->nb_channels*st->mem_alloc_size * sizeof(spx_word16_t));
-      for (i=0;i<st->nb_channels*st->mem_alloc_size;i++)
-         st->mem[i] = 0;
-      /*speex_warning("init filter");*/
-   } else if (!st->started)
-   {
-      spx_uint32_t i;
-      st->mem_alloc_size = st->filt_len-1 + st->buffer_size;
-      st->mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*st->mem_alloc_size * sizeof(spx_word16_t));
      for (i=0;i<st->nb_channels*st->mem_alloc_size;i++)
         st->mem[i] = 0;
      /*speex_warning("reinit filter");*/
   } else if (st->filt_len > old_length)
   {
-      spx_int32_t i;
+      spx_uint32_t i;
      /* Increase the filter length */
      /*speex_warning("increase filter size");*/
-      int old_alloc_size = st->mem_alloc_size;
-      if ((st->filt_len-1 + st->buffer_size) > st->mem_alloc_size)
+      for (i=st->nb_channels;i--;)
      {
-         st->mem_alloc_size = st->filt_len-1 + st->buffer_size;
-         st->mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*st->mem_alloc_size * sizeof(spx_word16_t));
-      }
-      for (i=st->nb_channels-1;i>=0;i--)
-      {
-         spx_int32_t j;
+         spx_uint32_t j;
         spx_uint32_t olen = old_length;
         /*if (st->magic_samples[i])*/
         {
@ -701,7 +700,7 @@ static void update_filter(SpeexResamplerState *st)
            
            /* FIXME: This is wrong but for now we need it to avoid going over the array bounds */
            olen = old_length + 2*st->magic_samples[i];
-            for (j=old_length-2+st->magic_samples[i];j>=0;j--)
+            for (j=old_length-1+st->magic_samples[i];j--;)
               st->mem[i*st->mem_alloc_size+j+st->magic_samples[i]] = st->mem[i*old_alloc_size+j];
            for (j=0;j<st->magic_samples[i];j++)
               st->mem[i*st->mem_alloc_size+j] = 0;
--- a/media/libspeex_resampler/sse-detect-runtime.patch
+++ b/media/libspeex_resampler/sse-detect-runtime.patch
@ -1,5 +1,6 @@
--- a/src/resample.c	2014-07-01 17:25:53.999320032 +1200
-+++ b/src/resample.c	2014-07-01 17:42:18.822611775 +1200
+diff --git a/media/libspeex_resampler/src/resample.c b/media/libspeex_resampler/src/resample.c
+--- a/media/libspeex_resampler/src/resample.c
+++ b/media/libspeex_resampler/src/resample.c
@@ -90,18 +90,28 @@ static void speex_free (void *ptr) {free
                
 #define IMAX(a,b) ((a) > (b) ? (a) : (b))
@ -23,13 +24,13 @@
 +#endif
 #endif
 
+ #ifdef _USE_NEON
+ #include "resample_neon.h"
+ #endif
+ 
 /* Numer of elements to allocate on the stack */
 #ifdef VAR_ARRAYS
- #define FIXED_STACK_ALLOC 8192
- #else
- #define FIXED_STACK_ALLOC 1024
- #endif
-@@ -338,35 +348,39 @@ static int resampler_basic_direct_single
+@@ -342,17 +352,19 @@ static int resampler_basic_direct_single
    const spx_uint32_t den_rate = st->den_rate;
    spx_word32_t sum;
 
@ -40,9 +41,7 @@
 
 -#ifndef OVERRIDE_INNER_PRODUCT_SINGLE
 +#ifdef OVERRIDE_INNER_PRODUCT_SINGLE
-+    if (moz_has_sse()) {
-+      sum = inner_product_single(sinct, iptr, N);
-+    } else {
+      if (!moz_has_sse()) {
 +#endif
       int j;
       sum = 0;
@ -52,7 +51,7 @@
       Plus this this forces truncation to 32 bits and you lose the HW guard bits.
       I think we can trust the compiler and let it vectorize and/or unroll itself.
       spx_word32_t accum[4] = {0,0,0,0};
-       for(j=0;j<N;j+=4) {
+@@ -360,18 +372,20 @@ static int resampler_basic_direct_single
         accum[0] += MULT16_16(sinct[j], iptr[j]);
         accum[1] += MULT16_16(sinct[j+1], iptr[j+1]);
         accum[2] += MULT16_16(sinct[j+2], iptr[j+2]);
@ -60,19 +59,21 @@
       }
       sum = accum[0] + accum[1] + accum[2] + accum[3];
 */
+       sum = SATURATE32PSHR(sum, 15, 32767);
 -#else
-      sum = inner_product_single(sinct, iptr, N);
 +#ifdef OVERRIDE_INNER_PRODUCT_SINGLE
-+    }
+      } else {
+       sum = inner_product_single(sinct, iptr, N);
+      }
 #endif
 
-       out[out_stride * out_sample++] = SATURATE32(PSHR32(sum, 15), 32767);
+       out[out_stride * out_sample++] = sum;
       last_sample += int_advance;
       samp_frac_num += frac_advance;
       if (samp_frac_num >= den_rate)
       {
          samp_frac_num -= den_rate;
-@@ -395,29 +409,33 @@ static int resampler_basic_direct_double
+@@ -400,29 +414,33 @@ static int resampler_basic_direct_double
    const spx_uint32_t den_rate = st->den_rate;
    double sum;
 
@ -82,34 +83,24 @@
       const spx_word16_t *iptr = & in[last_sample];
 
 -#ifndef OVERRIDE_INNER_PRODUCT_DOUBLE
-      int j;
-      double accum[4] = {0,0,0,0};
-
-      for(j=0;j<N;j+=4) {
-        accum[0] += sinct[j]*iptr[j];
-        accum[1] += sinct[j+1]*iptr[j+1];
-        accum[2] += sinct[j+2]*iptr[j+2];
-        accum[3] += sinct[j+3]*iptr[j+3];
 +#ifdef OVERRIDE_INNER_PRODUCT_DOUBLE
 +      if(moz_has_sse2()) {
-+        sum = inner_product_double(sinct, iptr, N);
-+      } else {
 +#endif
-+        int j;
-+        double accum[4] = {0,0,0,0};
-+
-+        for(j=0;j<N;j+=4) {
-+          accum[0] += sinct[j]*iptr[j];
-+          accum[1] += sinct[j+1]*iptr[j+1];
-+          accum[2] += sinct[j+2]*iptr[j+2];
-+          accum[3] += sinct[j+3]*iptr[j+3];
-+        }
-+        sum = accum[0] + accum[1] + accum[2] + accum[3];
-+#ifdef OVERRIDE_INNER_PRODUCT_DOUBLE
+       int j;
+       double accum[4] = {0,0,0,0};
+ 
+       for(j=0;j<N;j+=4) {
+         accum[0] += sinct[j]*iptr[j];
+         accum[1] += sinct[j+1]*iptr[j+1];
+         accum[2] += sinct[j+2]*iptr[j+2];
+         accum[3] += sinct[j+3]*iptr[j+3];
       }
-      sum = accum[0] + accum[1] + accum[2] + accum[3];
+       sum = accum[0] + accum[1] + accum[2] + accum[3];
 -#else
-      sum = inner_product_double(sinct, iptr, N);
+#ifdef OVERRIDE_INNER_PRODUCT_DOUBLE
+      } else {
+       sum = inner_product_double(sinct, iptr, N);
+      }
 #endif
 
       out[out_stride * out_sample++] = PSHR32(sum, 15);
@ -118,7 +109,7 @@
       if (samp_frac_num >= den_rate)
       {
          samp_frac_num -= den_rate;
-@@ -451,35 +469,38 @@ static int resampler_basic_interpolate_s
+@@ -456,34 +474,38 @@ static int resampler_basic_interpolate_s
 #ifdef FIXED_POINT
       const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate);
 #else
@ -128,53 +119,38 @@
 
 
 -#ifndef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
-      int j;
-      spx_word32_t accum[4] = {0,0,0,0};
-
-      for(j=0;j<N;j++) {
-        const spx_word16_t curr_in=iptr[j];
-        accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]);
-        accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]);
-        accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]);
-        accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]);
 +#ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
-+      if (moz_has_sse()) {
-+        cubic_coef(frac, interp);
-+        sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
-+      } else {
+      if (!moz_has_sse()) {
 +#endif
-+        int j;
-+        spx_word32_t accum[4] = {0,0,0,0};
-+
-+        for(j=0;j<N;j++) {
-+          const spx_word16_t curr_in=iptr[j];
-+          accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]);
-+          accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]);
-+          accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]);
-+          accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]);
-+        }
-+        cubic_coef(frac, interp);
-+        sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1));
-+#ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
+       int j;
+       spx_word32_t accum[4] = {0,0,0,0};
+ 
+       for(j=0;j<N;j++) {
+         const spx_word16_t curr_in=iptr[j];
+         accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]);
+         accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]);
+         accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]);
+         accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]);
       }
-
-      cubic_coef(frac, interp);
-      sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1));
+ 
+       cubic_coef(frac, interp);
+       sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1));
+       sum = SATURATE32PSHR(sum, 15, 32767);
 -#else
-      cubic_coef(frac, interp);
-      sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
+#ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
+      } else {
+       cubic_coef(frac, interp);
+       sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
+      }
 #endif
-      
-+
-       out[out_stride * out_sample++] = SATURATE32(PSHR32(sum, 14), 32767);
+       
+       out[out_stride * out_sample++] = sum;
       last_sample += int_advance;
       samp_frac_num += frac_advance;
       if (samp_frac_num >= den_rate)
       {
          samp_frac_num -= den_rate;
-          last_sample++;
-       }
-@@ -513,35 +534,38 @@ static int resampler_basic_interpolate_d
+@@ -519,33 +541,37 @@ static int resampler_basic_interpolate_d
 #ifdef FIXED_POINT
       const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate);
 #else
@ -185,10 +161,7 @@
 
 -#ifndef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
 +#ifdef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
-+      if (moz_has_sse2()) {
-+        cubic_coef(frac, interp);
-+        sum = interpolate_product_double(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
-+      } else {
+      if (!moz_has_sse2()) {
 +#endif
       int j;
       double accum[4] = {0,0,0,0};
@ -204,17 +177,16 @@
       cubic_coef(frac, interp);
       sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]);
 -#else
-      cubic_coef(frac, interp);
-      sum = interpolate_product_double(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
-+#ifdef OVERRIDE_INNER_PRODUCT_DOUBLE
+#ifdef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
+      } else {
+       cubic_coef(frac, interp);
+       sum = interpolate_product_double(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
 +      }
 #endif
-      
+       
       out[out_stride * out_sample++] = PSHR32(sum,15);
       last_sample += int_advance;
       samp_frac_num += frac_advance;
       if (samp_frac_num >= den_rate)
       {
          samp_frac_num -= den_rate;
-          last_sample++;
-       }
--- a/media/libspeex_resampler/update.sh
+++ b/media/libspeex_resampler/update.sh
@ -20,5 +20,5 @@ cp $1/COPYING .

 # apply outstanding local patches
 patch -p3 < outside-speex.patch
-patch -p1 < sse-detect-runtime.patch
+patch -p3 < sse-detect-runtime.patch
 patch -p3 < set-skip-frac.patch