mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-23 12:51:06 +00:00
Backed out 4 changesets (bug 1854912) for causing failures in sub-sample-scheduling.html
Backed out changeset 6d23b44c6fc0 (bug 1854912) Backed out changeset 3719cb3c102a (bug 1854912) Backed out changeset 7fe68331b4e9 (bug 1854912) Backed out changeset 1715d7aafa06 (bug 1854912)
This commit is contained in:
parent
26e0b0be0a
commit
99f938e4bd
@ -1,37 +1,22 @@
|
||||
diff --git a/src/resample.c b/src/resample.c
|
||||
--- a/src/resample.c
|
||||
+++ b/src/resample.c
|
||||
@@ -91,23 +91,17 @@ static void speex_free(void *ptr) {free(
|
||||
#ifndef NULL
|
||||
#define NULL 0
|
||||
@@ -94,13 +94,7 @@ static void speex_free (void *ptr) {free(ptr);}
|
||||
#define UINT32_MAX 4294967296U
|
||||
#endif
|
||||
|
||||
#ifndef UINT32_MAX
|
||||
#define UINT32_MAX 4294967295U
|
||||
#endif
|
||||
|
||||
-#ifdef USE_SSE
|
||||
-#ifdef _USE_SSE
|
||||
-#include "resample_sse.h"
|
||||
-#endif
|
||||
-
|
||||
-#ifdef USE_NEON
|
||||
-#ifdef _USE_NEON
|
||||
-#include "resample_neon.h"
|
||||
-#endif
|
||||
+#include "simd_detect.h"
|
||||
|
||||
/* Number of elements to allocate on the stack */
|
||||
/* Numer of elements to allocate on the stack */
|
||||
#ifdef VAR_ARRAYS
|
||||
#define FIXED_STACK_ALLOC 8192
|
||||
#else
|
||||
#define FIXED_STACK_ALLOC 1024
|
||||
#endif
|
||||
|
||||
@@ -341,17 +335,19 @@ static int resampler_basic_direct_single
|
||||
const spx_uint32_t den_rate = st->den_rate;
|
||||
spx_word32_t sum;
|
||||
|
||||
while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
|
||||
{
|
||||
@@ -346,7 +340,9 @@ static int resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t c
|
||||
const spx_word16_t *sinct = & sinc_table[samp_frac_num*N];
|
||||
const spx_word16_t *iptr = & in[last_sample];
|
||||
|
||||
@ -42,17 +27,7 @@ diff --git a/src/resample.c b/src/resample.c
|
||||
int j;
|
||||
sum = 0;
|
||||
for(j=0;j<N;j++) sum += MULT16_16(sinct[j], iptr[j]);
|
||||
|
||||
/* This code is slower on most DSPs which have only 2 accumulators.
|
||||
Plus this this forces truncation to 32 bits and you lose the HW guard bits.
|
||||
I think we can trust the compiler and let it vectorize and/or unroll itself.
|
||||
spx_word32_t accum[4] = {0,0,0,0};
|
||||
@@ -359,18 +355,20 @@ static int resampler_basic_direct_single
|
||||
accum[0] += MULT16_16(sinct[j], iptr[j]);
|
||||
accum[1] += MULT16_16(sinct[j+1], iptr[j+1]);
|
||||
accum[2] += MULT16_16(sinct[j+2], iptr[j+2]);
|
||||
accum[3] += MULT16_16(sinct[j+3], iptr[j+3]);
|
||||
}
|
||||
@@ -364,8 +360,10 @@ static int resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t c
|
||||
sum = accum[0] + accum[1] + accum[2] + accum[3];
|
||||
*/
|
||||
sum = SATURATE32PSHR(sum, 15, 32767);
|
||||
@ -64,17 +39,7 @@ diff --git a/src/resample.c b/src/resample.c
|
||||
#endif
|
||||
|
||||
out[out_stride * out_sample++] = sum;
|
||||
last_sample += int_advance;
|
||||
samp_frac_num += frac_advance;
|
||||
if (samp_frac_num >= den_rate)
|
||||
{
|
||||
samp_frac_num -= den_rate;
|
||||
@@ -399,29 +397,33 @@ static int resampler_basic_direct_double
|
||||
const spx_uint32_t den_rate = st->den_rate;
|
||||
double sum;
|
||||
|
||||
while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
|
||||
{
|
||||
@@ -404,7 +402,9 @@ static int resampler_basic_direct_double(SpeexResamplerState *st, spx_uint32_t c
|
||||
const spx_word16_t *sinct = & sinc_table[samp_frac_num*N];
|
||||
const spx_word16_t *iptr = & in[last_sample];
|
||||
|
||||
@ -85,10 +50,7 @@ diff --git a/src/resample.c b/src/resample.c
|
||||
int j;
|
||||
double accum[4] = {0,0,0,0};
|
||||
|
||||
for(j=0;j<N;j+=4) {
|
||||
accum[0] += sinct[j]*iptr[j];
|
||||
accum[1] += sinct[j+1]*iptr[j+1];
|
||||
accum[2] += sinct[j+2]*iptr[j+2];
|
||||
@@ -415,8 +415,10 @@ static int resampler_basic_direct_double(SpeexResamplerState *st, spx_uint32_t c
|
||||
accum[3] += sinct[j+3]*iptr[j+3];
|
||||
}
|
||||
sum = accum[0] + accum[1] + accum[2] + accum[3];
|
||||
@ -100,17 +62,7 @@ diff --git a/src/resample.c b/src/resample.c
|
||||
#endif
|
||||
|
||||
out[out_stride * out_sample++] = PSHR32(sum, 15);
|
||||
last_sample += int_advance;
|
||||
samp_frac_num += frac_advance;
|
||||
if (samp_frac_num >= den_rate)
|
||||
{
|
||||
samp_frac_num -= den_rate;
|
||||
@@ -455,34 +457,38 @@ static int resampler_basic_interpolate_s
|
||||
#ifdef FIXED_POINT
|
||||
const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate);
|
||||
#else
|
||||
const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate;
|
||||
#endif
|
||||
@@ -460,7 +462,9 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3
|
||||
spx_word16_t interp[4];
|
||||
|
||||
|
||||
@ -121,16 +73,9 @@ diff --git a/src/resample.c b/src/resample.c
|
||||
int j;
|
||||
spx_word32_t accum[4] = {0,0,0,0};
|
||||
|
||||
for(j=0;j<N;j++) {
|
||||
const spx_word16_t curr_in=iptr[j];
|
||||
accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]);
|
||||
accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]);
|
||||
accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]);
|
||||
accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]);
|
||||
}
|
||||
|
||||
@@ -475,9 +479,11 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3
|
||||
cubic_coef(frac, interp);
|
||||
sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]);
|
||||
sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1));
|
||||
sum = SATURATE32PSHR(sum, 15, 32767);
|
||||
-#else
|
||||
+#ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
|
||||
@ -141,17 +86,7 @@ diff --git a/src/resample.c b/src/resample.c
|
||||
#endif
|
||||
|
||||
out[out_stride * out_sample++] = sum;
|
||||
last_sample += int_advance;
|
||||
samp_frac_num += frac_advance;
|
||||
if (samp_frac_num >= den_rate)
|
||||
{
|
||||
samp_frac_num -= den_rate;
|
||||
@@ -518,33 +524,37 @@ static int resampler_basic_interpolate_d
|
||||
#ifdef FIXED_POINT
|
||||
const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate);
|
||||
#else
|
||||
const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate;
|
||||
#endif
|
||||
@@ -523,7 +529,9 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3
|
||||
spx_word16_t interp[4];
|
||||
|
||||
|
||||
@ -162,13 +97,7 @@ diff --git a/src/resample.c b/src/resample.c
|
||||
int j;
|
||||
double accum[4] = {0,0,0,0};
|
||||
|
||||
for(j=0;j<N;j++) {
|
||||
const double curr_in=iptr[j];
|
||||
accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]);
|
||||
accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]);
|
||||
accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]);
|
||||
accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]);
|
||||
}
|
||||
@@ -537,9 +545,11 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3
|
||||
|
||||
cubic_coef(frac, interp);
|
||||
sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]);
|
||||
@ -181,15 +110,11 @@ diff --git a/src/resample.c b/src/resample.c
|
||||
#endif
|
||||
|
||||
out[out_stride * out_sample++] = PSHR32(sum,15);
|
||||
last_sample += int_advance;
|
||||
samp_frac_num += frac_advance;
|
||||
if (samp_frac_num >= den_rate)
|
||||
{
|
||||
samp_frac_num -= den_rate;
|
||||
diff --git a/src/resample_neon.c b/src/resample_neon.c
|
||||
--- a/src/resample_neon.c
|
||||
+++ b/src/resample_neon.c
|
||||
@@ -32,16 +32,17 @@
|
||||
@@ -31,16 +31,18 @@
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
@ -197,46 +122,27 @@ diff --git a/src/resample_neon.c b/src/resample_neon.c
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
+#include "simd_detect.h"
|
||||
+
|
||||
#include <arm_neon.h>
|
||||
|
||||
#ifdef FIXED_POINT
|
||||
#if defined(__aarch64__)
|
||||
#ifdef __thumb2__
|
||||
static inline int32_t saturate_32bit_to_16bit(int32_t a) {
|
||||
int32_t ret;
|
||||
asm ("fmov s0, %w[a]\n"
|
||||
"sqxtn h0, s0\n"
|
||||
"sxtl v0.4s, v0.4h\n"
|
||||
@@ -73,17 +74,17 @@
|
||||
asm ("ssat %[ret], #16, %[a]"
|
||||
: [ret] "=&r" (ret)
|
||||
@@ -60,17 +62,17 @@ static inline int32_t saturate_32bit_to_
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
#undef WORD2INT
|
||||
#define WORD2INT(x) (saturate_32bit_to_16bit(x))
|
||||
|
||||
#define OVERRIDE_INNER_PRODUCT_SINGLE
|
||||
/* Only works when len % 4 == 0 and len >= 4 */
|
||||
#if defined(__aarch64__)
|
||||
/* Only works when len % 4 == 0 */
|
||||
-static inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
|
||||
+inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
|
||||
{
|
||||
int32_t ret;
|
||||
uint32_t remainder = len % 16;
|
||||
len = len - remainder;
|
||||
|
||||
asm volatile (" cmp %w[len], #0\n"
|
||||
" b.ne 1f\n"
|
||||
" ld1 {v16.4h}, [%[b]], #8\n"
|
||||
@@ -128,17 +129,17 @@
|
||||
: [ret] "=r" (ret), [a] "+r" (a), [b] "+r" (b),
|
||||
[len] "+r" (len), [remainder] "+r" (remainder)
|
||||
:
|
||||
: "cc", "v0",
|
||||
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23");
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
-static inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
|
||||
+inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
|
||||
+int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
|
||||
{
|
||||
int32_t ret;
|
||||
uint32_t remainder = len % 16;
|
||||
@ -245,36 +151,17 @@ diff --git a/src/resample_neon.c b/src/resample_neon.c
|
||||
asm volatile (" cmp %[len], #0\n"
|
||||
" bne 1f\n"
|
||||
" vld1.16 {d16}, [%[b]]!\n"
|
||||
@@ -218,17 +219,17 @@
|
||||
#endif
|
||||
|
||||
@@ -134,17 +136,17 @@ static inline int32_t saturate_float_to_
|
||||
: "q0");
|
||||
return ret;
|
||||
}
|
||||
#undef WORD2INT
|
||||
#define WORD2INT(x) (saturate_float_to_16bit(x))
|
||||
|
||||
#define OVERRIDE_INNER_PRODUCT_SINGLE
|
||||
/* Only works when len % 4 == 0 and len >= 4 */
|
||||
#if defined(__aarch64__)
|
||||
/* Only works when len % 4 == 0 */
|
||||
-static inline float inner_product_single(const float *a, const float *b, unsigned int len)
|
||||
+inline float inner_product_single(const float *a, const float *b, unsigned int len)
|
||||
{
|
||||
float ret;
|
||||
uint32_t remainder = len % 16;
|
||||
len = len - remainder;
|
||||
|
||||
asm volatile (" cmp %w[len], #0\n"
|
||||
" b.ne 1f\n"
|
||||
" ld1 {v16.4s}, [%[b]], #16\n"
|
||||
@@ -273,17 +274,17 @@
|
||||
: [ret] "=w" (ret), [a] "+r" (a), [b] "+r" (b),
|
||||
[len] "+r" (len), [remainder] "+r" (remainder)
|
||||
:
|
||||
: "cc", "v1", "v2", "v3", "v4",
|
||||
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23");
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
-static inline float inner_product_single(const float *a, const float *b, unsigned int len)
|
||||
+inline float inner_product_single(const float *a, const float *b, unsigned int len)
|
||||
+float inner_product_single(const float *a, const float *b, unsigned int len)
|
||||
{
|
||||
float ret;
|
||||
uint32_t remainder = len % 16;
|
||||
@ -333,7 +220,7 @@ diff --git a/src/resample_sse.c b/src/resample_sse.c
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef USE_SSE2
|
||||
#ifdef _USE_SSE2
|
||||
#include <emmintrin.h>
|
||||
#define OVERRIDE_INNER_PRODUCT_DOUBLE
|
||||
|
||||
|
@ -14,13 +14,13 @@ diff --git a/src/resample.c b/src/resample.c
|
||||
+
|
||||
#ifdef OUTSIDE_SPEEX
|
||||
#include <stdlib.h>
|
||||
static void *speex_alloc(int size) {return calloc(size,1);}
|
||||
static void *speex_realloc(void *ptr, int size) {return realloc(ptr, size);}
|
||||
static void speex_free(void *ptr) {free(ptr);}
|
||||
#ifndef EXPORT
|
||||
#define EXPORT
|
||||
#endif
|
||||
@@ -633,25 +645,26 @@ static int update_filter(SpeexResamplerS
|
||||
static void *speex_alloc (int size) {return calloc(size,1);}
|
||||
static void *speex_realloc (void *ptr, int size) {return realloc(ptr, size);}
|
||||
static void speex_free (void *ptr) {free(ptr);}
|
||||
#include "speex_resampler.h"
|
||||
#include "arch.h"
|
||||
#else /* OUTSIDE_SPEEX */
|
||||
@@ -643,25 +645,26 @@ static int update_filter(SpeexResamplerS
|
||||
st->oversample >>= 1;
|
||||
if (st->oversample < 1)
|
||||
st->oversample = 1;
|
||||
@ -29,24 +29,23 @@ diff --git a/src/resample.c b/src/resample.c
|
||||
st->cutoff = quality_map[st->quality].upsample_bandwidth;
|
||||
}
|
||||
|
||||
- /* Choose the resampling type that requires the least amount of memory */
|
||||
-#ifdef RESAMPLE_FULL_SINC_TABLE
|
||||
- use_direct = 1;
|
||||
- if (INT_MAX/sizeof(spx_word16_t)/st->den_rate < st->filt_len)
|
||||
- goto fail;
|
||||
-#else
|
||||
- /* Choose the resampling type that requires the least amount of memory */
|
||||
- use_direct = st->filt_len*st->den_rate <= st->filt_len*st->oversample+8
|
||||
- && INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len;
|
||||
+ use_direct =
|
||||
+#ifdef RESAMPLE_HUGEMEM
|
||||
+ /* Choose the direct resampler, even with higher initialization costs,
|
||||
+ when resampling any multiple of 100 to 44100. */
|
||||
+ st->den_rate <= 441
|
||||
+#else
|
||||
#else
|
||||
- use_direct = st->filt_len*st->den_rate <= st->filt_len*st->oversample+8
|
||||
+ /* Choose the resampling type that requires the least amount of memory */
|
||||
+ st->filt_len*st->den_rate <= st->filt_len*st->oversample+8
|
||||
#endif
|
||||
+ && INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len;
|
||||
+#endif
|
||||
&& INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len;
|
||||
-#endif
|
||||
if (use_direct)
|
||||
{
|
||||
min_sinc_table_length = st->filt_len*st->den_rate;
|
||||
@ -55,3 +54,4 @@ diff --git a/src/resample.c b/src/resample.c
|
||||
goto fail;
|
||||
|
||||
min_sinc_table_length = st->filt_len*st->oversample+8;
|
||||
|
||||
|
33
media/libspeex_resampler/05_remove-empty-asm-clobber.patch
Normal file
33
media/libspeex_resampler/05_remove-empty-asm-clobber.patch
Normal file
@ -0,0 +1,33 @@
|
||||
https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Extended-Asm says
|
||||
|
||||
asm [volatile] ( AssemblerTemplate : [OutputOperands] [ : [InputOperands] [ : [Clobbers] ] ] )
|
||||
|
||||
which implies that Clobbers is optional even after the third colon, but
|
||||
the gcc used for b2g_try_emulator_dep builds says
|
||||
|
||||
resample_neon.c: In function 'saturate_32bit_to_16bit':
|
||||
resample_neon.c:50: error: expected string literal before ')' token
|
||||
|
||||
diff --git a/src/resample_neon.c b/src/resample_neon.c
|
||||
--- a/src/resample_neon.c
|
||||
+++ b/src/resample_neon.c
|
||||
@@ -41,18 +41,17 @@
|
||||
#include <arm_neon.h>
|
||||
|
||||
#ifdef FIXED_POINT
|
||||
#ifdef __thumb2__
|
||||
static inline int32_t saturate_32bit_to_16bit(int32_t a) {
|
||||
int32_t ret;
|
||||
asm ("ssat %[ret], #16, %[a]"
|
||||
: [ret] "=&r" (ret)
|
||||
- : [a] "r" (a)
|
||||
- : );
|
||||
+ : [a] "r" (a));
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
static inline int32_t saturate_32bit_to_16bit(int32_t a) {
|
||||
int32_t ret;
|
||||
asm ("vmov.s32 d0[0], %[a]\n"
|
||||
"vqmovn.s32 d0, q0\n"
|
||||
"vmov.s16 %[ret], d0[0]\n"
|
@ -1,9 +1,7 @@
|
||||
This is a fix for https://bugzilla.mozilla.org/show_bug.cgi?id=1274083
|
||||
|
||||
diff --git a/src/resample.c b/src/resample.c
|
||||
--- a/src/resample.c
|
||||
+++ b/src/resample.c
|
||||
@@ -1129,18 +1129,19 @@ EXPORT int speex_resampler_set_rate_frac
|
||||
@@ -1141,18 +1141,19 @@ EXPORT int speex_resampler_set_rate_frac
|
||||
|
||||
st->num_rate /= fact;
|
||||
st->den_rate /= fact;
|
||||
@ -12,10 +10,10 @@ diff --git a/src/resample.c b/src/resample.c
|
||||
{
|
||||
for (i=0;i<st->nb_channels;i++)
|
||||
{
|
||||
- if (multiply_frac(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS)
|
||||
- if (_muldiv(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS)
|
||||
- return RESAMPLER_ERR_OVERFLOW;
|
||||
+ if (multiply_frac(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS) {
|
||||
+ st->samp_frac_num[i] = st->den_rate-1;
|
||||
+ if (_muldiv(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS) {
|
||||
+ st->samp_frac_num[i] = st->den_rate-1;
|
||||
+ }
|
||||
/* Safety net */
|
||||
if (st->samp_frac_num[i] >= st->den_rate)
|
95
media/libspeex_resampler/07_integer-halving.patch
Normal file
95
media/libspeex_resampler/07_integer-halving.patch
Normal file
@ -0,0 +1,95 @@
|
||||
diff --git a/src/arch.h b/src/arch.h
|
||||
--- a/src/arch.h
|
||||
+++ b/src/arch.h
|
||||
@@ -172,26 +172,23 @@ typedef float spx_word32_t;
|
||||
#define SHL(a,shift) (a)
|
||||
#define SATURATE(x,a) (x)
|
||||
|
||||
#define ADD16(a,b) ((a)+(b))
|
||||
#define SUB16(a,b) ((a)-(b))
|
||||
#define ADD32(a,b) ((a)+(b))
|
||||
#define SUB32(a,b) ((a)-(b))
|
||||
#define MULT16_16_16(a,b) ((a)*(b))
|
||||
+#define MULT16_32_32(a,b) ((a)*(b))
|
||||
#define MULT16_16(a,b) ((spx_word32_t)(a)*(spx_word32_t)(b))
|
||||
#define MAC16_16(c,a,b) ((c)+(spx_word32_t)(a)*(spx_word32_t)(b))
|
||||
|
||||
-#define MULT16_32_Q11(a,b) ((a)*(b))
|
||||
-#define MULT16_32_Q13(a,b) ((a)*(b))
|
||||
-#define MULT16_32_Q14(a,b) ((a)*(b))
|
||||
#define MULT16_32_Q15(a,b) ((a)*(b))
|
||||
#define MULT16_32_P15(a,b) ((a)*(b))
|
||||
|
||||
-#define MAC16_32_Q11(c,a,b) ((c)+(a)*(b))
|
||||
#define MAC16_32_Q15(c,a,b) ((c)+(a)*(b))
|
||||
|
||||
#define MAC16_16_Q11(c,a,b) ((c)+(a)*(b))
|
||||
#define MAC16_16_Q13(c,a,b) ((c)+(a)*(b))
|
||||
#define MAC16_16_P13(c,a,b) ((c)+(a)*(b))
|
||||
#define MULT16_16_Q11_32(a,b) ((a)*(b))
|
||||
#define MULT16_16_Q13(a,b) ((a)*(b))
|
||||
#define MULT16_16_Q14(a,b) ((a)*(b))
|
||||
diff --git a/src/fixed_generic.h b/src/fixed_generic.h
|
||||
--- a/src/fixed_generic.h
|
||||
+++ b/src/fixed_generic.h
|
||||
@@ -64,32 +64,27 @@
|
||||
|
||||
#define ADD16(a,b) ((spx_word16_t)((spx_word16_t)(a)+(spx_word16_t)(b)))
|
||||
#define SUB16(a,b) ((spx_word16_t)(a)-(spx_word16_t)(b))
|
||||
#define ADD32(a,b) ((spx_word32_t)(a)+(spx_word32_t)(b))
|
||||
#define SUB32(a,b) ((spx_word32_t)(a)-(spx_word32_t)(b))
|
||||
|
||||
|
||||
/* result fits in 16 bits */
|
||||
-#define MULT16_16_16(a,b) ((((spx_word16_t)(a))*((spx_word16_t)(b))))
|
||||
+#define MULT16_16_16(a,b) (((spx_word16_t)(a))*((spx_word16_t)(b)))
|
||||
+/* result fits in 32 bits */
|
||||
+#define MULT16_32_32(a,b) (((spx_word16_t)(a))*((spx_word32_t)(b)))
|
||||
|
||||
/* (spx_word32_t)(spx_word16_t) gives TI compiler a hint that it's 16x16->32 multiply */
|
||||
#define MULT16_16(a,b) (((spx_word32_t)(spx_word16_t)(a))*((spx_word32_t)(spx_word16_t)(b)))
|
||||
|
||||
#define MAC16_16(c,a,b) (ADD32((c),MULT16_16((a),(b))))
|
||||
-#define MULT16_32_Q12(a,b) ADD32(MULT16_16((a),SHR((b),12)), SHR(MULT16_16((a),((b)&0x00000fff)),12))
|
||||
-#define MULT16_32_Q13(a,b) ADD32(MULT16_16((a),SHR((b),13)), SHR(MULT16_16((a),((b)&0x00001fff)),13))
|
||||
-#define MULT16_32_Q14(a,b) ADD32(MULT16_16((a),SHR((b),14)), SHR(MULT16_16((a),((b)&0x00003fff)),14))
|
||||
-
|
||||
-#define MULT16_32_Q11(a,b) ADD32(MULT16_16((a),SHR((b),11)), SHR(MULT16_16((a),((b)&0x000007ff)),11))
|
||||
-#define MAC16_32_Q11(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),11)), SHR(MULT16_16((a),((b)&0x000007ff)),11)))
|
||||
-
|
||||
-#define MULT16_32_P15(a,b) ADD32(MULT16_16((a),SHR((b),15)), PSHR(MULT16_16((a),((b)&0x00007fff)),15))
|
||||
-#define MULT16_32_Q15(a,b) ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))
|
||||
-#define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
|
||||
+#define MULT16_32_P15(a,b) ADD32(MULT16_32_32(a,SHR((b),15)), PSHR(MULT16_16((a),((b)&0x00007fff)),15))
|
||||
+#define MULT16_32_Q15(a,b) ADD32(MULT16_32_32(a,SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))
|
||||
+#define MAC16_32_Q15(c,a,b) ADD32(c,MULT16_32_Q15(a,b))
|
||||
|
||||
|
||||
#define MAC16_16_Q11(c,a,b) (ADD32((c),SHR(MULT16_16((a),(b)),11)))
|
||||
#define MAC16_16_Q13(c,a,b) (ADD32((c),SHR(MULT16_16((a),(b)),13)))
|
||||
#define MAC16_16_P13(c,a,b) (ADD32((c),SHR(ADD32(4096,MULT16_16((a),(b))),13)))
|
||||
|
||||
#define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11))
|
||||
#define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13))
|
||||
diff --git a/src/resample.c b/src/resample.c
|
||||
--- a/src/resample.c
|
||||
+++ b/src/resample.c
|
||||
@@ -474,17 +474,17 @@ static int resampler_basic_interpolate_s
|
||||
const spx_word16_t curr_in=iptr[j];
|
||||
accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]);
|
||||
accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]);
|
||||
accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]);
|
||||
accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]);
|
||||
}
|
||||
|
||||
cubic_coef(frac, interp);
|
||||
- sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1));
|
||||
+ sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]);
|
||||
sum = SATURATE32PSHR(sum, 15, 32767);
|
||||
#ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
|
||||
} else {
|
||||
cubic_coef(frac, interp);
|
||||
sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
|
||||
}
|
||||
#endif
|
||||
|
@ -1,7 +1,7 @@
|
||||
Copyright 2002-2008 Xiph.org Foundation
|
||||
Copyright 2002-2008 Jean-Marc Valin
|
||||
Copyright 2005-2007 Analog Devices Inc.
|
||||
Copyright 2005-2008 Commonwealth Scientific and Industrial Research
|
||||
Copyright 2005-2008 Commonwealth Scientific and Industrial Research
|
||||
Organisation (CSIRO)
|
||||
Copyright 1993, 2002, 2006 David Rowe
|
||||
Copyright 2003 EpicGames
|
||||
|
@ -10,8 +10,8 @@ origin:
|
||||
|
||||
url: https://speex.org/
|
||||
|
||||
release: 738e17905e1ca2a1fa932ddd9c2a85d089f4e845 (2023-02-28T16:25:59.000-05:00).
|
||||
revision: 738e17905e1ca2a1fa932ddd9c2a85d089f4e845
|
||||
release: 79822c8fb79d202cbe7e899f7406acf17beb247c (2017-01-22T14:16:02.000-05:00).
|
||||
revision: 79822c8fb79d202cbe7e899f7406acf17beb247c
|
||||
|
||||
license: BSD-3-Clause
|
||||
license-file: COPYING
|
||||
@ -35,6 +35,7 @@ vendoring:
|
||||
- libspeexdsp/resample_sse.h
|
||||
- libspeexdsp/resample_neon.h
|
||||
- libspeexdsp/arch.h
|
||||
- libspeexdsp/stack_alloc.h
|
||||
- libspeexdsp/fixed_generic.h
|
||||
- include/speex/speex_resampler.h
|
||||
- AUTHORS
|
||||
@ -57,6 +58,9 @@ vendoring:
|
||||
- action: move-file
|
||||
from: '{vendor_dir}/libspeexdsp/arch.h'
|
||||
to: '{vendor_dir}/src/arch.h'
|
||||
- action: move-file
|
||||
from: '{vendor_dir}/libspeexdsp/stack_alloc.h'
|
||||
to: '{vendor_dir}/src/stack_alloc.h'
|
||||
- action: move-file
|
||||
from: '{vendor_dir}/libspeexdsp/fixed_generic.h'
|
||||
to: '{vendor_dir}/src/fixed_generic.h'
|
||||
@ -69,4 +73,6 @@ vendoring:
|
||||
- 02_simd-detect-runtime.patch
|
||||
- 03_set-skip-frac.patch
|
||||
- 04_hugemem.patch
|
||||
- 05_set-rate-overflow-no-return.patch
|
||||
- 05_remove-empty-asm-clobber.patch
|
||||
- 06_set-rate-overflow-no-return.patch
|
||||
- 07_integer-halving.patch
|
||||
|
@ -41,10 +41,10 @@
|
||||
#ifdef FLOATING_POINT
|
||||
#error You cannot compile as floating point and fixed point at the same time
|
||||
#endif
|
||||
#ifdef USE_SSE
|
||||
#ifdef _USE_SSE
|
||||
#error SSE is only for floating-point
|
||||
#endif
|
||||
#if defined(ARM4_ASM) + defined(ARM5E_ASM) + defined(BFIN_ASM) > 1
|
||||
#if ((defined (ARM4_ASM)||defined (ARM4_ASM)) && defined(BFIN_ASM)) || (defined (ARM4_ASM)&&defined(ARM5E_ASM))
|
||||
#error Make up your mind. What CPU do you have?
|
||||
#endif
|
||||
#ifdef VORBIS_PSYCHO
|
||||
@ -56,10 +56,10 @@
|
||||
#ifndef FLOATING_POINT
|
||||
#error You now need to define either FIXED_POINT or FLOATING_POINT
|
||||
#endif
|
||||
#if defined(ARM4_ASM) || defined(ARM5E_ASM) || defined(BFIN_ASM)
|
||||
#if defined (ARM4_ASM) || defined(ARM5E_ASM) || defined(BFIN_ASM)
|
||||
#error I suppose you can have a [ARM4/ARM5E/Blackfin] that has float instructions?
|
||||
#endif
|
||||
#ifdef FIXED_DEBUG
|
||||
#ifdef FIXED_POINT_DEBUG
|
||||
#error "Don't you think enabling fixed-point is a good thing to do if you want to debug that?"
|
||||
#endif
|
||||
|
||||
@ -117,9 +117,9 @@ typedef spx_word32_t spx_sig_t;
|
||||
|
||||
#ifdef ARM5E_ASM
|
||||
#include "fixed_arm5e.h"
|
||||
#elif defined(ARM4_ASM)
|
||||
#elif defined (ARM4_ASM)
|
||||
#include "fixed_arm4.h"
|
||||
#elif defined(BFIN_ASM)
|
||||
#elif defined (BFIN_ASM)
|
||||
#include "fixed_bfin.h"
|
||||
#endif
|
||||
|
||||
@ -207,7 +207,7 @@ typedef float spx_word32_t;
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(CONFIG_TI_C54X) || defined(CONFIG_TI_C55X)
|
||||
#if defined (CONFIG_TI_C54X) || defined (CONFIG_TI_C55X)
|
||||
|
||||
/* 2 on TI C5x DSP */
|
||||
#define BYTES_PER_CHAR 2
|
||||
|
@ -77,7 +77,6 @@
|
||||
#define MULT16_16(a,b) (((spx_word32_t)(spx_word16_t)(a))*((spx_word32_t)(spx_word16_t)(b)))
|
||||
|
||||
#define MAC16_16(c,a,b) (ADD32((c),MULT16_16((a),(b))))
|
||||
|
||||
#define MULT16_32_P15(a,b) ADD32(MULT16_32_32(a,SHR((b),15)), PSHR(MULT16_16((a),((b)&0x00007fff)),15))
|
||||
#define MULT16_32_Q15(a,b) ADD32(MULT16_32_32(a,SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))
|
||||
#define MAC16_32_Q15(c,a,b) ADD32(c,MULT16_32_Q15(a,b))
|
||||
|
@ -28,15 +28,15 @@ DEFINES['FLOATING_POINT'] = True
|
||||
|
||||
# Only use SSE code when using floating point samples, and on x86
|
||||
if CONFIG['INTEL_ARCHITECTURE']:
|
||||
DEFINES['USE_SSE'] = True
|
||||
DEFINES['USE_SSE2'] = True
|
||||
DEFINES['_USE_SSE'] = True
|
||||
DEFINES['_USE_SSE2'] = True
|
||||
SOURCES += [
|
||||
'resample_sse.c'
|
||||
]
|
||||
SOURCES['resample_sse.c'].flags += CONFIG['SSE2_FLAGS']
|
||||
|
||||
if CONFIG['CPU_ARCH'] == 'arm' and CONFIG['BUILD_ARM_NEON']:
|
||||
DEFINES['USE_NEON'] = True
|
||||
DEFINES['_USE_NEON'] = True
|
||||
SOURCES += [
|
||||
'resample_neon.c'
|
||||
]
|
||||
|
@ -46,7 +46,7 @@
|
||||
Smith, Julius O. Digital Audio Resampling Home Page
|
||||
Center for Computer Research in Music and Acoustics (CCRMA),
|
||||
Stanford University, 2007.
|
||||
Web published at https://ccrma.stanford.edu/~jos/resample/.
|
||||
Web published at http://ccrma.stanford.edu/~jos/resample/.
|
||||
|
||||
There is one main difference, though. This resampler uses cubic
|
||||
interpolation instead of linear interpolation in the above paper. This
|
||||
@ -65,12 +65,9 @@
|
||||
|
||||
#ifdef OUTSIDE_SPEEX
|
||||
#include <stdlib.h>
|
||||
static void *speex_alloc(int size) {return calloc(size,1);}
|
||||
static void *speex_realloc(void *ptr, int size) {return realloc(ptr, size);}
|
||||
static void speex_free(void *ptr) {free(ptr);}
|
||||
#ifndef EXPORT
|
||||
#define EXPORT
|
||||
#endif
|
||||
static void *speex_alloc (int size) {return calloc(size,1);}
|
||||
static void *speex_realloc (void *ptr, int size) {return realloc(ptr, size);}
|
||||
static void speex_free (void *ptr) {free(ptr);}
|
||||
#include "speex_resampler.h"
|
||||
#include "arch.h"
|
||||
#else /* OUTSIDE_SPEEX */
|
||||
@ -80,6 +77,7 @@ static void speex_free(void *ptr) {free(ptr);}
|
||||
#include "os_support.h"
|
||||
#endif /* OUTSIDE_SPEEX */
|
||||
|
||||
#include "stack_alloc.h"
|
||||
#include <math.h>
|
||||
#include <limits.h>
|
||||
|
||||
@ -95,12 +93,12 @@ static void speex_free(void *ptr) {free(ptr);}
|
||||
#endif
|
||||
|
||||
#ifndef UINT32_MAX
|
||||
#define UINT32_MAX 4294967295U
|
||||
#define UINT32_MAX 4294967296U
|
||||
#endif
|
||||
|
||||
#include "simd_detect.h"
|
||||
|
||||
/* Number of elements to allocate on the stack */
|
||||
/* Numer of elements to allocate on the stack */
|
||||
#ifdef VAR_ARRAYS
|
||||
#define FIXED_STACK_ALLOC 8192
|
||||
#else
|
||||
@ -192,14 +190,16 @@ struct FuncDef {
|
||||
int oversample;
|
||||
};
|
||||
|
||||
static const struct FuncDef kaiser12_funcdef = {kaiser12_table, 64};
|
||||
#define KAISER12 (&kaiser12_funcdef)
|
||||
static const struct FuncDef kaiser10_funcdef = {kaiser10_table, 32};
|
||||
#define KAISER10 (&kaiser10_funcdef)
|
||||
static const struct FuncDef kaiser8_funcdef = {kaiser8_table, 32};
|
||||
#define KAISER8 (&kaiser8_funcdef)
|
||||
static const struct FuncDef kaiser6_funcdef = {kaiser6_table, 32};
|
||||
#define KAISER6 (&kaiser6_funcdef)
|
||||
static const struct FuncDef _KAISER12 = {kaiser12_table, 64};
|
||||
#define KAISER12 (&_KAISER12)
|
||||
/*static struct FuncDef _KAISER12 = {kaiser12_table, 32};
|
||||
#define KAISER12 (&_KAISER12)*/
|
||||
static const struct FuncDef _KAISER10 = {kaiser10_table, 32};
|
||||
#define KAISER10 (&_KAISER10)
|
||||
static const struct FuncDef _KAISER8 = {kaiser8_table, 32};
|
||||
#define KAISER8 (&_KAISER8)
|
||||
static const struct FuncDef _KAISER6 = {kaiser6_table, 32};
|
||||
#define KAISER6 (&_KAISER6)
|
||||
|
||||
struct QualityMapping {
|
||||
int base_length;
|
||||
@ -584,7 +584,6 @@ static int resampler_basic_zero(SpeexResamplerState *st, spx_uint32_t channel_in
|
||||
const int frac_advance = st->frac_advance;
|
||||
const spx_uint32_t den_rate = st->den_rate;
|
||||
|
||||
(void)in;
|
||||
while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
|
||||
{
|
||||
out[out_stride * out_sample++] = 0;
|
||||
@ -602,15 +601,16 @@ static int resampler_basic_zero(SpeexResamplerState *st, spx_uint32_t channel_in
|
||||
return out_sample;
|
||||
}
|
||||
|
||||
static int multiply_frac(spx_uint32_t *result, spx_uint32_t value, spx_uint32_t num, spx_uint32_t den)
|
||||
static int _muldiv(spx_uint32_t *result, spx_uint32_t value, spx_uint32_t mul, spx_uint32_t div)
|
||||
{
|
||||
spx_uint32_t major = value / den;
|
||||
spx_uint32_t remain = value % den;
|
||||
speex_assert(result);
|
||||
spx_uint32_t major = value / div;
|
||||
spx_uint32_t remainder = value % div;
|
||||
/* TODO: Could use 64 bits operation to check for overflow. But only guaranteed in C99+ */
|
||||
if (remain > UINT32_MAX / num || major > UINT32_MAX / num
|
||||
|| major * num > UINT32_MAX - remain * num / den)
|
||||
if (remainder > UINT32_MAX / mul || major > UINT32_MAX / mul
|
||||
|| major * mul > UINT32_MAX - remainder * mul / div)
|
||||
return RESAMPLER_ERR_OVERFLOW;
|
||||
*result = remain * num / den + major * num;
|
||||
*result = remainder * mul / div + major * mul;
|
||||
return RESAMPLER_ERR_SUCCESS;
|
||||
}
|
||||
|
||||
@ -631,7 +631,7 @@ static int update_filter(SpeexResamplerState *st)
|
||||
{
|
||||
/* down-sampling */
|
||||
st->cutoff = quality_map[st->quality].downsample_bandwidth * st->den_rate / st->num_rate;
|
||||
if (multiply_frac(&st->filt_len,st->filt_len,st->num_rate,st->den_rate) != RESAMPLER_ERR_SUCCESS)
|
||||
if (_muldiv(&st->filt_len,st->filt_len,st->num_rate,st->den_rate) != RESAMPLER_ERR_SUCCESS)
|
||||
goto fail;
|
||||
/* Round up to make sure we have a multiple of 8 for SSE */
|
||||
st->filt_len = ((st->filt_len-1)&(~0x7))+8;
|
||||
@ -746,18 +746,16 @@ static int update_filter(SpeexResamplerState *st)
|
||||
{
|
||||
spx_uint32_t j;
|
||||
spx_uint32_t olen = old_length;
|
||||
spx_uint32_t start = i*st->mem_alloc_size;
|
||||
spx_uint32_t magic_samples = st->magic_samples[i];
|
||||
/*if (st->magic_samples[i])*/
|
||||
{
|
||||
/* Try and remove the magic samples as if nothing had happened */
|
||||
|
||||
/* FIXME: This is wrong but for now we need it to avoid going over the array bounds */
|
||||
olen = old_length + 2*magic_samples;
|
||||
for (j=old_length-1+magic_samples;j--;)
|
||||
st->mem[start+j+magic_samples] = st->mem[i*old_alloc_size+j];
|
||||
for (j=0;j<magic_samples;j++)
|
||||
st->mem[start+j] = 0;
|
||||
olen = old_length + 2*st->magic_samples[i];
|
||||
for (j=old_length-1+st->magic_samples[i];j--;)
|
||||
st->mem[i*st->mem_alloc_size+j+st->magic_samples[i]] = st->mem[i*old_alloc_size+j];
|
||||
for (j=0;j<st->magic_samples[i];j++)
|
||||
st->mem[i*st->mem_alloc_size+j] = 0;
|
||||
st->magic_samples[i] = 0;
|
||||
}
|
||||
if (st->filt_len > olen)
|
||||
@ -765,18 +763,17 @@ static int update_filter(SpeexResamplerState *st)
|
||||
/* If the new filter length is still bigger than the "augmented" length */
|
||||
/* Copy data going backward */
|
||||
for (j=0;j<olen-1;j++)
|
||||
st->mem[start+(st->filt_len-2-j)] = st->mem[start+(olen-2-j)];
|
||||
st->mem[i*st->mem_alloc_size+(st->filt_len-2-j)] = st->mem[i*st->mem_alloc_size+(olen-2-j)];
|
||||
/* Then put zeros for lack of anything better */
|
||||
for (;j<st->filt_len-1;j++)
|
||||
st->mem[start+(st->filt_len-2-j)] = 0;
|
||||
st->mem[i*st->mem_alloc_size+(st->filt_len-2-j)] = 0;
|
||||
/* Adjust last_sample */
|
||||
st->last_sample[i] += (st->filt_len - olen)/2;
|
||||
} else {
|
||||
/* Put back some of the magic! */
|
||||
magic_samples = (olen - st->filt_len)/2;
|
||||
for (j=0;j<st->filt_len-1+magic_samples;j++)
|
||||
st->mem[start+j] = st->mem[start+j+magic_samples];
|
||||
st->magic_samples[i] = magic_samples;
|
||||
st->magic_samples[i] = (olen - st->filt_len)/2;
|
||||
for (j=0;j<st->filt_len-1+st->magic_samples[i];j++)
|
||||
st->mem[i*st->mem_alloc_size+j] = st->mem[i*st->mem_alloc_size+j+st->magic_samples[i]];
|
||||
}
|
||||
}
|
||||
} else if (st->filt_len < old_length)
|
||||
@ -814,6 +811,7 @@ EXPORT SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels, spx_u
|
||||
|
||||
EXPORT SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int *err)
|
||||
{
|
||||
spx_uint32_t i;
|
||||
SpeexResamplerState *st;
|
||||
int filter_err;
|
||||
|
||||
@ -993,7 +991,8 @@ EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t cha
|
||||
const spx_uint32_t xlen = st->mem_alloc_size - (st->filt_len - 1);
|
||||
#ifdef VAR_ARRAYS
|
||||
const unsigned int ylen = (olen < FIXED_STACK_ALLOC) ? olen : FIXED_STACK_ALLOC;
|
||||
spx_word16_t ystack[ylen];
|
||||
VARDECL(spx_word16_t *ystack);
|
||||
ALLOC(ystack, ylen, spx_word16_t);
|
||||
#else
|
||||
const unsigned int ylen = FIXED_STACK_ALLOC;
|
||||
spx_word16_t ystack[FIXED_STACK_ALLOC];
|
||||
@ -1108,7 +1107,7 @@ EXPORT void speex_resampler_get_rate(SpeexResamplerState *st, spx_uint32_t *in_r
|
||||
*out_rate = st->out_rate;
|
||||
}
|
||||
|
||||
static inline spx_uint32_t compute_gcd(spx_uint32_t a, spx_uint32_t b)
|
||||
static inline spx_uint32_t _gcd(spx_uint32_t a, spx_uint32_t b)
|
||||
{
|
||||
while (b != 0)
|
||||
{
|
||||
@ -1138,7 +1137,7 @@ EXPORT int speex_resampler_set_rate_frac(SpeexResamplerState *st, spx_uint32_t r
|
||||
st->num_rate = ratio_num;
|
||||
st->den_rate = ratio_den;
|
||||
|
||||
fact = compute_gcd(st->num_rate, st->den_rate);
|
||||
fact = _gcd (st->num_rate, st->den_rate);
|
||||
|
||||
st->num_rate /= fact;
|
||||
st->den_rate /= fact;
|
||||
@ -1147,8 +1146,8 @@ EXPORT int speex_resampler_set_rate_frac(SpeexResamplerState *st, spx_uint32_t r
|
||||
{
|
||||
for (i=0;i<st->nb_channels;i++)
|
||||
{
|
||||
if (multiply_frac(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS) {
|
||||
st->samp_frac_num[i] = st->den_rate-1;
|
||||
if (_muldiv(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS) {
|
||||
st->samp_frac_num[i] = st->den_rate-1;
|
||||
}
|
||||
/* Safety net */
|
||||
if (st->samp_frac_num[i] >= st->den_rate)
|
||||
|
@ -36,29 +36,17 @@
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include "simd_detect.h"
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#ifdef FIXED_POINT
|
||||
#if defined(__aarch64__)
|
||||
static inline int32_t saturate_32bit_to_16bit(int32_t a) {
|
||||
int32_t ret;
|
||||
asm ("fmov s0, %w[a]\n"
|
||||
"sqxtn h0, s0\n"
|
||||
"sxtl v0.4s, v0.4h\n"
|
||||
"fmov %w[ret], s0\n"
|
||||
: [ret] "=r" (ret)
|
||||
: [a] "r" (a)
|
||||
: "v0" );
|
||||
return ret;
|
||||
}
|
||||
#elif defined(__thumb2__)
|
||||
#ifdef __thumb2__
|
||||
static inline int32_t saturate_32bit_to_16bit(int32_t a) {
|
||||
int32_t ret;
|
||||
asm ("ssat %[ret], #16, %[a]"
|
||||
: [ret] "=r" (ret)
|
||||
: [a] "r" (a)
|
||||
: );
|
||||
: [ret] "=&r" (ret)
|
||||
: [a] "r" (a));
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
@ -67,7 +55,7 @@ static inline int32_t saturate_32bit_to_16bit(int32_t a) {
|
||||
asm ("vmov.s32 d0[0], %[a]\n"
|
||||
"vqmovn.s32 d0, q0\n"
|
||||
"vmov.s16 %[ret], d0[0]\n"
|
||||
: [ret] "=r" (ret)
|
||||
: [ret] "=&r" (ret)
|
||||
: [a] "r" (a)
|
||||
: "q0");
|
||||
return ret;
|
||||
@ -77,64 +65,8 @@ static inline int32_t saturate_32bit_to_16bit(int32_t a) {
|
||||
#define WORD2INT(x) (saturate_32bit_to_16bit(x))
|
||||
|
||||
#define OVERRIDE_INNER_PRODUCT_SINGLE
|
||||
/* Only works when len % 4 == 0 and len >= 4 */
|
||||
#if defined(__aarch64__)
|
||||
inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
|
||||
{
|
||||
int32_t ret;
|
||||
uint32_t remainder = len % 16;
|
||||
len = len - remainder;
|
||||
|
||||
asm volatile (" cmp %w[len], #0\n"
|
||||
" b.ne 1f\n"
|
||||
" ld1 {v16.4h}, [%[b]], #8\n"
|
||||
" ld1 {v20.4h}, [%[a]], #8\n"
|
||||
" subs %w[remainder], %w[remainder], #4\n"
|
||||
" smull v0.4s, v16.4h, v20.4h\n"
|
||||
" b.ne 4f\n"
|
||||
" b 5f\n"
|
||||
"1:"
|
||||
" ld1 {v16.4h, v17.4h, v18.4h, v19.4h}, [%[b]], #32\n"
|
||||
" ld1 {v20.4h, v21.4h, v22.4h, v23.4h}, [%[a]], #32\n"
|
||||
" subs %w[len], %w[len], #16\n"
|
||||
" smull v0.4s, v16.4h, v20.4h\n"
|
||||
" smlal v0.4s, v17.4h, v21.4h\n"
|
||||
" smlal v0.4s, v18.4h, v22.4h\n"
|
||||
" smlal v0.4s, v19.4h, v23.4h\n"
|
||||
" b.eq 3f\n"
|
||||
"2:"
|
||||
" ld1 {v16.4h, v17.4h, v18.4h, v19.4h}, [%[b]], #32\n"
|
||||
" ld1 {v20.4h, v21.4h, v22.4h, v23.4h}, [%[a]], #32\n"
|
||||
" subs %w[len], %w[len], #16\n"
|
||||
" smlal v0.4s, v16.4h, v20.4h\n"
|
||||
" smlal v0.4s, v17.4h, v21.4h\n"
|
||||
" smlal v0.4s, v18.4h, v22.4h\n"
|
||||
" smlal v0.4s, v19.4h, v23.4h\n"
|
||||
" b.ne 2b\n"
|
||||
"3:"
|
||||
" cmp %w[remainder], #0\n"
|
||||
" b.eq 5f\n"
|
||||
"4:"
|
||||
" ld1 {v18.4h}, [%[b]], #8\n"
|
||||
" ld1 {v22.4h}, [%[a]], #8\n"
|
||||
" subs %w[remainder], %w[remainder], #4\n"
|
||||
" smlal v0.4s, v18.4h, v22.4h\n"
|
||||
" b.ne 4b\n"
|
||||
"5:"
|
||||
" saddlv d0, v0.4s\n"
|
||||
" sqxtn s0, d0\n"
|
||||
" sqrshrn h0, s0, #15\n"
|
||||
" sxtl v0.4s, v0.4h\n"
|
||||
" fmov %w[ret], s0\n"
|
||||
: [ret] "=r" (ret), [a] "+r" (a), [b] "+r" (b),
|
||||
[len] "+r" (len), [remainder] "+r" (remainder)
|
||||
:
|
||||
: "cc", "v0",
|
||||
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23");
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
|
||||
/* Only works when len % 4 == 0 */
|
||||
int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
|
||||
{
|
||||
int32_t ret;
|
||||
uint32_t remainder = len % 16;
|
||||
@ -181,105 +113,34 @@ inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned
|
||||
" vqmovn.s64 d0, q0\n"
|
||||
" vqrshrn.s32 d0, q0, #15\n"
|
||||
" vmov.s16 %[ret], d0[0]\n"
|
||||
: [ret] "=r" (ret), [a] "+r" (a), [b] "+r" (b),
|
||||
: [ret] "=&r" (ret), [a] "+r" (a), [b] "+r" (b),
|
||||
[len] "+r" (len), [remainder] "+r" (remainder)
|
||||
:
|
||||
: "cc", "q0",
|
||||
"d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23");
|
||||
"d16", "d17", "d18", "d19",
|
||||
"d20", "d21", "d22", "d23");
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif // !defined(__aarch64__)
|
||||
|
||||
#elif defined(FLOATING_POINT)
|
||||
#if defined(__aarch64__)
|
||||
static inline int32_t saturate_float_to_16bit(float a) {
|
||||
int32_t ret;
|
||||
asm ("fcvtas s1, %s[a]\n"
|
||||
"sqxtn h1, s1\n"
|
||||
"sxtl v1.4s, v1.4h\n"
|
||||
"fmov %w[ret], s1\n"
|
||||
: [ret] "=r" (ret)
|
||||
: [a] "w" (a)
|
||||
: "v1");
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
|
||||
static inline int32_t saturate_float_to_16bit(float a) {
|
||||
int32_t ret;
|
||||
asm ("vmov.f32 d0[0], %[a]\n"
|
||||
"vcvt.s32.f32 d0, d0, #15\n"
|
||||
"vqrshrn.s32 d0, q0, #15\n"
|
||||
"vmov.s16 %[ret], d0[0]\n"
|
||||
: [ret] "=r" (ret)
|
||||
: [ret] "=&r" (ret)
|
||||
: [a] "r" (a)
|
||||
: "q0");
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
#undef WORD2INT
|
||||
#define WORD2INT(x) (saturate_float_to_16bit(x))
|
||||
|
||||
#define OVERRIDE_INNER_PRODUCT_SINGLE
|
||||
/* Only works when len % 4 == 0 and len >= 4 */
|
||||
#if defined(__aarch64__)
|
||||
inline float inner_product_single(const float *a, const float *b, unsigned int len)
|
||||
{
|
||||
float ret;
|
||||
uint32_t remainder = len % 16;
|
||||
len = len - remainder;
|
||||
|
||||
asm volatile (" cmp %w[len], #0\n"
|
||||
" b.ne 1f\n"
|
||||
" ld1 {v16.4s}, [%[b]], #16\n"
|
||||
" ld1 {v20.4s}, [%[a]], #16\n"
|
||||
" subs %w[remainder], %w[remainder], #4\n"
|
||||
" fmul v1.4s, v16.4s, v20.4s\n"
|
||||
" b.ne 4f\n"
|
||||
" b 5f\n"
|
||||
"1:"
|
||||
" ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[b]], #64\n"
|
||||
" ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [%[a]], #64\n"
|
||||
" subs %w[len], %w[len], #16\n"
|
||||
" fmul v1.4s, v16.4s, v20.4s\n"
|
||||
" fmul v2.4s, v17.4s, v21.4s\n"
|
||||
" fmul v3.4s, v18.4s, v22.4s\n"
|
||||
" fmul v4.4s, v19.4s, v23.4s\n"
|
||||
" b.eq 3f\n"
|
||||
"2:"
|
||||
" ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[b]], #64\n"
|
||||
" ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [%[a]], #64\n"
|
||||
" subs %w[len], %w[len], #16\n"
|
||||
" fmla v1.4s, v16.4s, v20.4s\n"
|
||||
" fmla v2.4s, v17.4s, v21.4s\n"
|
||||
" fmla v3.4s, v18.4s, v22.4s\n"
|
||||
" fmla v4.4s, v19.4s, v23.4s\n"
|
||||
" b.ne 2b\n"
|
||||
"3:"
|
||||
" fadd v16.4s, v1.4s, v2.4s\n"
|
||||
" fadd v17.4s, v3.4s, v4.4s\n"
|
||||
" cmp %w[remainder], #0\n"
|
||||
" fadd v1.4s, v16.4s, v17.4s\n"
|
||||
" b.eq 5f\n"
|
||||
"4:"
|
||||
" ld1 {v18.4s}, [%[b]], #16\n"
|
||||
" ld1 {v22.4s}, [%[a]], #16\n"
|
||||
" subs %w[remainder], %w[remainder], #4\n"
|
||||
" fmla v1.4s, v18.4s, v22.4s\n"
|
||||
" b.ne 4b\n"
|
||||
"5:"
|
||||
" faddp v1.4s, v1.4s, v1.4s\n"
|
||||
" faddp %[ret].4s, v1.4s, v1.4s\n"
|
||||
: [ret] "=w" (ret), [a] "+r" (a), [b] "+r" (b),
|
||||
[len] "+r" (len), [remainder] "+r" (remainder)
|
||||
:
|
||||
: "cc", "v1", "v2", "v3", "v4",
|
||||
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23");
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
inline float inner_product_single(const float *a, const float *b, unsigned int len)
|
||||
/* Only works when len % 4 == 0 */
|
||||
float inner_product_single(const float *a, const float *b, unsigned int len)
|
||||
{
|
||||
float ret;
|
||||
uint32_t remainder = len % 16;
|
||||
@ -331,12 +192,11 @@ inline float inner_product_single(const float *a, const float *b, unsigned int l
|
||||
" vadd.f32 d0, d0, d1\n"
|
||||
" vpadd.f32 d0, d0, d0\n"
|
||||
" vmov.f32 %[ret], d0[0]\n"
|
||||
: [ret] "=r" (ret), [a] "+r" (a), [b] "+r" (b),
|
||||
: [ret] "=&r" (ret), [a] "+r" (a), [b] "+r" (b),
|
||||
[len] "+l" (len), [remainder] "+l" (remainder)
|
||||
:
|
||||
: "cc", "q0", "q1", "q2", "q3",
|
||||
"q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11");
|
||||
: "cc", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8",
|
||||
"q9", "q10", "q11");
|
||||
return ret;
|
||||
}
|
||||
#endif // defined(__aarch64__)
|
||||
#endif
|
||||
|
@ -73,7 +73,7 @@ float interpolate_product_single(const float *a, const float *b, unsigned int le
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef USE_SSE2
|
||||
#ifdef _USE_SSE2
|
||||
#include <emmintrin.h>
|
||||
#define OVERRIDE_INNER_PRODUCT_DOUBLE
|
||||
|
||||
|
115
media/libspeex_resampler/src/stack_alloc.h
Normal file
115
media/libspeex_resampler/src/stack_alloc.h
Normal file
@ -0,0 +1,115 @@
|
||||
/* Copyright (C) 2002 Jean-Marc Valin */
|
||||
/**
|
||||
@file stack_alloc.h
|
||||
@brief Temporary memory allocation on stack
|
||||
*/
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
- Neither the name of the Xiph.org Foundation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef STACK_ALLOC_H
|
||||
#define STACK_ALLOC_H
|
||||
|
||||
#ifdef USE_ALLOCA
|
||||
# ifdef WIN32
|
||||
# include <malloc.h>
|
||||
# else
|
||||
# ifdef HAVE_ALLOCA_H
|
||||
# include <alloca.h>
|
||||
# else
|
||||
# include <stdlib.h>
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @def ALIGN(stack, size)
|
||||
*
|
||||
* Aligns the stack to a 'size' boundary
|
||||
*
|
||||
* @param stack Stack
|
||||
* @param size New size boundary
|
||||
*/
|
||||
|
||||
/**
|
||||
* @def PUSH(stack, size, type)
|
||||
*
|
||||
* Allocates 'size' elements of type 'type' on the stack
|
||||
*
|
||||
* @param stack Stack
|
||||
* @param size Number of elements
|
||||
* @param type Type of element
|
||||
*/
|
||||
|
||||
/**
|
||||
* @def VARDECL(var)
|
||||
*
|
||||
* Declare variable on stack
|
||||
*
|
||||
* @param var Variable to declare
|
||||
*/
|
||||
|
||||
/**
|
||||
* @def ALLOC(var, size, type)
|
||||
*
|
||||
* Allocate 'size' elements of 'type' on stack
|
||||
*
|
||||
* @param var Name of variable to allocate
|
||||
* @param size Number of elements
|
||||
* @param type Type of element
|
||||
*/
|
||||
|
||||
#ifdef ENABLE_VALGRIND
|
||||
|
||||
#include <valgrind/memcheck.h>
|
||||
|
||||
#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1))
|
||||
|
||||
#define PUSH(stack, size, type) (VALGRIND_MAKE_NOACCESS(stack, 1000),ALIGN((stack),sizeof(type)),VALGRIND_MAKE_WRITABLE(stack, ((size)*sizeof(type))),(stack)+=((size)*sizeof(type)),(type*)((stack)-((size)*sizeof(type))))
|
||||
|
||||
#else
|
||||
|
||||
#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1))
|
||||
|
||||
#define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)),(stack)+=((size)*sizeof(type)),(type*)((stack)-((size)*sizeof(type))))
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(VAR_ARRAYS)
|
||||
#define VARDECL(var)
|
||||
#define ALLOC(var, size, type) type var[size]
|
||||
#elif defined(USE_ALLOCA)
|
||||
#define VARDECL(var) var
|
||||
#define ALLOC(var, size, type) var = alloca(sizeof(type)*(size))
|
||||
#else
|
||||
#define VARDECL(var) var
|
||||
#define ALLOC(var, size, type) var = PUSH(stack, size, type)
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user