diff --git a/libswresample/x86/audio_convert.asm b/libswresample/x86/audio_convert.asm index b6e9e5d79d..d77e93439b 100644 --- a/libswresample/x86/audio_convert.asm +++ b/libswresample/x86/audio_convert.asm @@ -245,15 +245,27 @@ pack_6ch_%2_to_%1_u_int %+ SUFFIX mov%3 m4, [srcq+src4q] mov%3 m5, [srcq+src5q] %7 x,x,x,x,m7,x -%if cpuflag(sse4) +%if cpuflag(sse) SBUTTERFLYPS 0, 1, 6 SBUTTERFLYPS 2, 3, 6 SBUTTERFLYPS 4, 5, 6 +%if cpuflag(avx) blendps m6, m4, m0, 1100b +%else + movaps m6, m4 + shufps m4, m0, q3210 + SWAP 4,6 +%endif movlhps m0, m2 movhlps m4, m2 +%if cpuflag(avx) blendps m2, m5, m1, 1100b +%else + movaps m2, m5 + shufps m5, m1, q3210 + SWAP 2,5 +%endif movlhps m1, m3 movhlps m5, m3 @@ -380,6 +392,10 @@ CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N +INIT_XMM sse +PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N +PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N + INIT_XMM sse2 CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N @@ -431,6 +447,10 @@ UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT UNPACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT UNPACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT +PACK_6CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT +PACK_6CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT +PACK_6CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT +PACK_6CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT INIT_XMM ssse3 UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N @@ -440,15 +460,6 @@ UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT -INIT_XMM sse4 -PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N -PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N - -PACK_6CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT -PACK_6CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT -PACK_6CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT -PACK_6CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT - %if HAVE_AVX_EXTERNAL INIT_XMM avx PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N diff --git a/libswresample/x86/audio_convert_init.c b/libswresample/x86/audio_convert_init.c index a26cdf6ea6..769575d0fc 100644 --- a/libswresample/x86/audio_convert_init.c +++ b/libswresample/x86/audio_convert_init.c @@ -58,7 +58,12 @@ MULTI_CAPS_FUNC(SSE2, sse2) ac->simd_f = ff_pack_6ch_float_to_float_a_mmx; } } - + if(EXTERNAL_SSE(mm_flags)) { + if(channels == 6) { + if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P) + ac->simd_f = ff_pack_6ch_float_to_float_a_sse; + } + } if(EXTERNAL_SSE2(mm_flags)) { if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P) ac->simd_f = ff_int32_to_float_a_sse2; @@ -105,6 +110,12 @@ MULTI_CAPS_FUNC(SSE2, sse2) if( out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLT) ac->simd_f = ff_unpack_2ch_float_to_int16_a_sse2; } + if(channels == 6) { + if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32P) + ac->simd_f = ff_pack_6ch_int32_to_float_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_FLTP) + ac->simd_f = ff_pack_6ch_float_to_int32_a_sse2; + } } if(EXTERNAL_SSSE3(mm_flags)) { if(channels == 2) { @@ -116,16 +127,6 @@ MULTI_CAPS_FUNC(SSE2, sse2) ac->simd_f = ff_unpack_2ch_int16_to_float_a_ssse3; } } - if(EXTERNAL_SSE4(mm_flags)) { - if(channels == 6) { - if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P) - ac->simd_f = ff_pack_6ch_float_to_float_a_sse4; - if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32P) - ac->simd_f = ff_pack_6ch_int32_to_float_a_sse4; - if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_FLTP) - ac->simd_f = ff_pack_6ch_float_to_int32_a_sse4; - } - } if(EXTERNAL_AVX(mm_flags)) { if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P) ac->simd_f = ff_int32_to_float_a_avx;