lavr: Add x86-optimized function for flt to s16 conversion

This commit is contained in:
Justin Ruggles 2012-04-23 22:22:28 -04:00
parent 97ce1ba867
commit 6c63cbfe7a
2 changed files with 37 additions and 0 deletions

View File

@ -28,6 +28,7 @@ SECTION_RODATA 32
; Scale factors stored as raw IEEE-754 single-precision bit patterns, each
; replicated across a vector with `times`.
pf_s32_inv_scale: times 8 dd 0x30000000 ; 2^-31
pf_s16_inv_scale: times 4 dd 0x38000000 ; 2^-15
pf_s16_scale: times 4 dd 0x47000000 ; 2^15 = 32768.0
SECTION_TEXT
@ -158,6 +159,38 @@ INIT_YMM avx
CONV_S32_TO_FLT
%endif
;------------------------------------------------------------------------------
; void ff_conv_flt_to_s16(int16_t *dst, const float *src, int len);
;
; Converts float samples to int16: every sample is multiplied by pf_s16_scale,
; converted to a 32-bit integer with cvtps2dq and narrowed to 16 bits with
; signed saturation by packssdw.
;
; One loop iteration reads 4*mmsize bytes of floats and writes 2*mmsize bytes
; of int16 (16 samples with XMM registers). The loop body runs before the
; counter is tested and there is no tail handling, so len is assumed to be a
; positive multiple of that sample count -- TODO confirm against callers.
; All memory accesses go through mova; presumably src and dst must be aligned
; to mmsize -- verify against the x86inc definition of mova.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal conv_flt_to_s16, 3,3,5, dst, src, len
    ; Point both buffers at their ends and walk a negative byte offset up
    ; towards zero, so a single add/jl pair drives the loop.
    lea         lenq, [2*lend]              ; lenq = size of dst in bytes
    lea         srcq, [srcq+2*lenq]         ; srcq = src + 4*len (end of input)
    add         dstq, lenq                  ; dstq = dst + 2*len (end of output)
    neg         lenq                        ; lenq = -(size of dst in bytes)
    mova          m4, [pf_s16_scale]        ; m4 = scale factor in every lane

.loop:
    ; The input offset is 2*lenq because a float is twice as wide as an int16.
    ; Each register is loaded, scaled and converted to int32.
    mova          m0, [srcq+2*lenq+0*mmsize]
    mulps         m0, m4
    cvtps2dq      m0, m0

    mova          m1, [srcq+2*lenq+1*mmsize]
    mulps         m1, m4
    cvtps2dq      m1, m1

    mova          m2, [srcq+2*lenq+2*mmsize]
    mulps         m2, m4
    cvtps2dq      m2, m2

    mova          m3, [srcq+2*lenq+3*mmsize]
    mulps         m3, m4
    cvtps2dq      m3, m3

    ; Narrow int32 -> int16 with signed saturation; m0 and m2 end up holding
    ; the samples in their original order.
    packssdw      m0, m1
    packssdw      m2, m3

    mova   [dstq+lenq+0*mmsize], m0
    mova   [dstq+lenq+1*mmsize], m2

    add         lenq, mmsize*2              ; advance by the bytes just written
    jl .loop                                ; continue while the offset is negative
    REP_RET
;-----------------------------------------------------------------------------
; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len,
; int channels);

View File

@ -33,6 +33,8 @@ extern void ff_conv_s32_to_s16_sse2(int16_t *dst, const int32_t *src, int len);
/* Prototypes for conversion routines defined outside this file. The name
 * suffix (mmx, sse2, sse4, avx) identifies the instruction-set variant. */
extern void ff_conv_s32_to_flt_sse2(float *dst, const int32_t *src, int len);
extern void ff_conv_s32_to_flt_avx (float *dst, const int32_t *src, int len);
extern void ff_conv_flt_to_s16_sse2(int16_t *dst, const float *src, int len);
extern void ff_conv_fltp_to_flt_6ch_mmx (float *dst, float *const *src, int len);
extern void ff_conv_fltp_to_flt_6ch_sse4(float *dst, float *const *src, int len);
extern void ff_conv_fltp_to_flt_6ch_avx (float *dst, float *const *src, int len);
@ -67,6 +69,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
0, 16, 8, "SSE2", ff_conv_s16_to_flt_sse2);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32,
0, 16, 8, "SSE2", ff_conv_s32_to_flt_sse2);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLT,
0, 16, 16, "SSE2", ff_conv_flt_to_s16_sse2);
}
if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) {
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,