x86: ac3dsp: port to cpuflags

This commit is contained in:
Diego Biurrun 2012-07-15 15:41:30 +02:00
parent 61bc2bc7d4
commit 9ce02e14f0

View File

@ -41,8 +41,8 @@ SECTION .text
; void ff_ac3_exponent_min(uint8_t *exp, int num_reuse_blocks, int nb_coefs) ; void ff_ac3_exponent_min(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%macro AC3_EXPONENT_MIN 1 %macro AC3_EXPONENT_MIN 0
cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset cglobal ac3_exponent_min, 3, 4, 2, exp, reuse_blks, expn, offset
shl reuse_blksq, 8 shl reuse_blksq, 8
jz .end jz .end
LOOP_ALIGN LOOP_ALIGN
@ -65,16 +65,17 @@ cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset
%define PMINUB PMINUB_MMX %define PMINUB PMINUB_MMX
%define LOOP_ALIGN %define LOOP_ALIGN
INIT_MMX INIT_MMX mmx
AC3_EXPONENT_MIN mmx AC3_EXPONENT_MIN
%if HAVE_MMXEXT %if HAVE_MMXEXT
%define PMINUB PMINUB_MMXEXT %define PMINUB PMINUB_MMXEXT
%define LOOP_ALIGN ALIGN 16 %define LOOP_ALIGN ALIGN 16
AC3_EXPONENT_MIN mmxext INIT_MMX mmxext
AC3_EXPONENT_MIN
%endif %endif
%if HAVE_SSE2_EXTERNAL %if HAVE_SSE2_EXTERNAL
INIT_XMM INIT_XMM sse2
AC3_EXPONENT_MIN sse2 AC3_EXPONENT_MIN
%endif %endif
%undef PMINUB %undef PMINUB
%undef LOOP_ALIGN %undef LOOP_ALIGN
@ -168,8 +169,8 @@ AC3_MAX_MSB_ABS_INT16 or_abs
; macro used for ff_ac3_lshift_int16() and ff_ac3_rshift_int32() ; macro used for ff_ac3_lshift_int16() and ff_ac3_rshift_int32()
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%macro AC3_SHIFT 4 ; l/r, 16/32, shift instruction, instruction set %macro AC3_SHIFT 3 ; l/r, 16/32, shift instruction
cglobal ac3_%1shift_int%2_%4, 3,3,5, src, len, shift cglobal ac3_%1shift_int%2, 3, 3, 5, src, len, shift
movd m0, shiftd movd m0, shiftd
.loop: .loop:
mova m1, [srcq ] mova m1, [srcq ]
@ -195,19 +196,19 @@ cglobal ac3_%1shift_int%2_%4, 3,3,5, src, len, shift
; void ff_ac3_lshift_int16(int16_t *src, unsigned int len, unsigned int shift) ; void ff_ac3_lshift_int16(int16_t *src, unsigned int len, unsigned int shift)
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
INIT_MMX INIT_MMX mmx
AC3_SHIFT l, 16, psllw, mmx AC3_SHIFT l, 16, psllw
INIT_XMM INIT_XMM sse2
AC3_SHIFT l, 16, psllw, sse2 AC3_SHIFT l, 16, psllw
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void ff_ac3_rshift_int32(int32_t *src, unsigned int len, unsigned int shift) ; void ff_ac3_rshift_int32(int32_t *src, unsigned int len, unsigned int shift)
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
INIT_MMX INIT_MMX mmx
AC3_SHIFT r, 32, psrad, mmx AC3_SHIFT r, 32, psrad
INIT_XMM INIT_XMM sse2
AC3_SHIFT r, 32, psrad, sse2 AC3_SHIFT r, 32, psrad
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void ff_float_to_fixed24(int32_t *dst, const float *src, unsigned int len) ; void ff_float_to_fixed24(int32_t *dst, const float *src, unsigned int len)
@ -215,8 +216,8 @@ AC3_SHIFT r, 32, psrad, sse2
; The 3DNow! version is not bit-identical because pf2id uses truncation rather ; The 3DNow! version is not bit-identical because pf2id uses truncation rather
; than round-to-nearest. ; than round-to-nearest.
INIT_MMX INIT_MMX 3dnow
cglobal float_to_fixed24_3dnow, 3,3,0, dst, src, len cglobal float_to_fixed24, 3, 3, 0, dst, src, len
movq m0, [pf_1_24] movq m0, [pf_1_24]
.loop: .loop:
movq m1, [srcq ] movq m1, [srcq ]
@ -242,8 +243,8 @@ cglobal float_to_fixed24_3dnow, 3,3,0, dst, src, len
femms femms
RET RET
INIT_XMM INIT_XMM sse
cglobal float_to_fixed24_sse, 3,3,3, dst, src, len cglobal float_to_fixed24, 3, 3, 3, dst, src, len
movaps m0, [pf_1_24] movaps m0, [pf_1_24]
.loop: .loop:
movaps m1, [srcq ] movaps m1, [srcq ]
@ -267,8 +268,8 @@ cglobal float_to_fixed24_sse, 3,3,3, dst, src, len
emms emms
RET RET
INIT_XMM INIT_XMM sse2
cglobal float_to_fixed24_sse2, 3,3,9, dst, src, len cglobal float_to_fixed24, 3, 3, 9, dst, src, len
movaps m0, [pf_1_24] movaps m0, [pf_1_24]
.loop: .loop:
movaps m1, [srcq ] movaps m1, [srcq ]
@ -332,8 +333,8 @@ cglobal float_to_fixed24_sse2, 3,3,9, dst, src, len
paddd %1, %2 paddd %1, %2
%endmacro %endmacro
INIT_XMM INIT_XMM sse2
cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum cglobal ac3_compute_mantissa_size, 1, 2, 4, mant_cnt, sum
movdqa m0, [mant_cntq ] movdqa m0, [mant_cntq ]
movdqa m1, [mant_cntq+ 1*16] movdqa m1, [mant_cntq+ 1*16]
paddw m0, [mant_cntq+ 2*16] paddw m0, [mant_cntq+ 2*16]
@ -373,20 +374,20 @@ cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum
; void ff_ac3_extract_exponents(uint8_t *exp, int32_t *coef, int nb_coefs) ; void ff_ac3_extract_exponents(uint8_t *exp, int32_t *coef, int nb_coefs)
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------
%macro PABSD_MMX 2 ; src/dst, tmp %macro PABSD 1-2 ; src/dst, tmp (unused with SSSE3)
%if cpuflag(ssse3)
pabsd %1, %1
%else ; src/dst, tmp
pxor %2, %2 pxor %2, %2
pcmpgtd %2, %1 pcmpgtd %2, %1
pxor %1, %2 pxor %1, %2
psubd %1, %2 psubd %1, %2
%endmacro %endif
%macro PABSD_SSSE3 1-2 ; src/dst, unused
pabsd %1, %1
%endmacro %endmacro
%if HAVE_AMD3DNOW_EXTERNAL %if HAVE_AMD3DNOW_EXTERNAL
INIT_MMX INIT_MMX 3dnow
cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len cglobal ac3_extract_exponents, 3, 3, 0, exp, coef, len
add expq, lenq add expq, lenq
lea coefq, [coefq+4*lenq] lea coefq, [coefq+4*lenq]
neg lenq neg lenq
@ -395,8 +396,8 @@ cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len
.loop: .loop:
movq m0, [coefq+4*lenq ] movq m0, [coefq+4*lenq ]
movq m1, [coefq+4*lenq+8] movq m1, [coefq+4*lenq+8]
PABSD_MMX m0, m2 PABSD m0, m2
PABSD_MMX m1, m2 PABSD m1, m2
pslld m0, 1 pslld m0, 1
por m0, m3 por m0, m3
pi2fd m2, m0 pi2fd m2, m0
@ -420,8 +421,8 @@ cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len
REP_RET REP_RET
%endif %endif
%macro AC3_EXTRACT_EXPONENTS 1 %macro AC3_EXTRACT_EXPONENTS 0
cglobal ac3_extract_exponents_%1, 3,3,4, exp, coef, len cglobal ac3_extract_exponents, 3, 3, 4, exp, coef, len
add expq, lenq add expq, lenq
lea coefq, [coefq+4*lenq] lea coefq, [coefq+4*lenq]
neg lenq neg lenq
@ -453,11 +454,10 @@ cglobal ac3_extract_exponents_%1, 3,3,4, exp, coef, len
%endmacro %endmacro
%if HAVE_SSE2_EXTERNAL %if HAVE_SSE2_EXTERNAL
INIT_XMM INIT_XMM sse2
%define PABSD PABSD_MMX AC3_EXTRACT_EXPONENTS
AC3_EXTRACT_EXPONENTS sse2 %endif
%if HAVE_SSSE3_EXTERNAL %if HAVE_SSSE3_EXTERNAL
%define PABSD PABSD_SSSE3 INIT_XMM ssse3
AC3_EXTRACT_EXPONENTS ssse3 AC3_EXTRACT_EXPONENTS
%endif
%endif %endif