x86: Refactor PSWAPD fallback implementations and port to cpuflags

This commit is contained in:
Diego Biurrun 2012-08-02 00:55:34 +02:00
parent 9a07c1332c
commit 0a7a94f2e5
3 changed files with 16 additions and 29 deletions

View File

@ -105,7 +105,8 @@ SECTION_TEXT
pfadd %5, %4 ; {t6,t5}
pxor %3, [ps_m1p1] ; {t8,t7}
mova %6, %1
PSWAPD %3, %3
movd [r0+12], %3
punpckhdq %3, [r0+8]
pfadd %1, %5 ; {r0,i0}
pfsub %6, %5 ; {r2,i2}
mova %4, %2
@ -498,19 +499,6 @@ fft8 %+ SUFFIX:
%endmacro
%if ARCH_X86_32
; PSWAPD dst, src
; Write src with its two 32-bit halves exchanged into dst (64-bit MMX registers).
;   %1 = destination register
;   %2 = source register (may be the same register as %1)
; Three implementations are selected at assembly time:
;   * 3dnowext:          the native pswapd instruction.
;   * dst == src:        an in-place swap that bounces the low dword through
;                        memory. NOTE: this overwrites the 4 bytes at [r0+12];
;                        it presumably relies on r0 pointing at a buffer whose
;                        bytes 12..15 may be clobbered -- confirm with callers.
;   * dst != src:        a register-only shift/unpack sequence.
%macro PSWAPD 2
%if cpuflag(3dnowext)
pswapd %1, %2
%elifidn %1, %2
; store the low dword at r0+12, i.e. in the upper half of the qword at r0+8
movd [r0+12], %1
; low dword <- old high dword of %1, high dword <- dword at r0+12 (old low dword)
punpckhdq %1, [r0+8]
%else
; %1 = src >> 32  -> old high dword now sits in the low half, upper half is zero
movq %1, %2
psrlq %1, 32
; interleave low dwords: %1 = {low: old high dword, high: low dword of src}
punpckldq %1, %2
%endif
%endmacro
INIT_MMX 3dnowext
FFT48_3DNOW

View File

@ -246,16 +246,6 @@ FLOAT_TO_INT16_INTERLEAVE2
INIT_XMM sse2
FLOAT_TO_INT16_INTERLEAVE2
; PSWAPD_SSE dst, src
; dst = src with its two 32-bit halves exchanged, done with a single pshufw.
; The immediate is built from the four 2-bit word selectors (highest word
; first): dst words 3,2,1,0 take src words 1,0,3,2.
%macro PSWAPD_SSE 2
    pshufw %1, %2, (1 << 6) | (0 << 4) | (3 << 2) | 2
%endmacro
; PSWAPD_3DNOW dst, src
; Fallback dword swap for 64-bit MMX registers using only shift and unpack.
;   %1 = destination register
;   %2 = source register; must NOT be the same register as %1, because %2 is
;        read again by punpckldq after %1 has already been overwritten.
%macro PSWAPD_3DNOW 2
; %1 = copy of src
movq %1, %2
; move the high dword down into the low half; the upper half becomes zero
psrlq %1, 32
; interleave low dwords: %1 = {low: old high dword, high: low dword of src}
punpckldq %1, %2
%endmacro
%macro FLOAT_TO_INT16_INTERLEAVE6 0
; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, src5, len
@ -285,11 +275,11 @@ cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, s
packssdw mm0, mm3
packssdw mm1, mm4
packssdw mm2, mm5
pswapd mm3, mm0
PSWAPD mm3, mm0
punpcklwd mm0, mm1
punpckhwd mm1, mm2
punpcklwd mm2, mm3
pswapd mm3, mm0
PSWAPD mm3, mm0
punpckldq mm0, mm2
punpckhdq mm2, mm1
punpckldq mm1, mm3
@ -305,12 +295,9 @@ cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, s
%endmacro ; FLOAT_TO_INT16_INTERLEAVE6
; Expand FLOAT_TO_INT16_INTERLEAVE6 three times, once per INIT_MMX setting
; (sse, 3dnow, 3dnowext). Before the first two expansions the token `pswapd`
; is redefined to a helper macro, so every `pswapd` line inside the expanded
; body is replaced by that helper's instruction sequence.
INIT_MMX sse
; sse variant: `pswapd` expands to the pshufw-based helper
%define pswapd PSWAPD_SSE
FLOAT_TO_INT16_INTERLEAVE6
INIT_MMX 3dnow
; 3dnow variant: `pswapd` expands to the shift/unpack helper
%define pswapd PSWAPD_3DNOW
FLOAT_TO_INT16_INTERLEAVE6
; drop the alias so that `pswapd` is assembled as written from here on
; (presumably the native instruction for the 3dnowext variant -- confirm)
%undef pswapd
INIT_MMX 3dnowext
FLOAT_TO_INT16_INTERLEAVE6

View File

@ -319,6 +319,18 @@
%endif
%endmacro
; PSWAPD dst, src
; Write src with its two 32-bit halves exchanged into dst (64-bit MMX registers).
;   %1 = destination register
;   %2 = source register
; The implementation is chosen from the active cpuflags at assembly time:
;   mmxext   -> pshufw with the q1032 word selector
;   3dnowext -> native pswapd
;   3dnow    -> shift/unpack fallback; dst and src must be different registers
; Any other configuration is rejected at assembly time instead of silently
; expanding to nothing.
%macro PSWAPD 2
%if cpuflag(mmxext)
    pshufw    %1, %2, q1032
%elif cpuflag(3dnowext)
    pswapd    %1, %2
%elif cpuflag(3dnow)
    ; The fallback reads %2 again after %1 has been overwritten, so with
    ; identical operands it would produce {hi, hi} rather than a swap.
    %ifidn %1, %2
        %error "PSWAPD: the 3dnow fallback requires distinct dst and src registers"
    %endif
    movq      %1, %2        ; %1 = copy of src
    psrlq     %1, 32        ; high dword moves to the low half, upper half = 0
    punpckldq %1, %2        ; %1 = {low: old high dword, high: low dword of src}
%else
    %error "PSWAPD: no implementation for the selected cpuflags"
%endif
%endmacro
%macro DEINTB 5 ; mask, reg1, mask, reg2, optional src to fill masks from
%ifnum %5
pand m%3, m%5, m%4 ; src .. y6 .. y4