x86/imdct36: use sse3 instructions in the last BUTTERF step when possible

Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
James Almer 2014-02-27 14:40:28 -03:00 committed by Michael Niedermayer
parent fbf98375e4
commit 2163a40a46

View File

@ -50,7 +50,7 @@ ps_cosh_sse3: dd 1.0, -0.50190991877167369479, 1.0, -5.73685662283492756461
dd 1.0, -0.51763809020504152469, 1.0, -1.93185165257813657349 dd 1.0, -0.51763809020504152469, 1.0, -1.93185165257813657349
dd 1.0, -0.55168895948124587824, -1.0, 1.18310079157624925896 dd 1.0, -0.55168895948124587824, -1.0, 1.18310079157624925896
dd 1.0, -0.61038729438072803416, -1.0, 0.87172339781054900991 dd 1.0, -0.61038729438072803416, -1.0, 0.87172339781054900991
dd 1.0, 0.70710678118654752439, 0.0, 0.0 dd 1.0, -0.70710678118654752439, 0.0, 0.0
costabs: times 4 dd 0.98480773 costabs: times 4 dd 0.98480773
times 4 dd 0.93969262 times 4 dd 0.93969262
@ -129,6 +129,19 @@ SECTION_TEXT
%endif %endif
%endmacro %endmacro
; BUTTERF2 dst, tmp, offset
;   arg 1 (dst):    vector register holding the input; receives the result
;   arg 2 (tmp):    scratch vector register, overwritten
;   arg 3 (offset): byte offset of the coefficient row inside the
;                   ps_cosh / ps_cosh_sse3 tables
;
; Scales dst by a table row, then combines lanes 0 and 1 of the scaled
; vector with their swapped copy: one as a sum, the other as a difference.
; Two code paths are selected at assembly time by cpuflag(sse3).
; NOTE(review): the two paths read different tables; the sse3 table appears
; to carry pre-negated coefficients so that both paths yield the same
; lanes 0 and 1 -- confirm against the table definitions.
%macro BUTTERF2 3
%if cpuflag(sse3)
; x = dst * row from the sse3-specific table
mulps %1, %1, [ps_cosh_sse3 + %3]
; tmp = x with lanes 0 and 1 swapped (0xe1 selects 1,0,2,3), assuming the
; PSHUFD macro follows pshufd lane-selection semantics
PSHUFD %2, %1, 0xe1
; addsubps subtracts in even lanes and adds in odd lanes:
; dst = { x0 - x1, x1 + x0, x2 - x2, x3 + x3 }
addsubps %1, %1, %2
%else
; x = dst * row from the generic table
mulps %1, [ps_cosh + %3]
; tmp = x with lanes 0 and 1 swapped (same shuffle as the sse3 path)
PSHUFD %2, %1, 0xe1
; flip signs of selected lanes of x; presumably ps_p1m1p1m1 is a
; +,-,+,- sign mask (its definition is not visible here -- verify)
xorps %1, [ps_p1m1p1m1]
; dst = sign-adjusted x + swapped x
addps %1, %2
%endif
%endmacro
%macro STORE 4 %macro STORE 4
movhlps %2, %1 movhlps %2, %1
movss [%3 ], %1 movss [%3 ], %1
@ -279,11 +292,7 @@ cglobal imdct36_float, 4,4,9, out, buf, in, win
BUTTERF m7, m2, 16 BUTTERF m7, m2, 16
BUTTERF m3, m6, 32 BUTTERF m3, m6, 32
BUTTERF m4, m1, 48 BUTTERF m4, m1, 48
BUTTERF2 m5, m1, 64
mulps m5, m5, [ps_cosh + 64]
PSHUFD m1, m5, 0xe1
xorps m5, m5, [ps_p1m1p1m1]
addps m5, m5, m1
; permutates: ; permutates:
; m0 0 1 2 3 => 2 6 10 14 m1 ; m0 0 1 2 3 => 2 6 10 14 m1