mirror of
https://github.com/xenia-project/FFmpeg.git
synced 2024-11-27 21:40:34 +00:00
x86/rv34dsp: add ff_rv34_idct_dc_add_sse2
Also disable ff_rv34_idct_dc_add_mmx on x86_64 as the presence of sse2 is guaranteed in such builds. Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
parent
ab5c4d006d
commit
c8467abbad
@ -64,6 +64,7 @@ rv34_idct dc
|
||||
rv34_idct dc_noround
|
||||
|
||||
; ff_rv34_idct_dc_add_mmx(uint8_t *dst, ptrdiff_t stride, int dc);
|
||||
%if ARCH_X86_32
|
||||
INIT_MMX mmx
|
||||
cglobal rv34_idct_dc_add, 3, 3
|
||||
; calculate DC
|
||||
@ -97,6 +98,7 @@ cglobal rv34_idct_dc_add, 3, 3
|
||||
movh [r2], m4
|
||||
movh [r2+r1], m5
|
||||
RET
|
||||
%endif
|
||||
|
||||
; Load coeffs and perform row transform
|
||||
; Output: coeffs in mm[0467], rounder in mm5
|
||||
@ -167,7 +169,7 @@ cglobal rv34_idct_add, 3,3,0, d, s, b
|
||||
ret
|
||||
|
||||
; ff_rv34_idct_dc_add_{sse2,sse4}(uint8_t *dst, ptrdiff_t stride, int dc);
|
||||
INIT_XMM sse4
|
||||
%macro RV34_IDCT_DC_ADD 0
|
||||
cglobal rv34_idct_dc_add, 3, 3, 6
|
||||
; load data
|
||||
IDCT_DC_ROUND r2
|
||||
@ -190,7 +192,22 @@ cglobal rv34_idct_dc_add, 3, 3, 6
|
||||
paddw m4, m0
|
||||
packuswb m2, m4
|
||||
movd [r0], m2
|
||||
%if cpuflag(sse4)
|
||||
pextrd [r0+r1], m2, 1
|
||||
pextrd [r2], m2, 2
|
||||
pextrd [r2+r1], m2, 3
|
||||
%else
|
||||
psrldq m2, 4
|
||||
movd [r0+r1], m2
|
||||
psrldq m2, 4
|
||||
movd [r2], m2
|
||||
psrldq m2, 4
|
||||
movd [r2+r1], m2
|
||||
%endif
|
||||
RET
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse2
|
||||
RV34_IDCT_DC_ADD
|
||||
INIT_XMM sse4
|
||||
RV34_IDCT_DC_ADD
|
||||
|
@ -27,6 +27,7 @@
|
||||
void ff_rv34_idct_dc_mmxext(int16_t *block);
|
||||
void ff_rv34_idct_dc_noround_mmxext(int16_t *block);
|
||||
void ff_rv34_idct_dc_add_mmx(uint8_t *dst, ptrdiff_t stride, int dc);
|
||||
void ff_rv34_idct_dc_add_sse2(uint8_t *dst, ptrdiff_t stride, int dc);
|
||||
void ff_rv34_idct_dc_add_sse4(uint8_t *dst, ptrdiff_t stride, int dc);
|
||||
void ff_rv34_idct_add_mmxext(uint8_t *dst, ptrdiff_t stride, int16_t *block);
|
||||
|
||||
@ -34,12 +35,14 @@ av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c)
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
if (EXTERNAL_MMX(cpu_flags))
|
||||
if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags))
|
||||
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_mmx;
|
||||
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||
c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmxext;
|
||||
c->rv34_idct_add = ff_rv34_idct_add_mmxext;
|
||||
}
|
||||
if (EXTERNAL_SSE2(cpu_flags))
|
||||
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_sse2;
|
||||
if (EXTERNAL_SSE4(cpu_flags))
|
||||
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_sse4;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user