mirror of
https://github.com/xenia-project/FFmpeg.git
synced 2024-11-24 03:59:43 +00:00
Merge commit '307eb1a8ee363db1fcf869e427a8deb6d9538881'
* commit '307eb1a8ee363db1fcf869e427a8deb6d9538881':
  x86: vp8dsp: port FILTER_BILINEAR macro to cpuflags

Merged-by: James Almer <jamrial@gmail.com>
This commit is contained in:
commit
53eea3a569
@ -664,6 +664,37 @@ INIT_XMM sse2
|
||||
FILTER_V 8
|
||||
|
||||
%macro FILTER_BILINEAR 1
|
||||
%if cpuflag(ssse3)
|
||||
cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my
|
||||
shl myd, 4
|
||||
%ifdef PIC
|
||||
lea picregq, [bilinear_filter_vb_m]
|
||||
%endif
|
||||
pxor m4, m4
|
||||
mova m3, [bilinear_filter_vb+myq-16]
|
||||
.nextrow:
|
||||
movh m0, [srcq+srcstrideq*0]
|
||||
movh m1, [srcq+srcstrideq*1]
|
||||
movh m2, [srcq+srcstrideq*2]
|
||||
punpcklbw m0, m1
|
||||
punpcklbw m1, m2
|
||||
pmaddubsw m0, m3
|
||||
pmaddubsw m1, m3
|
||||
psraw m0, 2
|
||||
psraw m1, 2
|
||||
pavgw m0, m4
|
||||
pavgw m1, m4
|
||||
%if mmsize==8
|
||||
packuswb m0, m0
|
||||
packuswb m1, m1
|
||||
movh [dstq+dststrideq*0], m0
|
||||
movh [dstq+dststrideq*1], m1
|
||||
%else
|
||||
packuswb m0, m1
|
||||
movh [dstq+dststrideq*0], m0
|
||||
movhps [dstq+dststrideq*1], m0
|
||||
%endif
|
||||
%else ; cpuflag(ssse3)
|
||||
cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, picreg, my
|
||||
shl myd, 4
|
||||
%ifdef PIC
|
||||
@ -701,6 +732,7 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p
|
||||
movh [dstq+dststrideq*0], m0
|
||||
movhps [dstq+dststrideq*1], m0
|
||||
%endif
|
||||
%endif ; cpuflag(ssse3)
|
||||
|
||||
lea dstq, [dstq+dststrideq*2]
|
||||
lea srcq, [srcq+srcstrideq*2]
|
||||
@ -708,6 +740,37 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p
|
||||
jg .nextrow
|
||||
REP_RET
|
||||
|
||||
%if cpuflag(ssse3)
|
||||
cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg
|
||||
shl mxd, 4
|
||||
%ifdef PIC
|
||||
lea picregq, [bilinear_filter_vb_m]
|
||||
%endif
|
||||
pxor m4, m4
|
||||
mova m2, [filter_h2_shuf]
|
||||
mova m3, [bilinear_filter_vb+mxq-16]
|
||||
.nextrow:
|
||||
movu m0, [srcq+srcstrideq*0]
|
||||
movu m1, [srcq+srcstrideq*1]
|
||||
pshufb m0, m2
|
||||
pshufb m1, m2
|
||||
pmaddubsw m0, m3
|
||||
pmaddubsw m1, m3
|
||||
psraw m0, 2
|
||||
psraw m1, 2
|
||||
pavgw m0, m4
|
||||
pavgw m1, m4
|
||||
%if mmsize==8
|
||||
packuswb m0, m0
|
||||
packuswb m1, m1
|
||||
movh [dstq+dststrideq*0], m0
|
||||
movh [dstq+dststrideq*1], m1
|
||||
%else
|
||||
packuswb m0, m1
|
||||
movh [dstq+dststrideq*0], m0
|
||||
movhps [dstq+dststrideq*1], m0
|
||||
%endif
|
||||
%else ; cpuflag(ssse3)
|
||||
cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg
|
||||
shl mxd, 4
|
||||
%ifdef PIC
|
||||
@ -746,6 +809,7 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride
|
||||
movh [dstq+dststrideq*0], m0
|
||||
movhps [dstq+dststrideq*1], m0
|
||||
%endif
|
||||
%endif ; cpuflag(ssse3)
|
||||
|
||||
lea dstq, [dstq+dststrideq*2]
|
||||
lea srcq, [srcq+srcstrideq*2]
|
||||
@ -758,85 +822,10 @@ INIT_MMX mmxext
|
||||
FILTER_BILINEAR 4
|
||||
INIT_XMM sse2
|
||||
FILTER_BILINEAR 8
|
||||
|
||||
%macro FILTER_BILINEAR_SSSE3 1
|
||||
cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my
|
||||
shl myd, 4
|
||||
%ifdef PIC
|
||||
lea picregq, [bilinear_filter_vb_m]
|
||||
%endif
|
||||
pxor m4, m4
|
||||
mova m3, [bilinear_filter_vb+myq-16]
|
||||
.nextrow:
|
||||
movh m0, [srcq+srcstrideq*0]
|
||||
movh m1, [srcq+srcstrideq*1]
|
||||
movh m2, [srcq+srcstrideq*2]
|
||||
punpcklbw m0, m1
|
||||
punpcklbw m1, m2
|
||||
pmaddubsw m0, m3
|
||||
pmaddubsw m1, m3
|
||||
psraw m0, 2
|
||||
psraw m1, 2
|
||||
pavgw m0, m4
|
||||
pavgw m1, m4
|
||||
%if mmsize==8
|
||||
packuswb m0, m0
|
||||
packuswb m1, m1
|
||||
movh [dstq+dststrideq*0], m0
|
||||
movh [dstq+dststrideq*1], m1
|
||||
%else
|
||||
packuswb m0, m1
|
||||
movh [dstq+dststrideq*0], m0
|
||||
movhps [dstq+dststrideq*1], m0
|
||||
%endif
|
||||
|
||||
lea dstq, [dstq+dststrideq*2]
|
||||
lea srcq, [srcq+srcstrideq*2]
|
||||
sub heightd, 2
|
||||
jg .nextrow
|
||||
REP_RET
|
||||
|
||||
cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg
|
||||
shl mxd, 4
|
||||
%ifdef PIC
|
||||
lea picregq, [bilinear_filter_vb_m]
|
||||
%endif
|
||||
pxor m4, m4
|
||||
mova m2, [filter_h2_shuf]
|
||||
mova m3, [bilinear_filter_vb+mxq-16]
|
||||
.nextrow:
|
||||
movu m0, [srcq+srcstrideq*0]
|
||||
movu m1, [srcq+srcstrideq*1]
|
||||
pshufb m0, m2
|
||||
pshufb m1, m2
|
||||
pmaddubsw m0, m3
|
||||
pmaddubsw m1, m3
|
||||
psraw m0, 2
|
||||
psraw m1, 2
|
||||
pavgw m0, m4
|
||||
pavgw m1, m4
|
||||
%if mmsize==8
|
||||
packuswb m0, m0
|
||||
packuswb m1, m1
|
||||
movh [dstq+dststrideq*0], m0
|
||||
movh [dstq+dststrideq*1], m1
|
||||
%else
|
||||
packuswb m0, m1
|
||||
movh [dstq+dststrideq*0], m0
|
||||
movhps [dstq+dststrideq*1], m0
|
||||
%endif
|
||||
|
||||
lea dstq, [dstq+dststrideq*2]
|
||||
lea srcq, [srcq+srcstrideq*2]
|
||||
sub heightd, 2
|
||||
jg .nextrow
|
||||
REP_RET
|
||||
%endmacro
|
||||
|
||||
INIT_MMX ssse3
|
||||
FILTER_BILINEAR_SSSE3 4
|
||||
FILTER_BILINEAR 4
|
||||
INIT_XMM ssse3
|
||||
FILTER_BILINEAR_SSSE3 8
|
||||
FILTER_BILINEAR 8
|
||||
|
||||
INIT_MMX mmx
|
||||
cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, srcstride, height
|
||||
|
Loading…
Reference in New Issue
Block a user