mirror of
https://github.com/xenia-project/FFmpeg.git
synced 2024-12-01 07:20:58 +00:00
vp9/x86: iwht4x4 (lossless) mmx.
This commit is contained in:
parent
d43efa68bd
commit
97474d527f
@ -173,6 +173,7 @@ itxfm_funcs(16, ssse3);
|
||||
itxfm_funcs(16, avx);
|
||||
itxfm_func(idct, idct, 32, ssse3);
|
||||
itxfm_func(idct, idct, 32, avx);
|
||||
itxfm_func(iwht, iwht, 4, mmx);
|
||||
|
||||
#undef itxfm_func
|
||||
#undef itxfm_funcs
|
||||
@ -223,6 +224,10 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
|
||||
if (EXTERNAL_MMX(cpu_flags)) {
|
||||
init_fpel(4, 0, 4, put, mmx);
|
||||
init_fpel(3, 0, 8, put, mmx);
|
||||
dsp->itxfm_add[4 /* lossless */][DCT_DCT] =
|
||||
dsp->itxfm_add[4 /* lossless */][ADST_DCT] =
|
||||
dsp->itxfm_add[4 /* lossless */][DCT_ADST] =
|
||||
dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx;
|
||||
}
|
||||
|
||||
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||
|
@ -151,6 +151,47 @@ SECTION .text
|
||||
%endrep
|
||||
%endmacro
|
||||
|
||||
;-------------------------------------------------------------------------------------------
|
||||
; void vp9_iwht_iwht_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
|
||||
;-------------------------------------------------------------------------------------------
|
||||
|
||||
; One 1-D pass of the 4-point inverse Walsh-Hadamard transform (per the macro
; name), applied to every 16-bit lane of m0..m3 in parallel.
; Takes no macro arguments; operates on the register names m0-m3 and writes
; m4 and m5 as scratch. The caller below reads its results back from m0-m3.
; NOTE(review): SWAP comes from the x86inc layer, which is not visible here;
; it is presumably an assemble-time register rename that emits no
; instructions - confirm. The result depends on the exact interleaving of
; the SWAPs and the arithmetic, so the statement order must not be changed.
%macro VP9_IWHT4_1D 0
|
||||
; Permute the names of registers 1..3 before the butterfly.
SWAP 1, 2, 3
|
||||
; m0 += m2 (packed 16-bit add, wrapping)
paddw m0, m2
|
||||
; m3 -= m1
psubw m3, m1
|
||||
; m4 = m0 - m3. The three-operand form is not native MMX; presumably the
; x86inc layer expands it into a copy plus a subtract - confirm.
psubw m4, m0, m3
|
||||
; Arithmetic shift right by 1: halve each signed 16-bit lane of m4.
psraw m4, 1
|
||||
; m5 = m4 - m1
psubw m5, m4, m1
|
||||
; From here on the name m1 refers to the value just computed into m5.
SWAP 5, 1
|
||||
; m4 -= m2
psubw m4, m2
|
||||
; From here on the name m2 refers to the value just computed into m4.
SWAP 4, 2
|
||||
; m0 -= m1 (using the renamed m1)
psubw m0, m1
|
||||
; m3 += m2 (using the renamed m2)
paddw m3, m2
|
||||
; Final rename so the four results are addressed as m0..m3 by the caller.
SWAP 3, 2, 1
|
||||
%endmacro
|
||||
|
||||
; MMX implementation of vp9_iwht_iwht_4x4_add(dst, stride, block, eob).
; Sequence: load four 8-byte rows from block, shift each 16-bit lane right by
; 2, run VP9_IWHT4_1D, transpose the 4x4 word matrix, run VP9_IWHT4_1D again,
; then hand the four result rows to VP9_STORE_2X (two rows per call) and
; finally invoke ZERO_BLOCK on block.
; eob is named in the cglobal line but never referenced in the body.
; NOTE(review): TRANSPOSE4x4W, VP9_STORE_2X and ZERO_BLOCK are defined outside
; this view; the notes on them below are assumptions to be confirmed.
INIT_MMX mmx
|
||||
; NOTE(review): "3, 3, 0" is presumably the x86inc convention of
; (arguments loaded, GPRs used, vector registers declared) - confirm.
cglobal vp9_iwht_iwht_4x4_add, 3, 3, 0, dst, stride, block, eob
|
||||
; Load the four rows; consecutive rows are 8 bytes apart in block.
mova m0, [blockq+0*8]
|
||||
mova m1, [blockq+1*8]
|
||||
mova m2, [blockq+2*8]
|
||||
mova m3, [blockq+3*8]
|
||||
; Arithmetic shift right by 2 on every 16-bit lane before the transform.
; NOTE(review): presumably undoes the coefficient scaling of the lossless
; WHT path - confirm against the C reference implementation.
psraw m0, 2
|
||||
psraw m1, 2
|
||||
psraw m2, 2
|
||||
psraw m3, 2
|
||||
|
||||
; First 1-D pass.
VP9_IWHT4_1D
|
||||
; Transpose m0..m3 so the second pass runs along the other dimension;
; the fifth argument (4) is presumably a scratch register.
TRANSPOSE4x4W 0, 1, 2, 3, 4
|
||||
; Second 1-D pass.
VP9_IWHT4_1D
|
||||
|
||||
; Clear m4; it is passed as the last argument to the macros below.
pxor m4, m4
|
||||
; Presumably adds rows m0/m1 to two rows of dst using m5/m6 as temporaries
; and m4 as a zero register - confirm against the macro definition.
VP9_STORE_2X 0, 1, 5, 6, 4
|
||||
; Advance dst by two rows (2 * stride bytes).
lea dstq, [dstq+strideq*2]
|
||||
; Same as above for rows m2/m3.
VP9_STORE_2X 2, 3, 5, 6, 4
|
||||
; Presumably clears the coefficient block (4 rows of 8 bytes) using the
; zeroed m4 - confirm against the macro definition.
ZERO_BLOCK blockq, 8, 4, m4
|
||||
RET
|
||||
|
||||
;-------------------------------------------------------------------------------------------
|
||||
; void vp9_idct_idct_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
|
||||
;-------------------------------------------------------------------------------------------
|
||||
|
Loading…
Reference in New Issue
Block a user