16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
Originally committed as revision 23783 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent d6f8476be4
commit 4af8cdfc3f
@@ -1299,4 +1299,5 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id){
     h->pred16x16_add[ HOR_PRED8x8]= pred16x16_horizontal_add_c;
 
     if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id);
+    if (ARCH_X86) ff_h264_pred_init_x86(h, codec_id);
 }
@@ -87,5 +87,6 @@ typedef struct H264PredContext{
 
 void ff_h264_pred_init(H264PredContext *h, int codec_id);
 void ff_h264_pred_init_arm(H264PredContext *h, int codec_id);
+void ff_h264_pred_init_x86(H264PredContext *h, int codec_id);
 
 #endif /* AVCODEC_H264PRED_H */
@@ -8,6 +8,7 @@ YASM-OBJS-$(CONFIG_FFT)                += x86/fft_mmx.o              \
                                           $(YASM-OBJS-FFT-yes)
 YASM-OBJS-$(CONFIG_GPL)                += x86/h264_deblock_sse2.o    \
                                           x86/h264_idct_sse2.o       \
+                                          x86/h264_intrapred.o       \
 
 MMX-OBJS-$(CONFIG_CAVS_DECODER)        += x86/cavsdsp_mmx.o
 MMX-OBJS-$(CONFIG_MP1FLOAT_DECODER)    += x86/mpegaudiodec_mmx.o
@@ -62,7 +62,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
 DECLARE_ALIGNED(8,  const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
 
 DECLARE_ALIGNED(8,  const uint64_t, ff_pb_1  ) = 0x0101010101010101ULL;
-DECLARE_ALIGNED(8,  const uint64_t, ff_pb_3  ) = 0x0303030303030303ULL;
+DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_3  ) = {0x0303030303030303ULL, 0x0303030303030303ULL};
 DECLARE_ALIGNED(8,  const uint64_t, ff_pb_7  ) = 0x0707070707070707ULL;
 DECLARE_ALIGNED(8,  const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL;
 DECLARE_ALIGNED(8,  const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL;
@@ -48,7 +48,7 @@ extern const uint64_t ff_pw_128;
 extern const uint64_t ff_pw_255;
 
 extern const uint64_t ff_pb_1;
-extern const uint64_t ff_pb_3;
+extern const xmm_reg  ff_pb_3;
 extern const uint64_t ff_pb_7;
 extern const uint64_t ff_pb_1F;
 extern const uint64_t ff_pb_3F;
486 libavcodec/x86/h264_intrapred.asm Normal file

@@ -0,0 +1,486 @@
;******************************************************************************
;* H.264 intra prediction asm optimizations
;* Copyright (c) 2010 Jason Garrett-Glaser
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "x86inc.asm"

SECTION_RODATA

tm_shuf: times 8 db 0x03, 0x80

SECTION .text

cextern pb_3

;-----------------------------------------------------------------------------
; void pred16x16_vertical(uint8_t *src, int stride)
;-----------------------------------------------------------------------------

cglobal pred16x16_vertical_mmx, 2,3
    sub   r0, r1
    mov   r2, 8
    movq  mm0, [r0+0]
    movq  mm1, [r0+8]
.loop:
    movq  [r0+r1*1+0], mm0
    movq  [r0+r1*1+8], mm1
    movq  [r0+r1*2+0], mm0
    movq  [r0+r1*2+8], mm1
    lea   r0, [r0+r1*2]
    dec   r2
    jg .loop
    REP_RET

cglobal pred16x16_vertical_sse, 2,3
    sub   r0, r1
    mov   r2, 4
    movaps xmm0, [r0]
.loop:
    movaps [r0+r1*1], xmm0
    movaps [r0+r1*2], xmm0
    lea   r0, [r0+r1*2]
    movaps [r0+r1*1], xmm0
    movaps [r0+r1*2], xmm0
    lea   r0, [r0+r1*2]
    dec   r2
    jg .loop
    REP_RET

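For readers less fluent in x86 asm: vertical prediction simply copies the 16 pixels above the block into every row. A minimal scalar C sketch of the same operation (illustrative only; the pred16x16_vertical_ref name is hypothetical and not part of this commit):

#include <stdint.h>
#include <string.h>

/* Scalar reference for 16x16 vertical prediction: replicate the row of
 * neighbour pixels directly above the block into all 16 rows. */
static void pred16x16_vertical_ref(uint8_t *src, int stride)
{
    const uint8_t *top = src - stride;          /* row above the block */
    for (int y = 0; y < 16; y++)
        memcpy(src + y * stride, top, 16);
}

The MMX version stores two 8-byte halves per row, the SSE version one aligned 16-byte store; the 8x8 chroma variant further down is the same idea with 8-byte rows.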
;-----------------------------------------------------------------------------
; void pred16x16_horizontal(uint8_t *src, int stride)
;-----------------------------------------------------------------------------

%macro PRED16x16_H 1
cglobal pred16x16_horizontal_%1, 2,3
    mov   r2, 8
%ifidn %1, ssse3
    mova  m2, [pb_3]
%endif
.loop:
    movd  m0, [r0+r1*0-4]
    movd  m1, [r0+r1*1-4]

%ifidn %1, ssse3
    pshufb m0, m2
    pshufb m1, m2
%else
    punpcklbw m0, m0
    punpcklbw m1, m1
%ifidn %1, mmxext
    pshufw m0, m0, 0xff
    pshufw m1, m1, 0xff
%else
    punpckhwd m0, m0
    punpckhwd m1, m1
    punpckhdq m0, m0
    punpckhdq m1, m1
%endif
    mova  [r0+r1*0+8], m0
    mova  [r0+r1*1+8], m1
%endif

    mova  [r0+r1*0], m0
    mova  [r0+r1*1], m1
    lea   r0, [r0+r1*2]
    dec   r2
    jg .loop
    REP_RET
%endmacro

INIT_MMX
PRED16x16_H mmx
PRED16x16_H mmxext
INIT_XMM
PRED16x16_H ssse3

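Horizontal prediction fills each row with the pixel immediately to its left; that pixel sits in byte 3 of the 4-byte load [r0-4]. The three code paths differ only in how that byte is splatted across the register: a punpcklbw/punpckhwd/punpckhdq chain for plain MMX, pshufw 0xff for MMXEXT, and pshufb against the all-0x03 pb_3 constant for SSSE3 (which is why this commit widens ff_pb_3 to a 16-byte xmm_reg). A scalar sketch of the result (hypothetical helper name, not part of the commit):

#include <stdint.h>
#include <string.h>

/* Scalar reference for 16x16 horizontal prediction: each row is filled with
 * its left neighbour, src[-1] of that row. */
static void pred16x16_horizontal_ref(uint8_t *src, int stride)
{
    for (int y = 0; y < 16; y++)
        memset(src + y * stride, src[y * stride - 1], 16);
}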
;-----------------------------------------------------------------------------
; void pred16x16_dc(uint8_t *src, int stride)
;-----------------------------------------------------------------------------

%macro PRED16x16_DC 2
cglobal pred16x16_dc_%1, 2,7
    mov   r4, r0
    sub   r0, r1
    pxor  mm0, mm0
    pxor  mm1, mm1
    psadbw mm0, [r0+0]
    psadbw mm1, [r0+8]
    dec   r0
    movzx r5d, byte [r0+r1*1]
    paddw mm0, mm1
    movd  r6d, mm0
    lea   r0, [r0+r1*2]
%rep 7
    movzx r2d, byte [r0+r1*0]
    movzx r3d, byte [r0+r1*1]
    add   r5d, r2d
    add   r6d, r3d
    lea   r0, [r0+r1*2]
%endrep
    movzx r2d, byte [r0+r1*0]
    add   r5d, r6d
    lea   r2d, [r2+r5+16]
    shr   r2d, 5
%ifidn %1, mmx
    movd  m0, r2d
    punpcklbw m0, m0
    punpcklwd m0, m0
    punpckldq m0, m0
%elifidn %1, mmxext
    movd  m0, r2d
    punpcklbw m0, m0
    pshufw m0, m0, 0
%elifidn %1, sse
    imul  r2d, 0x01010101
    movd  m0, r2d
    shufps m0, m0, 0
%elifidn %1, sse2
    movd  m0, r2d
    punpcklbw m0, m0
    pshuflw m0, m0, 0
    punpcklqdq m0, m0
%elifidn %1, ssse3
    pxor  m1, m1
    movd  m0, r2d
    pshufb m0, m1
%endif

%if mmsize==8
    mov   r3d, 8
.loop:
    %2 [r4+r1*0+0], m0
    %2 [r4+r1*0+8], m0
    %2 [r4+r1*1+0], m0
    %2 [r4+r1*1+8], m0
%else
    mov   r3d, 4
.loop:
    %2 [r4+r1*0], m0
    %2 [r4+r1*1], m0
    lea   r4, [r4+r1*2]
    %2 [r4+r1*0], m0
    %2 [r4+r1*1], m0
%endif
    lea   r4, [r4+r1*2]
    dec   r3d
    jg .loop
    REP_RET
%endmacro

INIT_MMX
PRED16x16_DC mmx, movq
PRED16x16_DC mmxext, movq
INIT_XMM
PRED16x16_DC sse, movaps
PRED16x16_DC sse2, movdqa
PRED16x16_DC ssse3, movdqa

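DC prediction writes a single rounded average to the whole block: the 16 pixels above are summed with psadbw against zero, the 16 pixels to the left with scalar movzx/add, and the resulting DC byte is then broadcast to a full register. The five instantiations differ only in that broadcast step: punpck chain (MMX), pshufw (MMXEXT), imul by 0x01010101 plus shufps (SSE; multiplying a byte by 0x01010101 copies it into all four bytes of a dword), pshuflw+punpcklqdq (SSE2), pshufb against zero (SSSE3). A scalar sketch of the whole function (hypothetical name, illustrative only):

#include <stdint.h>
#include <string.h>

/* Scalar reference for 16x16 DC prediction with both edges available:
 * dc = (sum of 16 top + 16 left neighbours + 16) >> 5, written everywhere. */
static void pred16x16_dc_ref(uint8_t *src, int stride)
{
    int sum = 0;
    for (int i = 0; i < 16; i++) {
        sum += src[i - stride];          /* top row   */
        sum += src[i * stride - 1];      /* left edge */
    }
    int dc = (sum + 16) >> 5;
    for (int y = 0; y < 16; y++)
        memset(src + y * stride, dc, 16);
}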
;-----------------------------------------------------------------------------
; void pred16x16_tm_vp8(uint8_t *src, int stride)
;-----------------------------------------------------------------------------

%macro PRED16x16_TM_MMX 1
cglobal pred16x16_tm_vp8_%1, 2,5
    sub   r0, r1
    pxor  mm7, mm7
    movq  mm0, [r0+0]
    movq  mm2, [r0+8]
    movq  mm1, mm0
    movq  mm3, mm2
    punpcklbw mm0, mm7
    punpckhbw mm1, mm7
    punpcklbw mm2, mm7
    punpckhbw mm3, mm7
    movzx r3d, byte [r0-1]
    mov   r4d, 16
.loop:
    movzx r2d, byte [r0+r1-1]
    sub   r2d, r3d
    movd  mm4, r2d
%ifidn %1, mmx
    punpcklwd mm4, mm4
    punpckldq mm4, mm4
%else
    pshufw mm4, mm4, 0
%endif
    movq  mm5, mm4
    movq  mm6, mm4
    movq  mm7, mm4
    paddw mm4, mm0
    paddw mm5, mm1
    paddw mm6, mm2
    paddw mm7, mm3
    packuswb mm4, mm5
    packuswb mm6, mm7
    movq  [r0+r1+0], mm4
    movq  [r0+r1+8], mm6
    add   r0, r1
    dec   r4d
    jg .loop
    REP_RET
%endmacro

PRED16x16_TM_MMX mmx
PRED16x16_TM_MMX mmxext

cglobal pred16x16_tm_vp8_sse2, 2,6,6
    sub   r0, r1
    pxor  xmm2, xmm2
    movdqa xmm0, [r0]
    movdqa xmm1, xmm0
    punpcklbw xmm0, xmm2
    punpckhbw xmm1, xmm2
    movzx r4d, byte [r0-1]
    mov   r5d, 8
.loop:
    movzx r2d, byte [r0+r1*1-1]
    movzx r3d, byte [r0+r1*2-1]
    sub   r2d, r4d
    sub   r3d, r4d
    movd  xmm2, r2d
    movd  xmm4, r3d
    pshuflw xmm2, xmm2, 0
    pshuflw xmm4, xmm4, 0
    punpcklqdq xmm2, xmm2
    punpcklqdq xmm4, xmm4
    movdqa xmm3, xmm2
    movdqa xmm5, xmm4
    paddw xmm2, xmm0
    paddw xmm3, xmm1
    paddw xmm4, xmm0
    paddw xmm5, xmm1
    packuswb xmm2, xmm3
    packuswb xmm4, xmm5
    movdqa [r0+r1*1], xmm2
    movdqa [r0+r1*2], xmm4
    lea   r0, [r0+r1*2]
    dec   r5d
    jg .loop
    REP_RET

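VP8's "TM" (TrueMotion) mode predicts each pixel as its top neighbour plus its left neighbour minus the top-left corner, clamped to 0..255; in the asm the clamp falls out of packuswb's unsigned saturation. A scalar sketch (hypothetical name, not part of the commit):

#include <stdint.h>

/* Scalar reference for VP8 16x16 TrueMotion prediction:
 * pred[y][x] = clip(top[x] + left[y] - topleft, 0, 255). */
static void pred16x16_tm_vp8_ref(uint8_t *src, int stride)
{
    const uint8_t *top = src - stride;
    int tl = top[-1];                            /* top-left corner */
    for (int y = 0; y < 16; y++) {
        int delta = src[y * stride - 1] - tl;    /* left[y] - topleft */
        for (int x = 0; x < 16; x++) {
            int v = top[x] + delta;
            src[y * stride + x] = v < 0 ? 0 : v > 255 ? 255 : v;
        }
    }
}

The asm keeps the top row zero-extended to 16-bit words in registers, adds the per-row delta as a broadcast word, and packs back to bytes: one row per iteration in the MMX/MMXEXT versions, two rows per iteration in the SSE2 version.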
;-----------------------------------------------------------------------------
; void pred8x8_vertical(uint8_t *src, int stride)
;-----------------------------------------------------------------------------

cglobal pred8x8_vertical_mmx, 2,2
    sub   r0, r1
    movq  mm0, [r0]
%rep 3
    movq  [r0+r1*1], mm0
    movq  [r0+r1*2], mm0
    lea   r0, [r0+r1*2]
%endrep
    movq  [r0+r1*1], mm0
    movq  [r0+r1*2], mm0
    RET

;-----------------------------------------------------------------------------
; void pred8x8_horizontal(uint8_t *src, int stride)
;-----------------------------------------------------------------------------

%macro PRED8x8_H 1
cglobal pred8x8_horizontal_%1, 2,3
    mov   r2, 4
%ifidn %1, ssse3
    mova  m2, [pb_3]
%endif
.loop:
    movd  m0, [r0+r1*0-4]
    movd  m1, [r0+r1*1-4]
%ifidn %1, ssse3
    pshufb m0, m2
    pshufb m1, m2
%else
    punpcklbw m0, m0
    punpcklbw m1, m1
%ifidn %1, mmxext
    pshufw m0, m0, 0xff
    pshufw m1, m1, 0xff
%else
    punpckhwd m0, m0
    punpckhwd m1, m1
    punpckhdq m0, m0
    punpckhdq m1, m1
%endif
%endif
    mova  [r0+r1*0], m0
    mova  [r0+r1*1], m1
    lea   r0, [r0+r1*2]
    dec   r2
    jg .loop
    REP_RET
%endmacro

INIT_MMX
PRED8x8_H mmx
PRED8x8_H mmxext
PRED8x8_H ssse3

;-----------------------------------------------------------------------------
; void pred8x8_dc_rv40(uint8_t *src, int stride)
;-----------------------------------------------------------------------------

%macro PRED8x8_DC 1
cglobal pred8x8_dc_rv40_%1, 2,7
    mov   r4, r0
    sub   r0, r1
    pxor  mm0, mm0
    psadbw mm0, [r0]
    dec   r0
    movzx r5d, byte [r0+r1*1]
    movd  r6d, mm0
    lea   r0, [r0+r1*2]
%rep 3
    movzx r2d, byte [r0+r1*0]
    movzx r3d, byte [r0+r1*1]
    add   r5d, r2d
    add   r6d, r3d
    lea   r0, [r0+r1*2]
%endrep
    movzx r2d, byte [r0+r1*0]
    add   r5d, r6d
    lea   r2d, [r2+r5+8]
    shr   r2d, 4
%ifidn %1, mmx
    movd  mm0, r2d
    punpcklbw mm0, mm0
    punpcklwd mm0, mm0
    punpckldq mm0, mm0
%else
    movd  mm0, r2d
    punpcklbw mm0, mm0
    pshufw mm0, mm0, 0
%endif
    mov   r3d, 4
.loop:
    movq  [r4+r1*0], mm0
    movq  [r4+r1*1], mm0
    lea   r4, [r4+r1*2]
    dec   r3d
    jg .loop
    REP_RET
%endmacro

PRED8x8_DC mmx
PRED8x8_DC mmxext

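This 8x8 DC variant computes one DC value from the 8 top and 8 left neighbours with rounding, (sum + 8) >> 4, and fills the block with it; H.264's own 8x8 chroma DC mode computes per-4x4 DC values instead, which is why this function is only installed for CODEC_ID_VP8 in the init code below. Scalar sketch (hypothetical name, illustrative only):

#include <stdint.h>
#include <string.h>

/* Scalar reference for the RV40/VP8-style 8x8 chroma DC prediction:
 * a single rounded average of the 8 top and 8 left neighbours. */
static void pred8x8_dc_rv40_ref(uint8_t *src, int stride)
{
    int sum = 0;
    for (int i = 0; i < 8; i++) {
        sum += src[i - stride];          /* top row   */
        sum += src[i * stride - 1];      /* left edge */
    }
    int dc = (sum + 8) >> 4;
    for (int y = 0; y < 8; y++)
        memset(src + y * stride, dc, 8);
}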
;-----------------------------------------------------------------------------
; void pred8x8_tm_vp8(uint8_t *src, int stride)
;-----------------------------------------------------------------------------

%macro PRED8x8_TM_MMX 1
cglobal pred8x8_tm_vp8_%1, 2,6
    sub   r0, r1
    pxor  mm7, mm7
    movq  mm0, [r0]
    movq  mm1, mm0
    punpcklbw mm0, mm7
    punpckhbw mm1, mm7
    movzx r4d, byte [r0-1]
    mov   r5d, 4
.loop:
    movzx r2d, byte [r0+r1*1-1]
    movzx r3d, byte [r0+r1*2-1]
    sub   r2d, r4d
    sub   r3d, r4d
    movd  mm2, r2d
    movd  mm4, r3d
%ifidn %1, mmx
    punpcklwd mm2, mm2
    punpcklwd mm4, mm4
    punpckldq mm2, mm2
    punpckldq mm4, mm4
%else
    pshufw mm2, mm2, 0
    pshufw mm4, mm4, 0
%endif
    movq  mm3, mm2
    movq  mm5, mm4
    paddw mm2, mm0
    paddw mm3, mm1
    paddw mm4, mm0
    paddw mm5, mm1
    packuswb mm2, mm3
    packuswb mm4, mm5
    movq  [r0+r1*1], mm2
    movq  [r0+r1*2], mm4
    lea   r0, [r0+r1*2]
    dec   r5d
    jg .loop
    REP_RET
%endmacro

PRED8x8_TM_MMX mmx
PRED8x8_TM_MMX mmxext

cglobal pred8x8_tm_vp8_sse2, 2,6,4
    sub   r0, r1
    pxor  xmm1, xmm1
    movq  xmm0, [r0]
    punpcklbw xmm0, xmm1
    movzx r4d, byte [r0-1]
    mov   r5d, 4
.loop:
    movzx r2d, byte [r0+r1*1-1]
    movzx r3d, byte [r0+r1*2-1]
    sub   r2d, r4d
    sub   r3d, r4d
    movd  xmm2, r2d
    movd  xmm3, r3d
    pshuflw xmm2, xmm2, 0
    pshuflw xmm3, xmm3, 0
    punpcklqdq xmm2, xmm2
    punpcklqdq xmm3, xmm3
    paddw xmm2, xmm0
    paddw xmm3, xmm0
    packuswb xmm2, xmm3
    movq  [r0+r1*1], xmm2
    movhps [r0+r1*2], xmm2
    lea   r0, [r0+r1*2]
    dec   r5d
    jg .loop
    REP_RET

cglobal pred8x8_tm_vp8_ssse3, 2,3,6
    sub   r0, r1
    movdqa xmm4, [tm_shuf]
    pxor  xmm1, xmm1
    movq  xmm0, [r0]
    punpcklbw xmm0, xmm1
    movd  xmm5, [r0-4]
    pshufb xmm5, xmm4
    mov   r2d, 4
.loop:
    movd  xmm2, [r0+r1*1-4]
    movd  xmm3, [r0+r1*2-4]
    pshufb xmm2, xmm4
    pshufb xmm3, xmm4
    psubw xmm2, xmm5
    psubw xmm3, xmm5
    paddw xmm2, xmm0
    paddw xmm3, xmm0
    packuswb xmm2, xmm3
    movq  [r0+r1*1], xmm2
    movhps [r0+r1*2], xmm2
    lea   r0, [r0+r1*2]
    dec   r2d
    jg .loop
    REP_RET
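The SSSE3 version above leans on the tm_shuf constant (times 8 db 0x03, 0x80): a pshufb index with its high bit set produces zero, so a single shuffle both broadcasts the left-edge byte (byte 3 of the 4-byte load) and zero-extends it to eight 16-bit words, replacing the movzx/movd/pshuflw/punpcklqdq sequence of the SSE2 path. An illustrative C intrinsics sketch of just that trick (assumes SSSE3; the helper name is made up):

#include <tmmintrin.h>   /* SSSE3: _mm_shuffle_epi8 */

/* Broadcast byte 3 of 'px' into all eight word lanes, zero-extended,
 * using the same mask layout as tm_shuf (bytes 0x03, 0x80 repeated). */
static __m128i broadcast_byte3_as_words(__m128i px)
{
    const __m128i tm_shuf = _mm_set1_epi16((short)0x8003); /* 03,80,03,80,... in memory */
    return _mm_shuffle_epi8(px, tm_shuf);
}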
@@ -19,6 +19,7 @@
  */
 
 #include "dsputil_mmx.h"
+#include "libavcodec/h264pred.h"
 
 DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1  ) = 0x0103010301030103ULL;
 DECLARE_ALIGNED(8, static const uint64_t, ff_pb_7_3  ) = 0x0307030703070307ULL;
@@ -2322,3 +2323,77 @@ H264_WEIGHT( 4, 8)
 H264_WEIGHT( 4, 4)
 H264_WEIGHT( 4, 2)
 
+void ff_pred16x16_vertical_mmx     (uint8_t *src, int stride);
+void ff_pred16x16_vertical_sse     (uint8_t *src, int stride);
+void ff_pred16x16_horizontal_mmx   (uint8_t *src, int stride);
+void ff_pred16x16_horizontal_mmxext(uint8_t *src, int stride);
+void ff_pred16x16_horizontal_ssse3 (uint8_t *src, int stride);
+void ff_pred16x16_dc_mmx           (uint8_t *src, int stride);
+void ff_pred16x16_dc_mmxext        (uint8_t *src, int stride);
+void ff_pred16x16_dc_sse           (uint8_t *src, int stride);
+void ff_pred16x16_dc_sse2          (uint8_t *src, int stride);
+void ff_pred16x16_dc_ssse3         (uint8_t *src, int stride);
+void ff_pred16x16_tm_vp8_mmx       (uint8_t *src, int stride);
+void ff_pred16x16_tm_vp8_mmxext    (uint8_t *src, int stride);
+void ff_pred16x16_tm_vp8_sse2      (uint8_t *src, int stride);
+void ff_pred8x8_dc_rv40_mmx        (uint8_t *src, int stride);
+void ff_pred8x8_dc_rv40_mmxext     (uint8_t *src, int stride);
+void ff_pred8x8_vertical_mmx       (uint8_t *src, int stride);
+void ff_pred8x8_horizontal_mmx     (uint8_t *src, int stride);
+void ff_pred8x8_horizontal_mmxext  (uint8_t *src, int stride);
+void ff_pred8x8_horizontal_ssse3   (uint8_t *src, int stride);
+void ff_pred8x8_tm_vp8_mmx         (uint8_t *src, int stride);
+void ff_pred8x8_tm_vp8_mmxext      (uint8_t *src, int stride);
+void ff_pred8x8_tm_vp8_sse2        (uint8_t *src, int stride);
+void ff_pred8x8_tm_vp8_ssse3       (uint8_t *src, int stride);
+
+void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
+{
+#if HAVE_YASM
+    if (mm_flags & FF_MM_MMX) {
+        h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_mmx;
+        h->pred16x16[HOR_PRED8x8  ] = ff_pred16x16_horizontal_mmx;
+        h->pred16x16[DC_PRED8x8   ] = ff_pred16x16_dc_mmx;
+        h->pred8x8  [VERT_PRED8x8 ] = ff_pred8x8_vertical_mmx;
+        h->pred8x8  [HOR_PRED8x8  ] = ff_pred8x8_horizontal_mmx;
+        if (codec_id == CODEC_ID_VP8) {
+            h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmx;
+            h->pred8x8  [DC_PRED8x8   ] = ff_pred8x8_dc_rv40_mmx;
+            h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmx;
+        }
+    }
+
+    if (mm_flags & FF_MM_MMXEXT) {
+        h->pred16x16[HOR_PRED8x8  ] = ff_pred16x16_horizontal_mmxext;
+        h->pred16x16[DC_PRED8x8   ] = ff_pred16x16_dc_mmxext;
+        h->pred8x8  [HOR_PRED8x8  ] = ff_pred8x8_horizontal_mmxext;
+        if (codec_id == CODEC_ID_VP8) {
+            h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmxext;
+            h->pred8x8  [DC_PRED8x8   ] = ff_pred8x8_dc_rv40_mmxext;
+            h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmxext;
+        }
+    }
+
+    if (mm_flags & FF_MM_SSE) {
+        h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_sse;
+        h->pred16x16[DC_PRED8x8   ] = ff_pred16x16_dc_sse;
+    }
+
+    if (mm_flags & FF_MM_SSE2) {
+        h->pred16x16[DC_PRED8x8   ] = ff_pred16x16_dc_sse2;
+        if (codec_id == CODEC_ID_VP8) {
+            h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_sse2;
+            h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_sse2;
+        }
+    }
+
+    if (mm_flags & FF_MM_SSSE3) {
+        h->pred16x16[HOR_PRED8x8  ] = ff_pred16x16_horizontal_ssse3;
+        h->pred16x16[DC_PRED8x8   ] = ff_pred16x16_dc_ssse3;
+        h->pred8x8  [HOR_PRED8x8  ] = ff_pred8x8_horizontal_ssse3;
+        if (codec_id == CODEC_ID_VP8) {
+            h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_ssse3;
+        }
+    }
+#endif
+}
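How this is wired up at runtime: ff_h264_pred_init() fills the table with the C functions, then the arch-specific init above overwrites entries for which a SIMD version exists, with the more specific CPU-flag blocks running last so they win. A hypothetical caller sketch (illustrative only, not part of the commit):

#include <stdint.h>
#include "libavcodec/h264pred.h"

/* After init, decoders call through the function-pointer table and
 * transparently get the fastest version the running CPU supports
 * (e.g. ff_pred16x16_dc_ssse3 on an SSSE3-capable machine). */
static void predict_dc_16x16(uint8_t *dst, int stride, int codec_id)
{
    H264PredContext pred;
    ff_h264_pred_init(&pred, codec_id);      /* C defaults, then ARM/x86 overrides */
    pred.pred16x16[DC_PRED8x8](dst, stride);
}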