avcodec/mips: version 1 of vp8dsp optimizations for loongson mmi

Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
This commit is contained in:
Zhou Xiaoyong 2016-10-10 16:10:22 +08:00 committed by Michael Niedermayer
parent 3f55752cd5
commit c5c6e30781
6 changed files with 3261 additions and 0 deletions

View File

@ -77,4 +77,5 @@ MMI-OBJS-$(CONFIG_MPEG4_DECODER) += mips/xvid_idct_mmi.o
MMI-OBJS-$(CONFIG_BLOCKDSP) += mips/blockdsp_mmi.o
MMI-OBJS-$(CONFIG_PIXBLOCKDSP) += mips/pixblockdsp_mmi.o
MMI-OBJS-$(CONFIG_H264QPEL) += mips/h264qpel_mmi.o
MMI-OBJS-$(CONFIG_VP8_DECODER) += mips/vp8dsp_mmi.o
MMI-OBJS-$(CONFIG_HPELDSP) += mips/hpeldsp_mmi.o

View File

@ -28,6 +28,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_pw_2) = {0x0002000200020002ULL};
/*
 * Packed-word constants: in each ff_pw_N below, the 64-bit initializer
 * repeats the value N in all four 16-bit lanes (e.g. ff_pw_6 is
 * 0x0006 0006 0006 0006).
 */
DECLARE_ALIGNED(8, const uint64_t, ff_pw_3) = {0x0003000300030003ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_4) = {0x0004000400040004ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_5) = {0x0005000500050005ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_6) = {0x0006000600060006ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_8) = {0x0008000800080008ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_9) = {0x0009000900090009ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_10) = {0x000A000A000A000AULL};

View File

@ -29,6 +29,7 @@ extern const uint64_t ff_pw_2;
/*
 * Read-only 64-bit packed-word constants. The numeric suffix is the value
 * carried in each 16-bit lane of the constant.
 */
extern const uint64_t ff_pw_3;
extern const uint64_t ff_pw_4;
extern const uint64_t ff_pw_5;
extern const uint64_t ff_pw_6;
extern const uint64_t ff_pw_8;
extern const uint64_t ff_pw_9;
extern const uint64_t ff_pw_10;

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
* Copyright (c) 2016 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
*
* This file is part of FFmpeg.
*
@ -105,9 +106,97 @@ static av_cold void vp8dsp_init_msa(VP8DSPContext *dsp)
}
#endif // #if HAVE_MSA
#if HAVE_MMI
/**
 * Install the Loongson MMI routines into a VP8 DSP context.
 *
 * Every slot assigned below is overwritten unconditionally; slots that are
 * not mentioned keep whatever value the context already held.
 *
 * Indexing of the two motion-compensation tables, as used here:
 *   tab[size][v][h]
 *     size: 0 -> 16-wide routines, 1 -> 8-wide, 2 -> 4-wide
 *     v, h: 0 -> no filtering in that direction
 *           1 -> 4-tap, 2 -> 6-tap for the epel table; the bilinear table
 *                gets the same routine for both 1 and 2
 *
 * @param dsp context whose function pointers are replaced
 */
static av_cold void vp8dsp_init_mmi(VP8DSPContext *dsp)
{
    /* Luma DC WHT and IDCT slots. */
    dsp->vp8_luma_dc_wht    = ff_vp8_luma_dc_wht_mmi;
    dsp->vp8_luma_dc_wht_dc = ff_vp8_luma_dc_wht_dc_mmi;
    dsp->vp8_idct_add       = ff_vp8_idct_add_mmi;
    dsp->vp8_idct_dc_add    = ff_vp8_idct_dc_add_mmi;
    dsp->vp8_idct_dc_add4y  = ff_vp8_idct_dc_add4y_mmi;
    dsp->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmi;

    /* Epel table, 16-wide. */
    dsp->put_vp8_epel_pixels_tab[0][0][1] = ff_put_vp8_epel16_h4_mmi;
    dsp->put_vp8_epel_pixels_tab[0][0][2] = ff_put_vp8_epel16_h6_mmi;
    dsp->put_vp8_epel_pixels_tab[0][1][0] = ff_put_vp8_epel16_v4_mmi;
    dsp->put_vp8_epel_pixels_tab[0][1][1] = ff_put_vp8_epel16_h4v4_mmi;
    dsp->put_vp8_epel_pixels_tab[0][1][2] = ff_put_vp8_epel16_h6v4_mmi;
    dsp->put_vp8_epel_pixels_tab[0][2][0] = ff_put_vp8_epel16_v6_mmi;
    dsp->put_vp8_epel_pixels_tab[0][2][1] = ff_put_vp8_epel16_h4v6_mmi;
    dsp->put_vp8_epel_pixels_tab[0][2][2] = ff_put_vp8_epel16_h6v6_mmi;

    /* Epel table, 8-wide. */
    dsp->put_vp8_epel_pixels_tab[1][0][1] = ff_put_vp8_epel8_h4_mmi;
    dsp->put_vp8_epel_pixels_tab[1][0][2] = ff_put_vp8_epel8_h6_mmi;
    dsp->put_vp8_epel_pixels_tab[1][1][0] = ff_put_vp8_epel8_v4_mmi;
    dsp->put_vp8_epel_pixels_tab[1][1][1] = ff_put_vp8_epel8_h4v4_mmi;
    dsp->put_vp8_epel_pixels_tab[1][1][2] = ff_put_vp8_epel8_h6v4_mmi;
    dsp->put_vp8_epel_pixels_tab[1][2][0] = ff_put_vp8_epel8_v6_mmi;
    dsp->put_vp8_epel_pixels_tab[1][2][1] = ff_put_vp8_epel8_h4v6_mmi;
    dsp->put_vp8_epel_pixels_tab[1][2][2] = ff_put_vp8_epel8_h6v6_mmi;

    /* Epel table, 4-wide. */
    dsp->put_vp8_epel_pixels_tab[2][0][1] = ff_put_vp8_epel4_h4_mmi;
    dsp->put_vp8_epel_pixels_tab[2][0][2] = ff_put_vp8_epel4_h6_mmi;
    dsp->put_vp8_epel_pixels_tab[2][1][0] = ff_put_vp8_epel4_v4_mmi;
    dsp->put_vp8_epel_pixels_tab[2][1][1] = ff_put_vp8_epel4_h4v4_mmi;
    dsp->put_vp8_epel_pixels_tab[2][1][2] = ff_put_vp8_epel4_h6v4_mmi;
    dsp->put_vp8_epel_pixels_tab[2][2][0] = ff_put_vp8_epel4_v6_mmi;
    dsp->put_vp8_epel_pixels_tab[2][2][1] = ff_put_vp8_epel4_h4v6_mmi;
    dsp->put_vp8_epel_pixels_tab[2][2][2] = ff_put_vp8_epel4_h6v6_mmi;

    /* Bilinear table, 16-wide: indices 1 and 2 share one routine. */
    dsp->put_vp8_bilinear_pixels_tab[0][0][1] = ff_put_vp8_bilinear16_h_mmi;
    dsp->put_vp8_bilinear_pixels_tab[0][0][2] = ff_put_vp8_bilinear16_h_mmi;
    dsp->put_vp8_bilinear_pixels_tab[0][1][0] = ff_put_vp8_bilinear16_v_mmi;
    dsp->put_vp8_bilinear_pixels_tab[0][1][1] = ff_put_vp8_bilinear16_hv_mmi;
    dsp->put_vp8_bilinear_pixels_tab[0][1][2] = ff_put_vp8_bilinear16_hv_mmi;
    dsp->put_vp8_bilinear_pixels_tab[0][2][0] = ff_put_vp8_bilinear16_v_mmi;
    dsp->put_vp8_bilinear_pixels_tab[0][2][1] = ff_put_vp8_bilinear16_hv_mmi;
    dsp->put_vp8_bilinear_pixels_tab[0][2][2] = ff_put_vp8_bilinear16_hv_mmi;

    /* Bilinear table, 8-wide. */
    dsp->put_vp8_bilinear_pixels_tab[1][0][1] = ff_put_vp8_bilinear8_h_mmi;
    dsp->put_vp8_bilinear_pixels_tab[1][0][2] = ff_put_vp8_bilinear8_h_mmi;
    dsp->put_vp8_bilinear_pixels_tab[1][1][0] = ff_put_vp8_bilinear8_v_mmi;
    dsp->put_vp8_bilinear_pixels_tab[1][1][1] = ff_put_vp8_bilinear8_hv_mmi;
    dsp->put_vp8_bilinear_pixels_tab[1][1][2] = ff_put_vp8_bilinear8_hv_mmi;
    dsp->put_vp8_bilinear_pixels_tab[1][2][0] = ff_put_vp8_bilinear8_v_mmi;
    dsp->put_vp8_bilinear_pixels_tab[1][2][1] = ff_put_vp8_bilinear8_hv_mmi;
    dsp->put_vp8_bilinear_pixels_tab[1][2][2] = ff_put_vp8_bilinear8_hv_mmi;

    /* Bilinear table, 4-wide. */
    dsp->put_vp8_bilinear_pixels_tab[2][0][1] = ff_put_vp8_bilinear4_h_mmi;
    dsp->put_vp8_bilinear_pixels_tab[2][0][2] = ff_put_vp8_bilinear4_h_mmi;
    dsp->put_vp8_bilinear_pixels_tab[2][1][0] = ff_put_vp8_bilinear4_v_mmi;
    dsp->put_vp8_bilinear_pixels_tab[2][1][1] = ff_put_vp8_bilinear4_hv_mmi;
    dsp->put_vp8_bilinear_pixels_tab[2][1][2] = ff_put_vp8_bilinear4_hv_mmi;
    dsp->put_vp8_bilinear_pixels_tab[2][2][0] = ff_put_vp8_bilinear4_v_mmi;
    dsp->put_vp8_bilinear_pixels_tab[2][2][1] = ff_put_vp8_bilinear4_hv_mmi;
    dsp->put_vp8_bilinear_pixels_tab[2][2][2] = ff_put_vp8_bilinear4_hv_mmi;

    /* [size][0][0] slots: both tables share the same routine per width. */
    dsp->put_vp8_epel_pixels_tab[0][0][0]     = ff_put_vp8_pixels16_mmi;
    dsp->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmi;

    dsp->put_vp8_epel_pixels_tab[1][0][0]     = ff_put_vp8_pixels8_mmi;
    dsp->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmi;

    /* The 4-wide routine is declared next to the 8/16 ones and must be
     * installed too, otherwise the [2][0][0] slots never get the MMI
     * version. */
    dsp->put_vp8_epel_pixels_tab[2][0][0]     = ff_put_vp8_pixels4_mmi;
    dsp->put_vp8_bilinear_pixels_tab[2][0][0] = ff_put_vp8_pixels4_mmi;

    /* Loop filters: macroblock-edge, inner-edge, then the simple variants. */
    dsp->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16_mmi;
    dsp->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16_mmi;
    dsp->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mmi;
    dsp->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mmi;

    dsp->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16_inner_mmi;
    dsp->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16_inner_mmi;
    dsp->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmi;
    dsp->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmi;

    dsp->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmi;
    dsp->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmi;
}
#endif /* HAVE_MMI */
/*
 * MIPS entry point for VP8 DSP setup: runs each SIMD-specific initializer
 * that was compiled in (HAVE_MSA / HAVE_MMI).
 *
 * The MSA initializer is called before the MMI one, so when both are
 * enabled, any slot written by both ends up holding the MMI routine.
 */
av_cold void ff_vp8dsp_init_mips(VP8DSPContext *dsp)
{
#if HAVE_MSA
vp8dsp_init_msa(dsp);
#endif // #if HAVE_MSA
#if HAVE_MMI
/* Runs last: its assignments take precedence over earlier initializers. */
vp8dsp_init_mmi(dsp);
#endif /* HAVE_MMI */
}

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
* Copyright (c) 2016 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
*
* This file is part of FFmpeg.
*
@ -21,6 +22,11 @@
#ifndef AVCODEC_MIPS_VP8DSP_MIPS_H
#define AVCODEC_MIPS_VP8DSP_MIPS_H
#include "libavutil/mem.h"
#include "libavcodec/vp8dsp.h"
#include "libavcodec/mathops.h"
#include "constants.h"
void ff_put_vp8_pixels4_msa(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride,
int h, int x, int y);
@ -169,4 +175,115 @@ void ff_vp8_idct_dc_add4uv_msa(uint8_t *dst, int16_t block[4][16],
void ff_vp8_idct_dc_add4y_msa(uint8_t *dst, int16_t block[4][16],
ptrdiff_t stride);
/*
 * Loongson MMI routines for the vp8_luma_dc_wht, vp8_luma_dc_wht_dc,
 * vp8_idct_add, vp8_idct_dc_add, vp8_idct_dc_add4y and vp8_idct_dc_add4uv
 * slots of VP8DSPContext.
 */
void ff_vp8_luma_dc_wht_mmi(int16_t block[4][4][16], int16_t dc[16]);
void ff_vp8_luma_dc_wht_dc_mmi(int16_t block[4][4][16], int16_t dc[16]);
void ff_vp8_idct_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
void ff_vp8_idct_dc_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
void ff_vp8_idct_dc_add4y_mmi(uint8_t *dst, int16_t block[4][16],
ptrdiff_t stride);
void ff_vp8_idct_dc_add4uv_mmi(uint8_t *dst, int16_t block[4][16],
ptrdiff_t stride);
/*
 * 4-, 8- and 16-wide "pixels" routines. They take the same parameter list as
 * the epel/bilinear routines declared after them, except that the last two
 * parameters are named x and y rather than mx and my.
 */
void ff_put_vp8_pixels4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
ptrdiff_t srcstride, int h, int x, int y);
void ff_put_vp8_pixels8_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
ptrdiff_t srcstride, int h, int x, int y);
void ff_put_vp8_pixels16_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
ptrdiff_t srcstride, int h, int x, int y);
/*
 * Routines for put_vp8_epel_pixels_tab, grouped by block width (16, 8, 4).
 * Naming: epel<width>_<dir><taps>, where the suffix is h4/h6, v4/v6, or a
 * combined h<taps>v<taps> form. All share one parameter list:
 * (dst, dststride, src, srcstride, h, mx, my).
 */
void ff_put_vp8_epel16_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_epel16_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_epel16_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_epel16_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_epel16_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_epel16_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_epel16_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_epel16_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
/* 8-wide variants. */
void ff_put_vp8_epel8_h4_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_epel8_h6_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_epel8_v4_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_epel8_v6_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_epel8_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_epel8_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_epel8_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_epel8_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
/* 4-wide variants. */
void ff_put_vp8_epel4_h4_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_epel4_h6_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_epel4_v4_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_epel4_v6_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_epel4_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_epel4_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_epel4_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_epel4_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
/*
 * Routines for put_vp8_bilinear_pixels_tab: one h, one v and one hv variant
 * per block width (16, 8, 4). Same parameter list as the epel routines:
 * (dst, dststride, src, srcstride, h, mx, my).
 */
void ff_put_vp8_bilinear16_h_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_bilinear16_v_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_bilinear16_hv_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_bilinear8_h_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_bilinear8_v_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_bilinear8_hv_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_bilinear4_h_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_bilinear4_v_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
void ff_put_vp8_bilinear4_hv_mmi(uint8_t *dst, ptrdiff_t dststride,
uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my);
/*
 * Loop-filter prototypes. The 16-wide variants take a single dst pointer;
 * the 8uv variants take separate dstU and dstV pointers. All but the
 * "simple" variants take three thresholds: flim_E, flim_I and hev_thresh.
 */
// loop filter applied to edges between macroblocks
void ff_vp8_v_loop_filter16_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E,
int flim_I, int hev_thresh);
void ff_vp8_h_loop_filter16_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E,
int flim_I, int hev_thresh);
void ff_vp8_v_loop_filter8uv_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,
int flim_E, int flim_I, int hev_thresh);
void ff_vp8_h_loop_filter8uv_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,
int flim_E, int flim_I, int hev_thresh);
// loop filter applied to inner macroblock edges
void ff_vp8_v_loop_filter16_inner_mmi(uint8_t *dst, ptrdiff_t stride,
int flim_E, int flim_I, int hev_thresh);
void ff_vp8_h_loop_filter16_inner_mmi(uint8_t *dst, ptrdiff_t stride,
int flim_E, int flim_I, int hev_thresh);
void ff_vp8_v_loop_filter8uv_inner_mmi(uint8_t *dstU, uint8_t *dstV,
ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh);
void ff_vp8_h_loop_filter8uv_inner_mmi(uint8_t *dstU, uint8_t *dstV,
ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh);
// simple loop filter variants: a single limit (flim) instead of three thresholds
void ff_vp8_v_loop_filter_simple_mmi(uint8_t *dst, ptrdiff_t stride, int flim);
void ff_vp8_h_loop_filter_simple_mmi(uint8_t *dst, ptrdiff_t stride, int flim);
#endif // #ifndef AVCODEC_MIPS_VP8DSP_MIPS_H

3052
libavcodec/mips/vp8dsp_mmi.c Normal file

File diff suppressed because it is too large Load Diff