avcodec: loongson optimize h264dsp idct and loop filter with mmi

Change-Id: Ic87fb8f5cd22a502ff9dbbc5a5a8ea97cfc8a1dd
Signed-off-by: ZhouXiaoyong <zhouxiaoyong@loongson.cn>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
This commit is contained in:
周晓勇 2015-09-02 18:02:24 +08:00 committed by Michael Niedermayer
parent b45ff1386a
commit f7e9b9d260
3 changed files with 2303 additions and 3 deletions

View File

@ -82,11 +82,33 @@ static av_cold void h264dsp_init_msa(H264DSPContext *c,
#endif // #if HAVE_MSA
#if HAVE_MMI
static av_cold void h264dsp_init_mmi(H264DSPContext * c,
const int bit_depth,
const int chroma_format_idc)
static av_cold void h264dsp_init_mmi(H264DSPContext * c, const int bit_depth,
const int chroma_format_idc)
{
if (bit_depth == 8) {
c->h264_add_pixels4_clear = ff_h264_add_pixels4_8_mmi;
c->h264_idct_add = ff_h264_idct_add_8_mmi;
c->h264_idct8_add = ff_h264_idct8_add_8_mmi;
c->h264_idct_dc_add = ff_h264_idct_dc_add_8_mmi;
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmi;
c->h264_idct_add16 = ff_h264_idct_add16_8_mmi;
c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmi;
c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmi;
if (chroma_format_idc <= 1)
c->h264_idct_add8 = ff_h264_idct_add8_8_mmi;
else
c->h264_idct_add8 = ff_h264_idct_add8_422_8_mmi;
c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_8_mmi;
if (chroma_format_idc <= 1)
c->h264_chroma_dc_dequant_idct =
ff_h264_chroma_dc_dequant_idct_8_mmi;
else
c->h264_chroma_dc_dequant_idct =
ff_h264_chroma422_dc_dequant_idct_8_mmi;
c->weight_h264_pixels_tab[0] = ff_h264_weight_pixels16_8_mmi;
c->weight_h264_pixels_tab[1] = ff_h264_weight_pixels8_8_mmi;
c->weight_h264_pixels_tab[2] = ff_h264_weight_pixels4_8_mmi;
@ -94,6 +116,21 @@ static av_cold void h264dsp_init_mmi(H264DSPContext * c,
c->biweight_h264_pixels_tab[0] = ff_h264_biweight_pixels16_8_mmi;
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_pixels8_8_mmi;
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_pixels4_8_mmi;
c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_8_mmi;
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_mmi;
if (chroma_format_idc <= 1) {
c->h264_h_loop_filter_chroma =
ff_deblock_h_chroma_8_mmi;
c->h264_h_loop_filter_chroma_intra =
ff_deblock_h_chroma_intra_8_mmi;
}
c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_mmi;
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmi;
c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_mmi;
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmi;
}
}
#endif /* HAVE_MMI */

View File

@ -319,6 +319,26 @@ void ff_vp8_pred8x8_129_dc_8_msa(uint8_t *src, ptrdiff_t stride);
void ff_vp8_pred16x16_127_dc_8_msa(uint8_t *src, ptrdiff_t stride);
void ff_vp8_pred16x16_129_dc_8_msa(uint8_t *src, ptrdiff_t stride);
void ff_h264_add_pixels4_8_mmi(uint8_t *_dst, int16_t *_src, int stride);
void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct8_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct_add16_8_mmi(uint8_t *dst, const int *block_offset,
int16_t *block, int stride, const uint8_t nnzc[15*8]);
void ff_h264_idct_add16intra_8_mmi(uint8_t *dst, const int *block_offset,
int16_t *block, int stride, const uint8_t nnzc[15*8]);
void ff_h264_idct8_add4_8_mmi(uint8_t *dst, const int *block_offset,
int16_t *block, int stride, const uint8_t nnzc[15*8]);
void ff_h264_idct_add8_8_mmi(uint8_t **dest, const int *block_offset,
int16_t *block, int stride, const uint8_t nnzc[15*8]);
void ff_h264_idct_add8_422_8_mmi(uint8_t **dest, const int *block_offset,
int16_t *block, int stride, const uint8_t nnzc[15*8]);
void ff_h264_luma_dc_dequant_idct_8_mmi(int16_t *output, int16_t *input,
int qmul);
void ff_h264_chroma_dc_dequant_idct_8_mmi(int16_t *block, int qmul);
void ff_h264_chroma422_dc_dequant_idct_8_mmi(int16_t *block, int qmul);
void ff_h264_weight_pixels16_8_mmi(uint8_t *block, int stride, int height,
int log2_denom, int weight, int offset);
void ff_h264_biweight_pixels16_8_mmi(uint8_t *dst, uint8_t *src,
@ -335,6 +355,27 @@ void ff_h264_biweight_pixels4_8_mmi(uint8_t *dst, uint8_t *src,
int stride, int height, int log2_denom, int weightd, int weights,
int offset);
void ff_deblock_v_chroma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
int8_t *tc0);
void ff_deblock_v_chroma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
int beta);
void ff_deblock_h_chroma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
int8_t *tc0);
void ff_deblock_h_chroma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
int beta);
void ff_deblock_v_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
int8_t *tc0);
void ff_deblock_v_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
int beta);
void ff_deblock_h_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
int8_t *tc0);
void ff_deblock_h_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
int beta);
void ff_deblock_v8_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
int8_t *tc0);
void ff_deblock_v8_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
int beta);
void ff_put_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t dst_stride);
void ff_put_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src,

File diff suppressed because it is too large Load Diff