third_party_ffmpeg/libavcodec/h264addpx_template.c
Ronald S. Bultje 62844c3fd6 h264: Integrate clear_blocks calls with IDCT
The non-intra-pcm branch in hl_decode_mb (simple, 8bpp) goes from 700
to 672 cycles, and the complete loop of decode_mb_cabac and hl_decode_mb
(in the decode_slice loop) goes from 1759 to 1733 cycles on the clip
tested (cathedral), i.e. almost 30 cycles per mb faster.

Signed-off-by: Martin Storsjö <martin@martin.st>
2013-04-10 11:03:06 +03:00

73 lines
1.9 KiB
C

/*
* H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
* Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* H.264 / AVC / MPEG4 part10 DSP functions.
* @author Michael Niedermayer <michaelni@gmx.at>
*/
#include "bit_depth_template.c"
static void FUNCC(ff_h264_add_pixels4)(uint8_t *_dst, int16_t *_src, int stride)
{
int i;
pixel *dst = (pixel *) _dst;
dctcoef *src = (dctcoef *) _src;
stride /= sizeof(pixel);
for (i = 0; i < 4; i++) {
dst[0] += src[0];
dst[1] += src[1];
dst[2] += src[2];
dst[3] += src[3];
dst += stride;
src += 4;
}
memset(_src, 0, sizeof(dctcoef) * 16);
}
static void FUNCC(ff_h264_add_pixels8)(uint8_t *_dst, int16_t *_src, int stride)
{
int i;
pixel *dst = (pixel *) _dst;
dctcoef *src = (dctcoef *) _src;
stride /= sizeof(pixel);
for (i = 0; i < 8; i++) {
dst[0] += src[0];
dst[1] += src[1];
dst[2] += src[2];
dst[3] += src[3];
dst[4] += src[4];
dst[5] += src[5];
dst[6] += src[6];
dst[7] += src[7];
dst += stride;
src += 8;
}
memset(_src, 0, sizeof(dctcoef) * 64);
}