mirror of
https://github.com/xenia-project/FFmpeg.git
synced 2024-11-27 13:30:45 +00:00
MMX/MMXEXT iDCT support, using external functions currently defined in libmpeg2
Gives an average 13-20% MPEG decoding speedup on x86 systems. Originally committed as revision 30 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
2d6d0c1d66
commit
4af7bcc185
@ -21,6 +21,7 @@
|
||||
#include "avcodec.h"
|
||||
#include "dsputil.h"
|
||||
|
||||
void (*ff_idct)(DCTELEM *block);
|
||||
void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
|
||||
void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
|
||||
void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
|
||||
@ -363,6 +364,7 @@ void dsputil_init(void)
|
||||
squareTbl[i] = (i - 256) * (i - 256);
|
||||
}
|
||||
|
||||
ff_idct = j_rev_dct;
|
||||
get_pixels = get_pixels_c;
|
||||
put_pixels_clamped = put_pixels_clamped_c;
|
||||
add_pixels_clamped = add_pixels_clamped_c;
|
||||
|
@ -25,6 +25,7 @@ void dsputil_init(void);
|
||||
|
||||
/* pixel ops : interface with DCT */
|
||||
|
||||
extern void (*ff_idct)(DCTELEM *block);
|
||||
extern void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
|
||||
extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
|
||||
extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
|
||||
|
@ -29,6 +29,16 @@ int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h);
|
||||
int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h);
|
||||
int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h);
|
||||
|
||||
#ifdef USE_MMX_IDCT
|
||||
/* external functions, defined in libmpeg2 */
|
||||
void mmx_idct(DCTELEM *block);
|
||||
void mmxext_idct(DCTELEM *block);
|
||||
/* this should be in dsputil.h? -- A'rpi */
|
||||
extern UINT8 ff_alternate_horizontal_scan[64];
|
||||
extern UINT8 ff_alternate_vertical_scan[64];
|
||||
extern UINT8 zigzag_direct[64];
|
||||
#endif
|
||||
|
||||
/* pixel operations */
|
||||
static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001;
|
||||
static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002;
|
||||
@ -1039,5 +1049,23 @@ void dsputil_init_mmx(void)
|
||||
sub_pixels_tab[1] = sub_pixels_x2_3dnow;
|
||||
sub_pixels_tab[2] = sub_pixels_y2_3dnow;
|
||||
}
|
||||
|
||||
#ifdef USE_MMX_IDCT
|
||||
/* use MMX / MMXEXT iDCT code from libmpeg2 */
|
||||
//printf("LIBAVCODEC: Using MMX%s iDCT code\n",(mm_flags & MM_MMXEXT)?"EXT":"");
|
||||
ff_idct = (mm_flags & MM_MMXEXT) ? mmxext_idct : mmx_idct;
|
||||
/* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
|
||||
{ int i,j;
|
||||
for (i = 0; i < 64; i++) {
|
||||
j = zigzag_direct[i];
|
||||
zigzag_direct[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2);
|
||||
j = ff_alternate_horizontal_scan[i];
|
||||
ff_alternate_horizontal_scan[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2);
|
||||
j = ff_alternate_vertical_scan[i];
|
||||
ff_alternate_vertical_scan[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -331,7 +331,8 @@ static const UINT8 mbMotionVectorTable[17][2] = {
|
||||
{ 0xc, 10 },
|
||||
};
|
||||
|
||||
const UINT8 zigzag_direct[64] = {
|
||||
//const
|
||||
UINT8 zigzag_direct[64] = {
|
||||
0, 1, 8, 16, 9, 2, 3, 10,
|
||||
17, 24, 32, 25, 18, 11, 4, 5,
|
||||
12, 19, 26, 33, 40, 48, 41, 34,
|
||||
|
@ -634,7 +634,7 @@ static inline void put_dct(MpegEncContext *s,
|
||||
{
|
||||
if (!s->mpeg2)
|
||||
s->dct_unquantize(s, block, i, s->qscale);
|
||||
j_rev_dct (block);
|
||||
ff_idct (block);
|
||||
put_pixels_clamped(block, dest, line_size);
|
||||
}
|
||||
|
||||
@ -645,7 +645,7 @@ static inline void add_dct(MpegEncContext *s,
|
||||
if (s->block_last_index[i] >= 0) {
|
||||
if (!s->mpeg2)
|
||||
s->dct_unquantize(s, block, i, s->qscale);
|
||||
j_rev_dct (block);
|
||||
ff_idct (block);
|
||||
add_pixels_clamped(block, dest, line_size);
|
||||
}
|
||||
}
|
||||
|
@ -179,7 +179,8 @@ typedef struct MpegEncContext {
|
||||
DCTELEM *block, int n, int qscale);
|
||||
} MpegEncContext;
|
||||
|
||||
extern const UINT8 zigzag_direct[64];
|
||||
//const
|
||||
extern UINT8 zigzag_direct[64];
|
||||
|
||||
int MPV_common_init(MpegEncContext *s);
|
||||
void MPV_common_end(MpegEncContext *s);
|
||||
|
Loading…
Reference in New Issue
Block a user