mirror of
https://github.com/xenia-project/FFmpeg.git
synced 2024-11-27 21:40:34 +00:00
libavcodec/blockdsp: add AVX version
Also modify the required alignment to 32 instead of 16 for several codecs.
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
parent
4590d073cc
commit
cbbec68847
@ -54,7 +54,7 @@ typedef struct ASV1Context {
|
||||
int mb_height;
|
||||
int mb_width2;
|
||||
int mb_height2;
|
||||
DECLARE_ALIGNED(16, int16_t, block)[6][64];
|
||||
DECLARE_ALIGNED(32, int16_t, block)[6][64];
|
||||
uint16_t intra_matrix[64];
|
||||
int q_intra_matrix[64];
|
||||
uint8_t *bitstream_buffer;
|
||||
|
@ -813,7 +813,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
|
||||
int v, col[2];
|
||||
const uint8_t *scan;
|
||||
int xoff, yoff;
|
||||
LOCAL_ALIGNED_16(int16_t, block, [64]);
|
||||
LOCAL_ALIGNED_32(int16_t, block, [64]);
|
||||
LOCAL_ALIGNED_16(int32_t, dctblock, [64]);
|
||||
int coordmap[64];
|
||||
int ybias = is_key ? -15 : 0;
|
||||
@ -976,7 +976,7 @@ static int bink_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
|
||||
uint8_t *dst, *prev, *ref_start, *ref_end;
|
||||
int v, col[2];
|
||||
const uint8_t *scan;
|
||||
LOCAL_ALIGNED_16(int16_t, block, [64]);
|
||||
LOCAL_ALIGNED_32(int16_t, block, [64]);
|
||||
LOCAL_ALIGNED_16(uint8_t, ublock, [64]);
|
||||
LOCAL_ALIGNED_16(int32_t, dctblock, [64]);
|
||||
int coordmap[64];
|
||||
|
@ -74,7 +74,7 @@ typedef struct DNXHDEncContext {
|
||||
unsigned min_padding;
|
||||
int intra_quant_bias;
|
||||
|
||||
DECLARE_ALIGNED(16, int16_t, blocks)[12][64];
|
||||
DECLARE_ALIGNED(32, int16_t, blocks)[12][64];
|
||||
DECLARE_ALIGNED(16, uint8_t, edge_buf_y)[512]; // has to hold 16x16 uint16 when depth=10
|
||||
DECLARE_ALIGNED(16, uint8_t, edge_buf_uv)[2][512]; // has to hold 16x16 uint16_t when depth=10
|
||||
|
||||
|
@ -54,7 +54,7 @@ typedef struct MadContext {
|
||||
GetBitContext gb;
|
||||
void *bitstream_buf;
|
||||
unsigned int bitstream_buf_size;
|
||||
DECLARE_ALIGNED(16, int16_t, block)[64];
|
||||
DECLARE_ALIGNED(32, int16_t, block)[64];
|
||||
ScanTable scantable;
|
||||
uint16_t quant_matrix[64];
|
||||
int mb_x;
|
||||
|
@ -51,7 +51,7 @@ typedef struct TqiContext {
|
||||
uint16_t intra_matrix[64];
|
||||
int last_dc[3];
|
||||
|
||||
DECLARE_ALIGNED(16, int16_t, block)[6][64];
|
||||
DECLARE_ALIGNED(32, int16_t, block)[6][64];
|
||||
} TqiContext;
|
||||
|
||||
static av_cold int tqi_decode_init(AVCodecContext *avctx)
|
||||
|
@ -122,7 +122,7 @@ typedef struct JPGContext {
|
||||
|
||||
VLC dc_vlc[2], ac_vlc[2];
|
||||
int prev_dc[3];
|
||||
DECLARE_ALIGNED(16, int16_t, block)[6][64];
|
||||
DECLARE_ALIGNED(32, int16_t, block)[6][64];
|
||||
|
||||
uint8_t *buf;
|
||||
} JPGContext;
|
||||
|
@ -574,7 +574,7 @@ not_coded:
|
||||
|
||||
static int h263_skip_b_part(MpegEncContext *s, int cbp)
|
||||
{
|
||||
LOCAL_ALIGNED_16(int16_t, dblock, [64]);
|
||||
LOCAL_ALIGNED_32(int16_t, dblock, [64]);
|
||||
int i, mbi;
|
||||
int bli[6];
|
||||
|
||||
|
@ -48,7 +48,7 @@ typedef struct MDECContext {
|
||||
int mb_width;
|
||||
int mb_height;
|
||||
int mb_x, mb_y;
|
||||
DECLARE_ALIGNED(16, int16_t, block)[6][64];
|
||||
DECLARE_ALIGNED(32, int16_t, block)[6][64];
|
||||
DECLARE_ALIGNED(16, uint16_t, quant_matrix)[64];
|
||||
uint8_t *bitstream_buffer;
|
||||
unsigned int bitstream_buffer_size;
|
||||
|
@ -49,7 +49,7 @@ typedef struct MimicContext {
|
||||
|
||||
ThreadFrame frames [16];
|
||||
|
||||
DECLARE_ALIGNED(16, int16_t, dct_block)[64];
|
||||
DECLARE_ALIGNED(32, int16_t, dct_block)[64];
|
||||
|
||||
GetBitContext gb;
|
||||
ScanTable scantable;
|
||||
|
@ -98,7 +98,7 @@ typedef struct MJpegDecodeContext {
|
||||
int got_picture; ///< we found a SOF and picture is valid, too.
|
||||
int linesize[MAX_COMPONENTS]; ///< linesize << interlaced
|
||||
int8_t *qscale_table;
|
||||
DECLARE_ALIGNED(16, int16_t, block)[64];
|
||||
DECLARE_ALIGNED(32, int16_t, block)[64];
|
||||
int16_t (*blocks[MAX_COMPONENTS])[64]; ///< intermediate sums (progressive mode)
|
||||
uint8_t *last_nnz[MAX_COMPONENTS];
|
||||
uint64_t coefs_finished[MAX_COMPONENTS]; ///< bitmask of which coefs have been completely decoded (progressive mode)
|
||||
|
@ -368,7 +368,7 @@ static int decode_slice_luma(AVCodecContext *avctx, SliceContext *slice,
|
||||
const int16_t *qmat)
|
||||
{
|
||||
ProresContext *ctx = avctx->priv_data;
|
||||
LOCAL_ALIGNED_16(int16_t, blocks, [8*4*64]);
|
||||
LOCAL_ALIGNED_32(int16_t, blocks, [8*4*64]);
|
||||
int16_t *block;
|
||||
GetBitContext gb;
|
||||
int i, blocks_per_slice = slice->mb_count<<2;
|
||||
@ -402,7 +402,7 @@ static int decode_slice_chroma(AVCodecContext *avctx, SliceContext *slice,
|
||||
const int16_t *qmat, int log2_blocks_per_mb)
|
||||
{
|
||||
ProresContext *ctx = avctx->priv_data;
|
||||
LOCAL_ALIGNED_16(int16_t, blocks, [8*4*64]);
|
||||
LOCAL_ALIGNED_32(int16_t, blocks, [8*4*64]);
|
||||
int16_t *block;
|
||||
GetBitContext gb;
|
||||
int i, j, blocks_per_slice = slice->mb_count << log2_blocks_per_mb;
|
||||
@ -485,7 +485,7 @@ static void decode_slice_alpha(ProresContext *ctx,
|
||||
{
|
||||
GetBitContext gb;
|
||||
int i;
|
||||
LOCAL_ALIGNED_16(int16_t, blocks, [8*4*64]);
|
||||
LOCAL_ALIGNED_32(int16_t, blocks, [8*4*64]);
|
||||
int16_t *block;
|
||||
|
||||
for (i = 0; i < blocks_per_slice<<2; i++)
|
||||
|
@ -224,7 +224,7 @@ static inline int decode_dct_block(const SHQContext *s, GetBitContext *gb, int l
|
||||
{
|
||||
const int *quant_matrix = s->quant_matrix;
|
||||
const uint8_t *scantable = s->intra_scantable.permutated;
|
||||
LOCAL_ALIGNED_16(int16_t, block, [64]);
|
||||
LOCAL_ALIGNED_32(int16_t, block, [64]);
|
||||
int dc_offset;
|
||||
|
||||
s->bdsp.clear_block(block);
|
||||
|
@ -51,7 +51,7 @@ typedef struct Wmv2Context {
|
||||
int hshift;
|
||||
|
||||
ScanTable abt_scantable[2];
|
||||
DECLARE_ALIGNED(16, int16_t, abt_block2)[6][64];
|
||||
DECLARE_ALIGNED(32, int16_t, abt_block2)[6][64];
|
||||
} Wmv2Context;
|
||||
|
||||
void ff_wmv2_common_init(Wmv2Context *w);
|
||||
|
@ -4,6 +4,8 @@
|
||||
;* Copyright (c) 2008 Loren Merritt
|
||||
;* Copyright (c) 2009 Fiona Glaser
|
||||
;*
|
||||
;* AVX version by Jokyo Images
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
;* FFmpeg is free software; you can redistribute it and/or
|
||||
@ -39,20 +41,18 @@ cglobal clear_block, 1, 1, %1, blocks
|
||||
mova [blocksq+mmsize*(1+%%i)], m0
|
||||
mova [blocksq+mmsize*(2+%%i)], m0
|
||||
mova [blocksq+mmsize*(3+%%i)], m0
|
||||
mova [blocksq+mmsize*(4+%%i)], m0
|
||||
mova [blocksq+mmsize*(5+%%i)], m0
|
||||
mova [blocksq+mmsize*(6+%%i)], m0
|
||||
mova [blocksq+mmsize*(7+%%i)], m0
|
||||
%assign %%i %%i+8
|
||||
%assign %%i %%i+4
|
||||
%endrep
|
||||
RET
|
||||
%endmacro
|
||||
|
||||
INIT_MMX mmx
|
||||
%define ZERO pxor
|
||||
CLEAR_BLOCK 0, 2
|
||||
CLEAR_BLOCK 0, 4
|
||||
INIT_XMM sse
|
||||
%define ZERO xorps
|
||||
CLEAR_BLOCK 1, 2
|
||||
INIT_YMM avx
|
||||
CLEAR_BLOCK 1, 1
|
||||
|
||||
;-----------------------------------------
|
||||
@ -84,3 +84,5 @@ CLEAR_BLOCKS 0
|
||||
INIT_XMM sse
|
||||
%define ZERO xorps
|
||||
CLEAR_BLOCKS 1
|
||||
INIT_YMM avx
|
||||
CLEAR_BLOCKS 1
|
||||
|
@ -28,8 +28,10 @@
|
||||
|
||||
void ff_clear_block_mmx(int16_t *block);
|
||||
void ff_clear_block_sse(int16_t *block);
|
||||
void ff_clear_block_avx(int16_t *block);
|
||||
void ff_clear_blocks_mmx(int16_t *blocks);
|
||||
void ff_clear_blocks_sse(int16_t *blocks);
|
||||
void ff_clear_blocks_avx(int16_t *blocks);
|
||||
|
||||
av_cold void ff_blockdsp_init_x86(BlockDSPContext *c,
|
||||
AVCodecContext *avctx)
|
||||
@ -50,5 +52,9 @@ av_cold void ff_blockdsp_init_x86(BlockDSPContext *c,
|
||||
c->clear_block = ff_clear_block_sse;
|
||||
c->clear_blocks = ff_clear_blocks_sse;
|
||||
}
|
||||
if (EXTERNAL_AVX_FAST(cpu_flags)) {
|
||||
c->clear_block = ff_clear_block_avx;
|
||||
c->clear_blocks = ff_clear_blocks_avx;
|
||||
}
|
||||
#endif /* HAVE_X86ASM */
|
||||
}
|
||||
|
@ -53,8 +53,8 @@ do { \
|
||||
|
||||
void checkasm_check_blockdsp(void)
|
||||
{
|
||||
LOCAL_ALIGNED_16(uint16_t, buf0, [6 * 8 * 8]);
|
||||
LOCAL_ALIGNED_16(uint16_t, buf1, [6 * 8 * 8]);
|
||||
LOCAL_ALIGNED_32(uint16_t, buf0, [6 * 8 * 8]);
|
||||
LOCAL_ALIGNED_32(uint16_t, buf1, [6 * 8 * 8]);
|
||||
|
||||
AVCodecContext avctx = { 0 };
|
||||
BlockDSPContext h;
|
||||
|
Loading…
Reference in New Issue
Block a user