mirror of
https://github.com/xenia-project/FFmpeg.git
synced 2024-11-27 21:40:34 +00:00
clear_block mmx
Originally committed as revision 16045 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
5fac277602
commit
5fecfb7d58
@ -3420,6 +3420,11 @@ void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Zero a single block of 64 DCTELEM coefficients:
 * memset(block, 0, sizeof(DCTELEM)*64)
 *
 * Plain C implementation; dsputil_init() stores it in
 * DSPContext.clear_block.
 *
 * @param block start of the 64-coefficient block to clear
 */
static void clear_block_c(DCTELEM *block)
|
||||||
|
{
|
||||||
|
memset(block, 0, sizeof(DCTELEM)*64);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* memset(blocks, 0, sizeof(DCTELEM)*6*64)
|
* memset(blocks, 0, sizeof(DCTELEM)*6*64)
|
||||||
*/
|
*/
|
||||||
@ -4288,6 +4293,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
|||||||
c->sum_abs_dctelem = sum_abs_dctelem_c;
|
c->sum_abs_dctelem = sum_abs_dctelem_c;
|
||||||
c->gmc1 = gmc1_c;
|
c->gmc1 = gmc1_c;
|
||||||
c->gmc = ff_gmc_c;
|
c->gmc = ff_gmc_c;
|
||||||
|
c->clear_block = clear_block_c;
|
||||||
c->clear_blocks = clear_blocks_c;
|
c->clear_blocks = clear_blocks_c;
|
||||||
c->pix_sum = pix_sum_c;
|
c->pix_sum = pix_sum_c;
|
||||||
c->pix_norm1 = pix_norm1_c;
|
c->pix_norm1 = pix_norm1_c;
|
||||||
|
@ -203,6 +203,7 @@ typedef struct DSPContext {
|
|||||||
*/
|
*/
|
||||||
void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy,
|
void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy,
|
||||||
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
|
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
|
||||||
|
void (*clear_block)(DCTELEM *block/*align 16*/);
|
||||||
void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
|
void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
|
||||||
int (*pix_sum)(uint8_t * pix, int line_size);
|
int (*pix_sum)(uint8_t * pix, int line_size);
|
||||||
int (*pix_norm1)(uint8_t * pix, int line_size);
|
int (*pix_norm1)(uint8_t * pix, int line_size);
|
||||||
|
@ -810,7 +810,7 @@ static inline int get_p_cbp(MpegEncContext * s,
|
|||||||
for (i = 0; i < 6; i++) {
|
for (i = 0; i < 6; i++) {
|
||||||
if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){
|
if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){
|
||||||
s->block_last_index[i]= -1;
|
s->block_last_index[i]= -1;
|
||||||
memset(s->block[i], 0, sizeof(DCTELEM)*64);
|
s->dsp.clear_block(s->block[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}else{
|
}else{
|
||||||
@ -853,7 +853,7 @@ static inline int get_b_cbp(MpegEncContext * s, DCTELEM block[6][64],
|
|||||||
for (i = 0; i < 6; i++) {
|
for (i = 0; i < 6; i++) {
|
||||||
if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){
|
if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){
|
||||||
s->block_last_index[i]= -1;
|
s->block_last_index[i]= -1;
|
||||||
memset(s->block[i], 0, sizeof(DCTELEM)*64);
|
s->dsp.clear_block(s->block[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}else{
|
}else{
|
||||||
@ -4651,7 +4651,7 @@ retry:
|
|||||||
rl = &rl_intra_aic;
|
rl = &rl_intra_aic;
|
||||||
i = 0;
|
i = 0;
|
||||||
s->gb= gb;
|
s->gb= gb;
|
||||||
memset(block, 0, sizeof(DCTELEM)*64);
|
s->dsp.clear_block(block);
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
av_log(s->avctx, AV_LOG_ERROR, "run overflow at %dx%d i:%d\n", s->mb_x, s->mb_y, s->mb_intra);
|
av_log(s->avctx, AV_LOG_ERROR, "run overflow at %dx%d i:%d\n", s->mb_x, s->mb_y, s->mb_intra);
|
||||||
|
@ -464,21 +464,42 @@ static void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_si
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void clear_blocks_mmx(DCTELEM *blocks)
|
/**
 * Define a function `name(DCTELEM *blocks)` that writes zeros to the
 * 128*n bytes starting at `blocks` using MMX stores.
 *
 * How the loop works:
 *   - mm7 is zeroed with pxor.
 *   - %0 is the END of the region (blocks + 128*n) and REG_a starts at the
 *     negative byte count (-128*n), so (%0, REG_a) initially addresses the
 *     first byte of `blocks`.
 *   - Each iteration issues four 8-byte movq stores (32 bytes) and adds 32
 *     to REG_a; `js` repeats while the result of the add is still negative,
 *     i.e. until REG_a reaches 0.
 *
 * `n` is pasted unparenthesized into `128*n` and is used with an "i"
 * (immediate) constraint, so pass a plain integer constant.
 *
 * NOTE(review): the asm stores through %0 but the clobber list only names
 * REG_a (no "memory" clobber and no memory output operand), whereas
 * clear_block_sse declares "memory" - confirm whether this is intentional.
 */
#define CLEAR_BLOCKS(name,n) \
|
||||||
|
static void name(DCTELEM *blocks)\
|
||||||
|
{\
|
||||||
|
__asm__ volatile(\
|
||||||
|
"pxor %%mm7, %%mm7 \n\t"\
|
||||||
|
"mov %1, %%"REG_a" \n\t"\
|
||||||
|
"1: \n\t"\
|
||||||
|
"movq %%mm7, (%0, %%"REG_a") \n\t"\
|
||||||
|
"movq %%mm7, 8(%0, %%"REG_a") \n\t"\
|
||||||
|
"movq %%mm7, 16(%0, %%"REG_a") \n\t"\
|
||||||
|
"movq %%mm7, 24(%0, %%"REG_a") \n\t"\
|
||||||
|
"add $32, %%"REG_a" \n\t"\
|
||||||
|
" js 1b \n\t"\
|
||||||
|
: : "r" (((uint8_t *)blocks)+128*n),\
|
||||||
|
"i" (-128*n)\
|
||||||
|
: "%"REG_a\
|
||||||
|
);\
|
||||||
|
}
|
||||||
|
/* clear_blocks_mmx: n = 6, clears 128*6 = 768 bytes */
CLEAR_BLOCKS(clear_blocks_mmx, 6)
|
||||||
|
/* clear_block_mmx: n = 1, clears 128 bytes */
CLEAR_BLOCKS(clear_block_mmx, 1)
|
||||||
|
|
||||||
|
/**
 * Zero 128 bytes starting at `block` using SSE.
 *
 * xmm0 is cleared with xorps and then stored eight times with movaps at
 * byte offsets 0, 16, ..., 112 from `block` (8 * 16 = 128 bytes), fully
 * unrolled with no loop counter. movaps is the aligned store form, so
 * `block` has to be 16-byte aligned (the DSPContext.clear_block prototype
 * annotates its argument with "align 16").
 *
 * The asm declares a "memory" clobber because it writes through %0
 * without a memory output operand.
 *
 * NOTE(review): xmm0 is modified but not listed as clobbered - confirm
 * this matches the conventions used by the rest of the file.
 *
 * In this diff rendering the lines of the new function alternate with the
 * removed body of the previous clear_blocks_mmx implementation (the
 * pxor/movq/js loop lines); those removed lines are not part of
 * clear_block_sse.
 *
 * @param block start of the block to clear
 */
static void clear_block_sse(DCTELEM *block)
|
||||||
{
|
{
|
||||||
__asm__ volatile(
|
__asm__ volatile(
|
||||||
"pxor %%mm7, %%mm7 \n\t"
|
"xorps %%xmm0, %%xmm0 \n"
|
||||||
"mov $-128*6, %%"REG_a" \n\t"
|
"movaps %%xmm0, (%0) \n"
|
||||||
"1: \n\t"
|
"movaps %%xmm0, 16(%0) \n"
|
||||||
"movq %%mm7, (%0, %%"REG_a") \n\t"
|
"movaps %%xmm0, 32(%0) \n"
|
||||||
"movq %%mm7, 8(%0, %%"REG_a") \n\t"
|
"movaps %%xmm0, 48(%0) \n"
|
||||||
"movq %%mm7, 16(%0, %%"REG_a") \n\t"
|
"movaps %%xmm0, 64(%0) \n"
|
||||||
"movq %%mm7, 24(%0, %%"REG_a") \n\t"
|
"movaps %%xmm0, 80(%0) \n"
|
||||||
"add $32, %%"REG_a" \n\t"
|
"movaps %%xmm0, 96(%0) \n"
|
||||||
" js 1b \n\t"
|
"movaps %%xmm0, 112(%0) \n"
|
||||||
: : "r" (((uint8_t *)blocks)+128*6)
|
:: "r"(block)
|
||||||
: "%"REG_a
|
: "memory"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
|
static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
|
||||||
@ -2569,7 +2590,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
|
|||||||
c->put_pixels_clamped = put_pixels_clamped_mmx;
|
c->put_pixels_clamped = put_pixels_clamped_mmx;
|
||||||
c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx;
|
c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx;
|
||||||
c->add_pixels_clamped = add_pixels_clamped_mmx;
|
c->add_pixels_clamped = add_pixels_clamped_mmx;
|
||||||
|
c->clear_block = clear_block_mmx;
|
||||||
c->clear_blocks = clear_blocks_mmx;
|
c->clear_blocks = clear_blocks_mmx;
|
||||||
|
if (mm_flags & FF_MM_SSE)
|
||||||
|
c->clear_block = clear_block_sse;
|
||||||
|
|
||||||
#define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
|
#define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
|
||||||
c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \
|
c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \
|
||||||
|
@ -511,7 +511,7 @@ static int x8_decode_intra_mb(IntraX8Context* const w, const int chroma){
|
|||||||
int sign;
|
int sign;
|
||||||
|
|
||||||
assert(w->orient<12);
|
assert(w->orient<12);
|
||||||
memset(s->block[0],0x00,64*sizeof(DCTELEM));
|
s->dsp.clear_block(s->block[0]);
|
||||||
|
|
||||||
if(chroma){
|
if(chroma){
|
||||||
dc_mode=2;
|
dc_mode=2;
|
||||||
|
@ -163,7 +163,7 @@ static int vlc_decode_block(MimicContext *ctx, int num_coeffs, int qscale)
|
|||||||
DCTELEM *block = ctx->dct_block;
|
DCTELEM *block = ctx->dct_block;
|
||||||
unsigned int pos;
|
unsigned int pos;
|
||||||
|
|
||||||
memset(block, 0, 64 * sizeof(DCTELEM));
|
ctx->dsp.clear_block(block);
|
||||||
|
|
||||||
block[0] = get_bits(&ctx->gb, 8) << 3;
|
block[0] = get_bits(&ctx->gb, 8) << 3;
|
||||||
|
|
||||||
|
@ -444,7 +444,7 @@ static int decode_dc_progressive(MJpegDecodeContext *s, DCTELEM *block, int comp
|
|||||||
int dc_index, int16_t *quant_matrix, int Al)
|
int dc_index, int16_t *quant_matrix, int Al)
|
||||||
{
|
{
|
||||||
int val;
|
int val;
|
||||||
memset(block, 0, 64*sizeof(DCTELEM));
|
s->dsp.clear_block(block);
|
||||||
val = mjpeg_decode_dc(s, dc_index);
|
val = mjpeg_decode_dc(s, dc_index);
|
||||||
if (val == 0xffff) {
|
if (val == 0xffff) {
|
||||||
av_log(s->avctx, AV_LOG_ERROR, "error dc\n");
|
av_log(s->avctx, AV_LOG_ERROR, "error dc\n");
|
||||||
@ -800,7 +800,7 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah, i
|
|||||||
if(s->interlaced && s->bottom_field)
|
if(s->interlaced && s->bottom_field)
|
||||||
ptr += linesize[c] >> 1;
|
ptr += linesize[c] >> 1;
|
||||||
if(!s->progressive) {
|
if(!s->progressive) {
|
||||||
memset(s->block, 0, sizeof(s->block));
|
s->dsp.clear_block(s->block);
|
||||||
if(decode_block(s, s->block, i,
|
if(decode_block(s, s->block, i,
|
||||||
s->dc_index[i], s->ac_index[i],
|
s->dc_index[i], s->ac_index[i],
|
||||||
s->quant_matrixes[ s->quant_index[c] ]) < 0) {
|
s->quant_matrixes[ s->quant_index[c] ]) < 0) {
|
||||||
|
@ -1402,14 +1402,14 @@ static void render_slice(Vp3DecodeContext *s, int slice)
|
|||||||
/* dequantize the DCT coefficients */
|
/* dequantize the DCT coefficients */
|
||||||
if(s->avctx->idct_algo==FF_IDCT_VP3){
|
if(s->avctx->idct_algo==FF_IDCT_VP3){
|
||||||
Coeff *coeff= s->coeffs + i;
|
Coeff *coeff= s->coeffs + i;
|
||||||
memset(block, 0, sizeof(block));
|
s->dsp.clear_block(block);
|
||||||
while(coeff->next){
|
while(coeff->next){
|
||||||
block[coeff->index]= coeff->coeff * dequantizer[coeff->index];
|
block[coeff->index]= coeff->coeff * dequantizer[coeff->index];
|
||||||
coeff= coeff->next;
|
coeff= coeff->next;
|
||||||
}
|
}
|
||||||
}else{
|
}else{
|
||||||
Coeff *coeff= s->coeffs + i;
|
Coeff *coeff= s->coeffs + i;
|
||||||
memset(block, 0, sizeof(block));
|
s->dsp.clear_block(block);
|
||||||
while(coeff->next){
|
while(coeff->next){
|
||||||
block[coeff->index]= (coeff->coeff * dequantizer[coeff->index] + 2)>>2;
|
block[coeff->index]= (coeff->coeff * dequantizer[coeff->index] + 2)>>2;
|
||||||
coeff= coeff->next;
|
coeff= coeff->next;
|
||||||
|
@ -405,7 +405,7 @@ static void vp56_decode_mb(vp56_context_t *s, int row, int col, int is_alpha)
|
|||||||
mb_type = vp56_decode_mv(s, row, col);
|
mb_type = vp56_decode_mv(s, row, col);
|
||||||
ref_frame = vp56_reference_frame[mb_type];
|
ref_frame = vp56_reference_frame[mb_type];
|
||||||
|
|
||||||
memset(s->block_coeff, 0, sizeof(s->block_coeff));
|
s->dsp.clear_blocks(*s->block_coeff);
|
||||||
|
|
||||||
s->parse_coeff(s);
|
s->parse_coeff(s);
|
||||||
|
|
||||||
|
@ -43,12 +43,12 @@ static void wmv2_add_block(Wmv2Context *w, DCTELEM *block1, uint8_t *dst, int st
|
|||||||
case 1:
|
case 1:
|
||||||
ff_simple_idct84_add(dst , stride, block1);
|
ff_simple_idct84_add(dst , stride, block1);
|
||||||
ff_simple_idct84_add(dst + 4*stride, stride, w->abt_block2[n]);
|
ff_simple_idct84_add(dst + 4*stride, stride, w->abt_block2[n]);
|
||||||
memset(w->abt_block2[n], 0, 64*sizeof(DCTELEM));
|
s->dsp.clear_block(w->abt_block2[n]);
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
ff_simple_idct48_add(dst , stride, block1);
|
ff_simple_idct48_add(dst , stride, block1);
|
||||||
ff_simple_idct48_add(dst + 4 , stride, w->abt_block2[n]);
|
ff_simple_idct48_add(dst + 4 , stride, w->abt_block2[n]);
|
||||||
memset(w->abt_block2[n], 0, 64*sizeof(DCTELEM));
|
s->dsp.clear_block(w->abt_block2[n]);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
av_log(s->avctx, AV_LOG_ERROR, "internal error in WMV2 abt\n");
|
av_log(s->avctx, AV_LOG_ERROR, "internal error in WMV2 abt\n");
|
||||||
|
Loading…
Reference in New Issue
Block a user