VC1: transpose IDCT 8x8 coeffs while reading.

(cherry picked from commit 1da6ea3954)
This commit is contained in:
Ronald S. Bultje 2011-02-16 14:18:21 -05:00 committed by Michael Niedermayer
parent 003c32e72c
commit 2739dc5d85
4 changed files with 63 additions and 58 deletions

View File

@ -154,7 +154,6 @@ static void vc1_inv_trans_8x8_altivec(DCTELEM block[64])
src6 = vec_ld( 96, block);
src7 = vec_ld(112, block);
TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
s0 = vec_unpackl(src0);
s1 = vec_unpackl(src1);
s2 = vec_unpackl(src2);

View File

@ -215,6 +215,7 @@ typedef struct VC1Context{
int k_y; ///< Number of bits for MVs (depends on MV range)
int range_x, range_y; ///< MV range
uint8_t pq, altpq; ///< Current/alternate frame quantizer scale
uint8_t zz_8x8[4][64];///< Zigzag table for TT_8x8, permuted for IDCT
const uint8_t* zz_8x4;///< Zigzag scan table for TT_8x4 coding mode
const uint8_t* zz_4x8;///< Zigzag scan table for TT_4x8 coding mode
/** pquant parameters */

View File

@ -1499,11 +1499,11 @@ static int vc1_decode_i_block(VC1Context *v, DCTELEM block[64], int n, int coded
if(v->s.ac_pred) {
if(!dc_pred_dir)
zz_table = wmv1_scantable[2];
zz_table = v->zz_8x8[2];
else
zz_table = wmv1_scantable[3];
zz_table = v->zz_8x8[3];
} else
zz_table = wmv1_scantable[1];
zz_table = v->zz_8x8[1];
ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
ac_val2 = ac_val;
@ -1524,16 +1524,16 @@ static int vc1_decode_i_block(VC1Context *v, DCTELEM block[64], int n, int coded
if(s->ac_pred) {
if(dc_pred_dir) { //left
for(k = 1; k < 8; k++)
block[k << 3] += ac_val[k];
block[k] += ac_val[k];
} else { //top
for(k = 1; k < 8; k++)
block[k] += ac_val[k + 8];
block[k << 3] += ac_val[k + 8];
}
}
/* save AC coeffs for further prediction */
for(k = 1; k < 8; k++) {
ac_val2[k] = block[k << 3];
ac_val2[k + 8] = block[k];
ac_val2[k] = block[k];
ac_val2[k + 8] = block[k << 3];
}
/* scale AC coeffs */
@ -1570,15 +1570,15 @@ not_coded:
if(s->ac_pred) {
if(dc_pred_dir) { //left
for(k = 1; k < 8; k++) {
block[k << 3] = ac_val[k] * scale;
if(!v->pquantizer && block[k << 3])
block[k << 3] += (block[k << 3] < 0) ? -v->pq : v->pq;
block[k] = ac_val[k] * scale;
if(!v->pquantizer && block[k])
block[k] += (block[k] < 0) ? -v->pq : v->pq;
}
} else { //top
for(k = 1; k < 8; k++) {
block[k] = ac_val[k + 8] * scale;
if(!v->pquantizer && block[k])
block[k] += (block[k] < 0) ? -v->pq : v->pq;
block[k << 3] = ac_val[k + 8] * scale;
if(!v->pquantizer && block[k << 3])
block[k << 3] += (block[k << 3] < 0) ? -v->pq : v->pq;
}
}
i = 63;
@ -1682,11 +1682,11 @@ static int vc1_decode_i_block_adv(VC1Context *v, DCTELEM block[64], int n, int c
if(v->s.ac_pred) {
if(!dc_pred_dir)
zz_table = wmv1_scantable[2];
zz_table = v->zz_8x8[2];
else
zz_table = wmv1_scantable[3];
zz_table = v->zz_8x8[3];
} else
zz_table = wmv1_scantable[1];
zz_table = v->zz_8x8[1];
while (!last) {
vc1_decode_ac_coeff(v, &last, &skip, &value, codingset);
@ -1705,25 +1705,25 @@ static int vc1_decode_i_block_adv(VC1Context *v, DCTELEM block[64], int n, int c
if(dc_pred_dir) { //left
for(k = 1; k < 8; k++)
block[k << 3] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
block[k] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
} else { //top
for(k = 1; k < 8; k++)
block[k] += (ac_val[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
block[k << 3] += (ac_val[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
}
} else {
if(dc_pred_dir) { //left
for(k = 1; k < 8; k++)
block[k << 3] += ac_val[k];
block[k] += ac_val[k];
} else { //top
for(k = 1; k < 8; k++)
block[k] += ac_val[k + 8];
block[k << 3] += ac_val[k + 8];
}
}
}
/* save AC coeffs for further prediction */
for(k = 1; k < 8; k++) {
ac_val2[k] = block[k << 3];
ac_val2[k + 8] = block[k];
ac_val2[k] = block[k];
ac_val2[k + 8] = block[k << 3];
}
/* scale AC coeffs */
@ -1765,15 +1765,15 @@ static int vc1_decode_i_block_adv(VC1Context *v, DCTELEM block[64], int n, int c
if(use_pred) {
if(dc_pred_dir) { //left
for(k = 1; k < 8; k++) {
block[k << 3] = ac_val2[k] * scale;
if(!v->pquantizer && block[k << 3])
block[k << 3] += (block[k << 3] < 0) ? -mquant : mquant;
block[k] = ac_val2[k] * scale;
if(!v->pquantizer && block[k])
block[k] += (block[k] < 0) ? -mquant : mquant;
}
} else { //top
for(k = 1; k < 8; k++) {
block[k] = ac_val2[k + 8] * scale;
if(!v->pquantizer && block[k])
block[k] += (block[k] < 0) ? -mquant : mquant;
block[k << 3] = ac_val2[k + 8] * scale;
if(!v->pquantizer && block[k << 3])
block[k << 3] += (block[k << 3] < 0) ? -mquant : mquant;
}
}
i = 63;
@ -1884,17 +1884,14 @@ static int vc1_decode_intra_block(VC1Context *v, DCTELEM block[64], int n, int c
if(coded) {
int last = 0, skip, value;
const uint8_t *zz_table;
int k;
zz_table = wmv1_scantable[0];
while (!last) {
vc1_decode_ac_coeff(v, &last, &skip, &value, codingset);
i += skip;
if(i > 63)
break;
block[zz_table[i++]] = value;
block[v->zz_8x8[0][i++]] = value;
}
/* apply AC prediction if needed */
@ -1906,25 +1903,25 @@ static int vc1_decode_intra_block(VC1Context *v, DCTELEM block[64], int n, int c
if(dc_pred_dir) { //left
for(k = 1; k < 8; k++)
block[k << 3] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
block[k] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
} else { //top
for(k = 1; k < 8; k++)
block[k] += (ac_val[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
block[k << 3] += (ac_val[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
}
} else {
if(dc_pred_dir) { //left
for(k = 1; k < 8; k++)
block[k << 3] += ac_val[k];
block[k] += ac_val[k];
} else { //top
for(k = 1; k < 8; k++)
block[k] += ac_val[k + 8];
block[k << 3] += ac_val[k + 8];
}
}
}
/* save AC coeffs for further prediction */
for(k = 1; k < 8; k++) {
ac_val2[k] = block[k << 3];
ac_val2[k + 8] = block[k];
ac_val2[k] = block[k];
ac_val2[k + 8] = block[k << 3];
}
/* scale AC coeffs */
@ -1966,15 +1963,15 @@ static int vc1_decode_intra_block(VC1Context *v, DCTELEM block[64], int n, int c
if(use_pred) {
if(dc_pred_dir) { //left
for(k = 1; k < 8; k++) {
block[k << 3] = ac_val2[k] * scale;
if(!v->pquantizer && block[k << 3])
block[k << 3] += (block[k << 3] < 0) ? -mquant : mquant;
block[k] = ac_val2[k] * scale;
if(!v->pquantizer && block[k])
block[k] += (block[k] < 0) ? -mquant : mquant;
}
} else { //top
for(k = 1; k < 8; k++) {
block[k] = ac_val2[k + 8] * scale;
if(!v->pquantizer && block[k])
block[k] += (block[k] < 0) ? -mquant : mquant;
block[k << 3] = ac_val2[k + 8] * scale;
if(!v->pquantizer && block[k << 3])
block[k << 3] += (block[k << 3] < 0) ? -mquant : mquant;
}
}
i = 63;
@ -2035,7 +2032,7 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan
i += skip;
if(i > 63)
break;
idx = wmv1_scantable[0][i++];
idx = v->zz_8x8[0][i++];
block[idx] = value * scale;
if(!v->pquantizer)
block[idx] += (block[idx] < 0) ? -mquant : mquant;
@ -3007,6 +3004,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
VC1Context *v = avctx->priv_data;
MpegEncContext *s = &v->s;
GetBitContext gb;
int i;
if (!avctx->extradata_size || !avctx->extradata) return -1;
if (!(avctx->flags & CODEC_FLAG_GRAY))
@ -3025,6 +3023,13 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
if(ff_msmpeg4_decode_init(avctx) < 0)
return -1;
if (vc1_init_common(v) < 0) return -1;
for (i = 0; i < 64; i++) {
#define transpose(x) ((x>>3) | ((x&7)<<3))
v->zz_8x8[0][i] = transpose(wmv1_scantable[0][i]);
v->zz_8x8[1][i] = transpose(wmv1_scantable[1][i]);
v->zz_8x8[2][i] = transpose(wmv1_scantable[2][i]);
v->zz_8x8[3][i] = transpose(wmv1_scantable[3][i]);
}
avctx->coded_width = avctx->width;
avctx->coded_height = avctx->height;

View File

@ -203,25 +203,25 @@ static void vc1_inv_trans_8x8_c(DCTELEM block[64])
{
int i;
register int t1,t2,t3,t4,t5,t6,t7,t8;
DCTELEM *src, *dst;
DCTELEM *src, *dst, temp[64];
src = block;
dst = block;
dst = temp;
for(i = 0; i < 8; i++){
t1 = 12 * (src[0] + src[4]) + 4;
t2 = 12 * (src[0] - src[4]) + 4;
t3 = 16 * src[2] + 6 * src[6];
t4 = 6 * src[2] - 16 * src[6];
t1 = 12 * (src[ 0] + src[32]) + 4;
t2 = 12 * (src[ 0] - src[32]) + 4;
t3 = 16 * src[16] + 6 * src[48];
t4 = 6 * src[16] - 16 * src[48];
t5 = t1 + t3;
t6 = t2 + t4;
t7 = t2 - t4;
t8 = t1 - t3;
t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7];
t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7];
t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7];
t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7];
t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56];
t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56];
t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56];
t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56];
dst[0] = (t5 + t1) >> 3;
dst[1] = (t6 + t2) >> 3;
@ -232,11 +232,11 @@ static void vc1_inv_trans_8x8_c(DCTELEM block[64])
dst[6] = (t6 - t2) >> 3;
dst[7] = (t5 - t1) >> 3;
src += 8;
src += 1;
dst += 8;
}
src = block;
src = temp;
dst = block;
for(i = 0; i < 8; i++){
t1 = 12 * (src[ 0] + src[32]) + 64;