5% faster get_cabac()

Originally committed as revision 6586 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
Michael Niedermayer 2006-10-08 11:24:37 +00:00
parent e16c407a81
commit 99ce10873d
3 changed files with 30 additions and 14 deletions

View File

@@ -133,19 +133,19 @@ void ff_init_cabac_states(CABACContext *c, uint8_t const (*lps_range)[4],
 for(i=0; i<state_count; i++){
 for(j=0; j<4; j++){ //FIXME check if this is worth the 1 shift we save
-c->lps_range[2*i+0][j+4]=
-c->lps_range[2*i+1][j+4]= lps_range[i][j];
+c->lps_range[2*i+2][j+4]=
+c->lps_range[2*i+3][j+4]= lps_range[i][j];
 }
-c->mps_state[2*i+0]= 2*mps_state[i];
-c->mps_state[2*i+1]= 2*mps_state[i]+1;
+c->mps_state[2*i+2]= 2*mps_state[i]+2;
+c->mps_state[2*i+3]= 2*mps_state[i]+3;
 if( i ){
-c->lps_state[2*i+0]= 2*lps_state[i];
-c->lps_state[2*i+1]= 2*lps_state[i]+1;
+c->lps_state[2*i+2]= 2*lps_state[i]+2;
+c->lps_state[2*i+3]= 2*lps_state[i]+3;
 }else{
-c->lps_state[2*i+0]= 1;
-c->lps_state[2*i+1]= 0;
+c->lps_state[2*i+2]= 3;
+c->lps_state[2*i+3]= 2;
 }
 }
 }

View File

@@ -39,9 +39,9 @@ typedef struct CABACContext{
 #ifdef STRICT_LIMITS
 int symCount;
 #endif
-uint8_t lps_range[2*65][4]; ///< rangeTabLPS
-uint8_t lps_state[2*64]; ///< transIdxLPS
-uint8_t mps_state[2*64]; ///< transIdxMPS
+uint8_t lps_range[2*66][4]; ///< rangeTabLPS
+uint8_t lps_state[2*65]; ///< transIdxLPS
+uint8_t mps_state[2*65]; ///< transIdxMPS
const uint8_t *bytestream_start;
const uint8_t *bytestream;
const uint8_t *bytestream_end;
@@ -376,7 +376,23 @@ static int get_cabac(CABACContext *c, uint8_t * const state){
 #if 1
 if(c->low < c->range){
 bit= s&1;
-*state= c->mps_state[s];
+#ifdef ARCH_X86
+//P3:627
+asm(
+"addb $2, %b0 \n\t"
+" js 1f \n\t"
+"movb %b0, %1 \n\t"
+"1: \n\t"
+: "+q"(s), "=m"(*state)
+);
+#else
+*state= c->mps_state[s]; //P3:655
+/* if(s<126) //P3:657
+*state= s+2;*/
+s+=2; //P3:631
+if(s<128)
+*state= s;
+#endif
 renorm_cabac_decoder_once(c);
 }else{
 // int shift= ff_h264_norm_shift[RangeLPS>>17];

View File

@@ -7399,9 +7399,9 @@ static int decode_slice(H264Context *h){
 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
 if( pre <= 63 )
-h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
+h->cabac_state[i] = 2 * ( 63 - pre ) + 2;
 else
-h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
+h->cabac_state[i] = 2 * ( pre - 64 ) + 3;
 }
 for(;;){