third_party_ffmpeg/libavcodec/cabac_functions.h
Janne Grunau 634d9d8b39 arm: get_cabac inline asm
Based on the aarch64 asm. CPU cycle counts on cortex-a9 compared to
gcc 4.8.2:
before: 475 decicycles in get_cabac_noinline, 67106035 runs, 2829 skips
after:  393 decicycles in get_cabac_noinline, 67106474 runs, 2390 skips

Overall speedup is above 2%. Code generated by clang 3.4 is slower on
the same hardware and the relative change is a little larger.
2014-03-09 00:45:34 +01:00

191 lines
4.8 KiB
C

/*
* H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* Context Adaptive Binary Arithmetic Coder inline functions
*/
#ifndef AVCODEC_CABAC_FUNCTIONS_H
#define AVCODEC_CABAC_FUNCTIONS_H
#include <stdint.h>
#include "cabac.h"
#include "config.h"
#if ARCH_AARCH64
# include "aarch64/cabac.h"
#endif
#if ARCH_ARM
# include "arm/cabac.h"
#endif
#if ARCH_X86
# include "x86/cabac.h"
#endif
static uint8_t * const ff_h264_norm_shift = ff_h264_cabac_tables + H264_NORM_SHIFT_OFFSET;
static uint8_t * const ff_h264_lps_range = ff_h264_cabac_tables + H264_LPS_RANGE_OFFSET;
static uint8_t * const ff_h264_mlps_state = ff_h264_cabac_tables + H264_MLPS_STATE_OFFSET;
static uint8_t * const ff_h264_last_coeff_flag_offset_8x8 = ff_h264_cabac_tables + H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET;
static void refill(CABACContext *c){
#if CABAC_BITS == 16
c->low+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
#else
c->low+= c->bytestream[0]<<1;
#endif
c->low -= CABAC_MASK;
if (c->bytestream < c->bytestream_end)
c->bytestream += CABAC_BITS / 8;
}
static inline void renorm_cabac_decoder_once(CABACContext *c){
int shift= (uint32_t)(c->range - 0x100)>>31;
c->range<<= shift;
c->low <<= shift;
if(!(c->low & CABAC_MASK))
refill(c);
}
#ifndef get_cabac_inline
static void refill2(CABACContext *c){
int i, x;
x= c->low ^ (c->low-1);
i= 7 - ff_h264_norm_shift[x>>(CABAC_BITS-1)];
x= -CABAC_MASK;
#if CABAC_BITS == 16
x+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
#else
x+= c->bytestream[0]<<1;
#endif
c->low += x<<i;
if (c->bytestream < c->bytestream_end)
c->bytestream += CABAC_BITS/8;
}
static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const state){
int s = *state;
int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + s];
int bit, lps_mask;
c->range -= RangeLPS;
lps_mask= ((c->range<<(CABAC_BITS+1)) - c->low)>>31;
c->low -= (c->range<<(CABAC_BITS+1)) & lps_mask;
c->range += (RangeLPS - c->range) & lps_mask;
s^=lps_mask;
*state= (ff_h264_mlps_state+128)[s];
bit= s&1;
lps_mask= ff_h264_norm_shift[c->range];
c->range<<= lps_mask;
c->low <<= lps_mask;
if(!(c->low & CABAC_MASK))
refill2(c);
return bit;
}
#endif
static int av_noinline av_unused get_cabac_noinline(CABACContext *c, uint8_t * const state){
return get_cabac_inline(c,state);
}
static int av_unused get_cabac(CABACContext *c, uint8_t * const state){
return get_cabac_inline(c,state);
}
#ifndef get_cabac_bypass
static int av_unused get_cabac_bypass(CABACContext *c){
int range;
c->low += c->low;
if(!(c->low & CABAC_MASK))
refill(c);
range= c->range<<(CABAC_BITS+1);
if(c->low < range){
return 0;
}else{
c->low -= range;
return 1;
}
}
#endif
#ifndef get_cabac_bypass_sign
static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){
int range, mask;
c->low += c->low;
if(!(c->low & CABAC_MASK))
refill(c);
range= c->range<<(CABAC_BITS+1);
c->low -= range;
mask= c->low >> 31;
range &= mask;
c->low += range;
return (val^mask)-mask;
}
#endif
/**
*
* @return the number of bytes read or 0 if no end
*/
static int av_unused get_cabac_terminate(CABACContext *c){
c->range -= 2;
if(c->low < c->range<<(CABAC_BITS+1)){
renorm_cabac_decoder_once(c);
return 0;
}else{
return c->bytestream - c->bytestream_start;
}
}
/**
* Skip @p n bytes and reset the decoder.
* @return the address of the first skipped byte or NULL if there's less than @p n bytes left
*/
static av_unused const uint8_t* skip_bytes(CABACContext *c, int n) {
const uint8_t *ptr = c->bytestream;
if (c->low & 0x1)
ptr--;
#if CABAC_BITS == 16
if (c->low & 0x1FF)
ptr--;
#endif
if ((int) (c->bytestream_end - ptr) < n)
return NULL;
ff_init_cabac_decoder(c, ptr + n, c->bytestream_end - ptr - n);
return ptr;
}
#endif /* AVCODEC_CABAC_FUNCTIONS_H */