/* Patch summary: the residual kernels (transform_skip, idct_4x4_luma, idct_dc[], idct[]) now take only the coefficient buffer, and one transform_add[] call receives dst, coeffs and stride afterwards. */ diff --git a/libavcodec/hevc_cabac.c b/libavcodec/hevc_cabac.c index 2647388dd3..3da2bf0617 100644 --- a/libavcodec/hevc_cabac.c +++ b/libavcodec/hevc_cabac.c @@ -1383,17 +1383,15 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, } } - if (lc->cu.cu_transquant_bypass_flag) { - s->hevcdsp.transquant_bypass[log2_trafo_size-2](dst, coeffs, stride); - } else { + if (!lc->cu.cu_transquant_bypass_flag) { if (transform_skip_flag) - s->hevcdsp.transform_skip(dst, coeffs, stride); + s->hevcdsp.transform_skip(coeffs, log2_trafo_size); else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 && log2_trafo_size == 2) - s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride); + s->hevcdsp.idct_4x4_luma(coeffs); else { int max_xy = FFMAX(last_significant_coeff_x, last_significant_coeff_y); if (max_xy == 0) - s->hevcdsp.transform_dc_add[log2_trafo_size-2](dst, coeffs, stride); + s->hevcdsp.idct_dc[log2_trafo_size-2](coeffs); else { int col_limit = last_significant_coeff_x + last_significant_coeff_y + 4; if (max_xy < 4) @@ -1402,10 +1400,11 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, col_limit = FFMIN(8, col_limit); else if (max_xy < 12) col_limit = FFMIN(24, col_limit); - s->hevcdsp.transform_add[log2_trafo_size-2](dst, coeffs, stride, col_limit); + s->hevcdsp.idct[log2_trafo_size-2](coeffs, col_limit); } } } + s->hevcdsp.transform_add[log2_trafo_size-2](dst, coeffs, stride); /* placed after the closing brace of the !cu_transquant_bypass_flag branch, so it is reached on the bypass path as well */ } void ff_hevc_hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size) diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c index d89d6db3e7..eeea542691 100644 --- a/libavcodec/hevcdsp.c +++ b/libavcodec/hevcdsp.c @@ -191,21 +191,21 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth) #define HEVC_DSP(depth) \ hevcdsp->put_pcm = FUNC(put_pcm, depth); \ - hevcdsp->transquant_bypass[0] = FUNC(transquant_bypass4x4, depth); \ - hevcdsp->transquant_bypass[1] = FUNC(transquant_bypass8x8, depth); \ - hevcdsp->transquant_bypass[2] = 
FUNC(transquant_bypass16x16, depth); \ - hevcdsp->transquant_bypass[3] = FUNC(transquant_bypass32x32, depth); \ + hevcdsp->transform_add[0] = FUNC(transform_add4x4, depth); /* transform_addNxN are the template kernels previously named transquant_bypassNxN */ \ + hevcdsp->transform_add[1] = FUNC(transform_add8x8, depth); \ + hevcdsp->transform_add[2] = FUNC(transform_add16x16, depth); \ + hevcdsp->transform_add[3] = FUNC(transform_add32x32, depth); \ hevcdsp->transform_skip = FUNC(transform_skip, depth); \ - hevcdsp->transform_4x4_luma_add = FUNC(transform_4x4_luma_add, depth); \ - hevcdsp->transform_add[0] = FUNC(transform_4x4_add, depth); \ - hevcdsp->transform_add[1] = FUNC(transform_8x8_add, depth); \ - hevcdsp->transform_add[2] = FUNC(transform_16x16_add, depth); \ - hevcdsp->transform_add[3] = FUNC(transform_32x32_add, depth); \ + hevcdsp->idct_4x4_luma = FUNC(transform_4x4_luma, depth); /* template function is transform_4x4_luma, not idct_-prefixed like the NxN kernels */ \ + hevcdsp->idct[0] = FUNC(idct_4x4, depth); \ + hevcdsp->idct[1] = FUNC(idct_8x8, depth); \ + hevcdsp->idct[2] = FUNC(idct_16x16, depth); \ + hevcdsp->idct[3] = FUNC(idct_32x32, depth); \ \ - hevcdsp->transform_dc_add[0] = FUNC(transform_4x4_dc_add, depth); \ - hevcdsp->transform_dc_add[1] = FUNC(transform_8x8_dc_add, depth); \ - hevcdsp->transform_dc_add[2] = FUNC(transform_16x16_dc_add, depth); \ - hevcdsp->transform_dc_add[3] = FUNC(transform_32x32_dc_add, depth); \ + hevcdsp->idct_dc[0] = FUNC(idct_4x4_dc, depth); \ + hevcdsp->idct_dc[1] = FUNC(idct_8x8_dc, depth); \ + hevcdsp->idct_dc[2] = FUNC(idct_16x16_dc, depth); \ + hevcdsp->idct_dc[3] = FUNC(idct_32x32_dc, depth); \ \ hevcdsp->sao_band_filter = FUNC(sao_band_filter_0, depth); \ hevcdsp->sao_edge_filter[0] = FUNC(sao_edge_filter_0, depth); \ diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h index 3a76fc4937..e17ee7b4ce 100644 --- a/libavcodec/hevcdsp.h +++ b/libavcodec/hevcdsp.h @@ -44,13 +44,15 @@ typedef struct HEVCDSPContext { void (*put_pcm)(uint8_t *dst, ptrdiff_t stride, int size, GetBitContext *gb, int pcm_bit_depth); - void (*transquant_bypass[4])(uint8_t *dst, int16_t *coeffs, - 
ptrdiff_t stride); + void (*transform_add[4])(uint8_t *_dst, int16_t *coeffs, ptrdiff_t _stride); - void (*transform_skip)(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); - void (*transform_4x4_luma_add)(uint8_t *dst, int16_t *coeffs, - ptrdiff_t stride); - void (*transform_add[4])(uint8_t *dst, int16_t *coeffs, ptrdiff_t _stride, int col_limit); + void (*transform_skip)(int16_t *coeffs, int16_t log2_size); + + void (*idct_4x4_luma)(int16_t *coeffs); + + void (*idct[4])(int16_t *coeffs, int col_limit); + + void (*idct_dc[4])(int16_t *coeffs); /* NOTE(review): transform_dc_add[4] just below stays in the struct as a context line while this patch removes its assignments in HEVC_DSP() - looks like a leftover member; confirm nothing else initialises or calls it */ void (*transform_dc_add[4])(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); diff --git a/libavcodec/hevcdsp_template.c b/libavcodec/hevcdsp_template.c index ebfb9e818e..b08050b67a 100644 --- a/libavcodec/hevcdsp_template.c +++ b/libavcodec/hevcdsp_template.c @@ -42,7 +42,7 @@ static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int size, } } -static void FUNC(transquant_bypass4x4)(uint8_t *_dst, int16_t *coeffs, +static void FUNC(transform_add4x4)(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride) { int x, y; @@ -59,7 +59,7 @@ static void FUNC(transquant_bypass4x4)(uint8_t *_dst, int16_t *coeffs, } } -static void FUNC(transquant_bypass8x8)(uint8_t *_dst, int16_t *coeffs, +static void FUNC(transform_add8x8)(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride) { int x, y; @@ -76,7 +76,7 @@ static void FUNC(transquant_bypass8x8)(uint8_t *_dst, int16_t *coeffs, } } -static void FUNC(transquant_bypass16x16)(uint8_t *_dst, int16_t *coeffs, +static void FUNC(transform_add16x16)(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride) { int x, y; @@ -93,7 +93,7 @@ static void FUNC(transquant_bypass16x16)(uint8_t *_dst, int16_t *coeffs, } } -static void FUNC(transquant_bypass32x32)(uint8_t *_dst, int16_t *coeffs, +static void FUNC(transform_add32x32)(uint8_t *_dst, int16_t *coeffs, ptrdiff_t stride) { int x, y; @@ -110,24 +110,29 @@ static void FUNC(transquant_bypass32x32)(uint8_t *_dst, int16_t *coeffs, } } -static void 
FUNC(transform_skip)(uint8_t *_dst, int16_t *coeffs, - ptrdiff_t stride) +static void FUNC(transform_skip)(int16_t *_coeffs, int16_t log2_size) { - pixel *dst = (pixel *)_dst; - int shift = 13 - BIT_DEPTH; -#if BIT_DEPTH <= 13 - int offset = 1 << (shift - 1); -#else - int offset = 0; -#endif + int shift = 15 - BIT_DEPTH - log2_size; /* the else branch below handles shift <= 0 by shifting left instead */ int x, y; + int size = 1 << log2_size; + int16_t *coeffs = _coeffs; - stride /= sizeof(pixel); - for (y = 0; y < 4 * 4; y += 4) { - for (x = 0; x < 4; x++) - dst[x] = av_clip_pixel(dst[x] + ((coeffs[y + x] + offset) >> shift)); - dst += stride; + if (shift > 0) { + int offset = 1 << (shift - 1); + for (y = 0; y < size; y++) { + for (x = 0; x < size; x++) { + *coeffs = (*coeffs + offset) >> shift; + coeffs++; + } + } + } else { + for (y = 0; y < size; y++) { + for (x = 0; x < size; x++) { + *coeffs = *coeffs << -shift; /* NOTE(review): left-shifting a negative signed coefficient is undefined behaviour in C - consider shifting an unsigned copy of the value */ + coeffs++; + } + } } } @@ -151,17 +156,13 @@ static void FUNC(transform_skip)(uint8_t *_dst, int16_t *coeffs, assign(dst[3 * step], 55 * c0 + 29 * c2 - c3); \ } while (0) -static void FUNC(transform_4x4_luma_add)(uint8_t *_dst, int16_t *coeffs, - ptrdiff_t stride) +static void FUNC(transform_4x4_luma)(int16_t *coeffs) { int i; - pixel *dst = (pixel *)_dst; int shift = 7; int add = 1 << (shift - 1); int16_t *src = coeffs; - stride /= sizeof(pixel); - for (i = 0; i < 4; i++) { TR_4x4_LUMA(src, src, 4, SCALE); src++; @@ -170,9 +171,8 @@ static void FUNC(transform_4x4_luma_add)(uint8_t *_dst, int16_t *coeffs, shift = 20 - BIT_DEPTH; add = 1 << (shift - 1); for (i = 0; i < 4; i++) { - TR_4x4_LUMA(dst, coeffs, 1, ADD_AND_SCALE); + TR_4x4_LUMA(coeffs, coeffs, 1, SCALE); /* second pass writes back into coeffs with SCALE instead of ADD_AND_SCALE into dst */ coeffs += 4; - dst += stride; } } @@ -239,60 +239,71 @@ static void FUNC(transform_4x4_luma_add)(uint8_t *_dst, int16_t *coeffs, } \ } while (0) -#define TRANSFORM_ADD(H) \ -static void FUNC(transform_##H ##x ##H ##_add)( \ - uint8_t *_dst, int16_t *coeffs, ptrdiff_t _stride, int col_limit) { \ - int i; \ - pixel *dst = (pixel *)_dst; \ - int stride = 
_stride/sizeof(pixel); \ - int shift = 7; \ - int add = 1 << (shift - 1); \ - int16_t *src = coeffs; \ - int limit = FFMIN(col_limit + 4, H); \ - \ - for (i = 0; i < H; i++) { \ - TR_ ## H(src, src, H, H, SCALE, limit); \ - if (limit < H && i%4 == 0 && !!i) \ - limit -= 4; \ - src++; \ - } \ - limit = FFMIN(col_limit, H); \ - \ - shift = 20 - BIT_DEPTH; \ - add = 1 << (shift - 1); \ - for (i = 0; i < H; i++) { \ - TR_ ## H(dst, coeffs, 1, 1, ADD_AND_SCALE, limit); \ - coeffs += H; \ - dst += stride; \ - } \ +#define IDCT_VAR4(H) \ + int limit2 = FFMIN(col_limit + 4, H) /* declares no limit variable although IDCT(4) passes limit to TR_4; presumably TR_4 never expands that argument - confirm against the TR_4 macro */ +#define IDCT_VAR8(H) \ + int limit = FFMIN(col_limit, H); \ + int limit2 = FFMIN(col_limit + 4, H) +#define IDCT_VAR16(H) IDCT_VAR8(H) +#define IDCT_VAR32(H) IDCT_VAR8(H) + +#define IDCT(H) \ +static void FUNC(idct_##H ##x ##H )( \ + int16_t *coeffs, int col_limit) { \ + int i; \ + int shift = 7; \ + int add = 1 << (shift - 1); \ + int16_t *src = coeffs; \ + IDCT_VAR ##H(H); \ + \ + for (i = 0; i < H; i++) { \ + TR_ ## H(src, src, H, H, SCALE, limit2); \ + if (limit2 < H && i%4 == 0 && !!i) \ + limit2 -= 4; \ + src++; \ + } \ + \ + shift = 20 - BIT_DEPTH; \ + add = 1 << (shift - 1); \ + for (i = 0; i < H; i++) { \ + TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit); /* second pass also uses SCALE with coeffs as destination, so the result stays in coeffs */ \ + coeffs += H; \ + } \ } -#define TRANSFORM_DC_ADD(H) \ -static void FUNC(transform_##H ##x ##H ##_dc_add)( \ - uint8_t *_dst, int16_t *coeffs, ptrdiff_t _stride) { \ - int i, j; \ - pixel *dst = (pixel *)_dst; \ - int stride = _stride/sizeof(pixel); \ - int shift = 14 - BIT_DEPTH; \ - int add = 1 << (shift - 1); \ - int coeff = (((coeffs[0] + 1) >> 1) + add) >> shift; \ - \ - for (j = 0; j < H; j++) { \ - for (i = 0; i < H; i++) { \ - dst[i+j*stride] = av_clip_pixel(dst[i+j*stride] + coeff); \ - } \ - } \ +#define IDCT_DC(H) \ +static void FUNC(idct_##H ##x ##H ##_dc)( \ + int16_t *coeffs) { \ + int i, j; \ + int shift = 14 - BIT_DEPTH; \ + int add = 1 << (shift - 1); \ + int coeff = (((coeffs[0] + 1) >> 1) + add) >> shift; \ + \ + for (j 
= 0; j < H; j++) { \ + for (i = 0; i < H; i++) { \ + coeffs[i+j*H] = coeff; /* the whole HxH block is filled with the rounded DC value */ \ + } \ + } \ } -TRANSFORM_ADD( 4) -TRANSFORM_ADD( 8) -TRANSFORM_ADD(16) -TRANSFORM_ADD(32) +IDCT( 4) +IDCT( 8) +IDCT(16) +IDCT(32) -TRANSFORM_DC_ADD( 4) -TRANSFORM_DC_ADD( 8) -TRANSFORM_DC_ADD(16) -TRANSFORM_DC_ADD(32) +IDCT_DC( 4) +IDCT_DC( 8) +IDCT_DC(16) +IDCT_DC(32) + +#undef TR_4 +#undef TR_8 +#undef TR_16 +#undef TR_32 + +#undef SET +#undef SCALE +#undef ADD_AND_SCALE static void FUNC(sao_band_filter_0)(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride, SAOParams *sao, @@ -505,15 +516,6 @@ static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src, #undef CMP -#undef SET -#undef SCALE -#undef ADD_AND_SCALE -#undef TR_4 -#undef TR_8 -#undef TR_16 -#undef TR_32 - - //////////////////////////////////////////////////////////////////////////////// // ////////////////////////////////////////////////////////////////////////////////