mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-25 11:15:34 +00:00
92f3d837b2
Update to upstream commit id e87fb2378f01103d5d6e477a4ef6892dc714e614 from a couple of weeks ago to pick up changes. MozReview-Commit-ID: H7H69A7qFXD --HG-- extra : rebase_source : dee676da15b65e4eea612d20529c4fb312bbddfb
536 lines
19 KiB
C
536 lines
19 KiB
C
/*
|
|
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
|
*
|
|
* This source code is subject to the terms of the BSD 2 Clause License and
|
|
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
|
* was not distributed with this source code in the LICENSE file, you can
|
|
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
|
* Media Patent License 1.0 was not distributed with this source code in the
|
|
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
|
*/
|
|
|
|
#include <math.h>
|
|
#include <string.h>
|
|
|
|
#include "./aom_scale_rtcd.h"
|
|
#include "aom/aom_integer.h"
|
|
#include "av1/common/cdef.h"
|
|
#include "av1/common/onyxc_int.h"
|
|
#include "av1/common/reconinter.h"
|
|
#include "av1/encoder/encoder.h"
|
|
|
|
#define REDUCED_PRI_STRENGTHS 8
|
|
#define REDUCED_TOTAL_STRENGTHS (REDUCED_PRI_STRENGTHS * CDEF_SEC_STRENGTHS)
|
|
#define TOTAL_STRENGTHS (CDEF_PRI_STRENGTHS * CDEF_SEC_STRENGTHS)
|
|
|
|
static int priconv[REDUCED_PRI_STRENGTHS] = { 0, 1, 2, 3, 4, 7, 12, 25 };
|
|
|
|
/* Search for the best strength to add as an option, knowing we
|
|
already selected nb_strengths options. */
|
|
static uint64_t search_one(int *lev, int nb_strengths,
|
|
uint64_t mse[][TOTAL_STRENGTHS], int sb_count,
|
|
int fast) {
|
|
uint64_t tot_mse[TOTAL_STRENGTHS];
|
|
const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
|
|
int i, j;
|
|
uint64_t best_tot_mse = (uint64_t)1 << 63;
|
|
int best_id = 0;
|
|
memset(tot_mse, 0, sizeof(tot_mse));
|
|
for (i = 0; i < sb_count; i++) {
|
|
int gi;
|
|
uint64_t best_mse = (uint64_t)1 << 63;
|
|
/* Find best mse among already selected options. */
|
|
for (gi = 0; gi < nb_strengths; gi++) {
|
|
if (mse[i][lev[gi]] < best_mse) {
|
|
best_mse = mse[i][lev[gi]];
|
|
}
|
|
}
|
|
/* Find best mse when adding each possible new option. */
|
|
for (j = 0; j < total_strengths; j++) {
|
|
uint64_t best = best_mse;
|
|
if (mse[i][j] < best) best = mse[i][j];
|
|
tot_mse[j] += best;
|
|
}
|
|
}
|
|
for (j = 0; j < total_strengths; j++) {
|
|
if (tot_mse[j] < best_tot_mse) {
|
|
best_tot_mse = tot_mse[j];
|
|
best_id = j;
|
|
}
|
|
}
|
|
lev[nb_strengths] = best_id;
|
|
return best_tot_mse;
|
|
}
|
|
|
|
/* Search for the best luma+chroma strength to add as an option, knowing we
|
|
already selected nb_strengths options. */
|
|
static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
|
|
uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count,
|
|
int fast) {
|
|
uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
|
|
#if !CONFIG_CDEF_SINGLEPASS
|
|
const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
|
|
#endif
|
|
int i, j;
|
|
uint64_t best_tot_mse = (uint64_t)1 << 63;
|
|
int best_id0 = 0;
|
|
int best_id1 = 0;
|
|
#if CONFIG_CDEF_SINGLEPASS
|
|
const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
|
|
#endif
|
|
memset(tot_mse, 0, sizeof(tot_mse));
|
|
for (i = 0; i < sb_count; i++) {
|
|
int gi;
|
|
uint64_t best_mse = (uint64_t)1 << 63;
|
|
/* Find best mse among already selected options. */
|
|
for (gi = 0; gi < nb_strengths; gi++) {
|
|
uint64_t curr = mse[0][i][lev0[gi]];
|
|
curr += mse[1][i][lev1[gi]];
|
|
if (curr < best_mse) {
|
|
best_mse = curr;
|
|
}
|
|
}
|
|
/* Find best mse when adding each possible new option. */
|
|
for (j = 0; j < total_strengths; j++) {
|
|
int k;
|
|
for (k = 0; k < total_strengths; k++) {
|
|
uint64_t best = best_mse;
|
|
uint64_t curr = mse[0][i][j];
|
|
curr += mse[1][i][k];
|
|
if (curr < best) best = curr;
|
|
tot_mse[j][k] += best;
|
|
}
|
|
}
|
|
}
|
|
for (j = 0; j < total_strengths; j++) {
|
|
int k;
|
|
for (k = 0; k < total_strengths; k++) {
|
|
if (tot_mse[j][k] < best_tot_mse) {
|
|
best_tot_mse = tot_mse[j][k];
|
|
best_id0 = j;
|
|
best_id1 = k;
|
|
}
|
|
}
|
|
}
|
|
lev0[nb_strengths] = best_id0;
|
|
lev1[nb_strengths] = best_id1;
|
|
return best_tot_mse;
|
|
}
|
|
|
|
/* Search for the set of strengths that minimizes mse. */
|
|
static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
|
|
uint64_t mse[][TOTAL_STRENGTHS],
|
|
int sb_count, int fast) {
|
|
uint64_t best_tot_mse;
|
|
int i;
|
|
best_tot_mse = (uint64_t)1 << 63;
|
|
/* Greedy search: add one strength options at a time. */
|
|
for (i = 0; i < nb_strengths; i++) {
|
|
best_tot_mse = search_one(best_lev, i, mse, sb_count, fast);
|
|
}
|
|
/* Trying to refine the greedy search by reconsidering each
|
|
already-selected option. */
|
|
if (!fast) {
|
|
for (i = 0; i < 4 * nb_strengths; i++) {
|
|
int j;
|
|
for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1];
|
|
best_tot_mse =
|
|
search_one(best_lev, nb_strengths - 1, mse, sb_count, fast);
|
|
}
|
|
}
|
|
return best_tot_mse;
|
|
}
|
|
|
|
/* Search for the set of luma+chroma strengths that minimizes mse. */
|
|
static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1,
|
|
int nb_strengths,
|
|
uint64_t (**mse)[TOTAL_STRENGTHS],
|
|
int sb_count, int fast) {
|
|
uint64_t best_tot_mse;
|
|
int i;
|
|
best_tot_mse = (uint64_t)1 << 63;
|
|
/* Greedy search: add one strength options at a time. */
|
|
for (i = 0; i < nb_strengths; i++) {
|
|
best_tot_mse =
|
|
search_one_dual(best_lev0, best_lev1, i, mse, sb_count, fast);
|
|
}
|
|
/* Trying to refine the greedy search by reconsidering each
|
|
already-selected option. */
|
|
for (i = 0; i < 4 * nb_strengths; i++) {
|
|
int j;
|
|
for (j = 0; j < nb_strengths - 1; j++) {
|
|
best_lev0[j] = best_lev0[j + 1];
|
|
best_lev1[j] = best_lev1[j + 1];
|
|
}
|
|
best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse,
|
|
sb_count, fast);
|
|
}
|
|
return best_tot_mse;
|
|
}
|
|
|
|
/* FIXME: SSE-optimize this. */
|
|
static void copy_sb16_16(uint16_t *dst, int dstride, const uint16_t *src,
|
|
int src_voffset, int src_hoffset, int sstride,
|
|
int vsize, int hsize) {
|
|
int r, c;
|
|
const uint16_t *base = &src[src_voffset * sstride + src_hoffset];
|
|
for (r = 0; r < vsize; r++) {
|
|
for (c = 0; c < hsize; c++) {
|
|
dst[r * dstride + c] = base[r * sstride + c];
|
|
}
|
|
}
|
|
}
|
|
|
|
static INLINE uint64_t dist_8x8_16bit(uint16_t *dst, int dstride, uint16_t *src,
|
|
int sstride, int coeff_shift) {
|
|
uint64_t svar = 0;
|
|
uint64_t dvar = 0;
|
|
uint64_t sum_s = 0;
|
|
uint64_t sum_d = 0;
|
|
uint64_t sum_s2 = 0;
|
|
uint64_t sum_d2 = 0;
|
|
uint64_t sum_sd = 0;
|
|
int i, j;
|
|
for (i = 0; i < 8; i++) {
|
|
for (j = 0; j < 8; j++) {
|
|
sum_s += src[i * sstride + j];
|
|
sum_d += dst[i * dstride + j];
|
|
sum_s2 += src[i * sstride + j] * src[i * sstride + j];
|
|
sum_d2 += dst[i * dstride + j] * dst[i * dstride + j];
|
|
sum_sd += src[i * sstride + j] * dst[i * dstride + j];
|
|
}
|
|
}
|
|
/* Compute the variance -- the calculation cannot go negative. */
|
|
svar = sum_s2 - ((sum_s * sum_s + 32) >> 6);
|
|
dvar = sum_d2 - ((sum_d * sum_d + 32) >> 6);
|
|
return (uint64_t)floor(
|
|
.5 +
|
|
(sum_d2 + sum_s2 - 2 * sum_sd) * .5 *
|
|
(svar + dvar + (400 << 2 * coeff_shift)) /
|
|
(sqrt((20000 << 4 * coeff_shift) + svar * (double)dvar)));
|
|
}
|
|
|
|
static INLINE uint64_t mse_8x8_16bit(uint16_t *dst, int dstride, uint16_t *src,
|
|
int sstride) {
|
|
uint64_t sum = 0;
|
|
int i, j;
|
|
for (i = 0; i < 8; i++) {
|
|
for (j = 0; j < 8; j++) {
|
|
int e = dst[i * dstride + j] - src[i * sstride + j];
|
|
sum += e * e;
|
|
}
|
|
}
|
|
return sum;
|
|
}
|
|
|
|
static INLINE uint64_t mse_4x4_16bit(uint16_t *dst, int dstride, uint16_t *src,
|
|
int sstride) {
|
|
uint64_t sum = 0;
|
|
int i, j;
|
|
for (i = 0; i < 4; i++) {
|
|
for (j = 0; j < 4; j++) {
|
|
int e = dst[i * dstride + j] - src[i * sstride + j];
|
|
sum += e * e;
|
|
}
|
|
}
|
|
return sum;
|
|
}
|
|
|
|
/* Compute MSE only on the blocks we filtered. */
|
|
uint64_t compute_cdef_dist(uint16_t *dst, int dstride, uint16_t *src,
|
|
cdef_list *dlist, int cdef_count, BLOCK_SIZE bsize,
|
|
int coeff_shift, int pli) {
|
|
uint64_t sum = 0;
|
|
int bi, bx, by;
|
|
if (bsize == BLOCK_8X8) {
|
|
for (bi = 0; bi < cdef_count; bi++) {
|
|
by = dlist[bi].by;
|
|
bx = dlist[bi].bx;
|
|
if (pli == 0) {
|
|
sum += dist_8x8_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
|
|
&src[bi << (3 + 3)], 8, coeff_shift);
|
|
} else {
|
|
sum += mse_8x8_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
|
|
&src[bi << (3 + 3)], 8);
|
|
}
|
|
}
|
|
} else if (bsize == BLOCK_4X8) {
|
|
for (bi = 0; bi < cdef_count; bi++) {
|
|
by = dlist[bi].by;
|
|
bx = dlist[bi].bx;
|
|
sum += mse_4x4_16bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
|
|
&src[bi << (3 + 2)], 4);
|
|
sum += mse_4x4_16bit(&dst[((by << 3) + 4) * dstride + (bx << 2)], dstride,
|
|
&src[(bi << (3 + 2)) + 4 * 4], 4);
|
|
}
|
|
} else if (bsize == BLOCK_8X4) {
|
|
for (bi = 0; bi < cdef_count; bi++) {
|
|
by = dlist[bi].by;
|
|
bx = dlist[bi].bx;
|
|
sum += mse_4x4_16bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
|
|
&src[bi << (2 + 3)], 8);
|
|
sum += mse_4x4_16bit(&dst[(by << 2) * dstride + (bx << 3) + 4], dstride,
|
|
&src[(bi << (2 + 3)) + 4], 8);
|
|
}
|
|
} else {
|
|
assert(bsize == BLOCK_4X4);
|
|
for (bi = 0; bi < cdef_count; bi++) {
|
|
by = dlist[bi].by;
|
|
bx = dlist[bi].bx;
|
|
sum += mse_4x4_16bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
|
|
&src[bi << (2 + 2)], 4);
|
|
}
|
|
}
|
|
return sum >> 2 * coeff_shift;
|
|
}
|
|
|
|
void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
|
|
AV1_COMMON *cm, MACROBLOCKD *xd, int fast) {
|
|
int r, c;
|
|
int fbr, fbc;
|
|
uint16_t *src[3];
|
|
uint16_t *ref_coeff[3];
|
|
cdef_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
|
|
int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
|
|
int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
|
|
int stride[3];
|
|
int bsize[3];
|
|
int mi_wide_l2[3];
|
|
int mi_high_l2[3];
|
|
int xdec[3];
|
|
int ydec[3];
|
|
int pli;
|
|
int cdef_count;
|
|
int coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
|
|
uint64_t best_tot_mse = (uint64_t)1 << 63;
|
|
uint64_t tot_mse;
|
|
int sb_count;
|
|
int nvfb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
|
|
int nhfb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
|
|
int *sb_index = aom_malloc(nvfb * nhfb * sizeof(*sb_index));
|
|
int *selected_strength = aom_malloc(nvfb * nhfb * sizeof(*sb_index));
|
|
uint64_t(*mse[2])[TOTAL_STRENGTHS];
|
|
#if CONFIG_CDEF_SINGLEPASS
|
|
int pri_damping = 3 + (cm->base_qindex >> 6);
|
|
#else
|
|
int pri_damping = 6;
|
|
#endif
|
|
int sec_damping = 3 + (cm->base_qindex >> 6);
|
|
int i;
|
|
int nb_strengths;
|
|
int nb_strength_bits;
|
|
int quantizer;
|
|
double lambda;
|
|
int nplanes = 3;
|
|
const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
|
|
DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]);
|
|
uint16_t *in;
|
|
DECLARE_ALIGNED(32, uint16_t, tmp_dst[CDEF_BLOCKSIZE * CDEF_BLOCKSIZE]);
|
|
int chroma_cdef = xd->plane[1].subsampling_x == xd->plane[1].subsampling_y &&
|
|
xd->plane[2].subsampling_x == xd->plane[2].subsampling_y;
|
|
quantizer =
|
|
av1_ac_quant(cm->base_qindex, 0, cm->bit_depth) >> (cm->bit_depth - 8);
|
|
lambda = .12 * quantizer * quantizer / 256.;
|
|
|
|
av1_setup_dst_planes(xd->plane, cm->sb_size, frame, 0, 0);
|
|
mse[0] = aom_malloc(sizeof(**mse) * nvfb * nhfb);
|
|
mse[1] = aom_malloc(sizeof(**mse) * nvfb * nhfb);
|
|
for (pli = 0; pli < nplanes; pli++) {
|
|
uint8_t *ref_buffer;
|
|
int ref_stride;
|
|
switch (pli) {
|
|
case 0:
|
|
ref_buffer = ref->y_buffer;
|
|
ref_stride = ref->y_stride;
|
|
break;
|
|
case 1:
|
|
ref_buffer = ref->u_buffer;
|
|
ref_stride = ref->uv_stride;
|
|
break;
|
|
case 2:
|
|
ref_buffer = ref->v_buffer;
|
|
ref_stride = ref->uv_stride;
|
|
break;
|
|
}
|
|
src[pli] = aom_memalign(
|
|
32, sizeof(*src) * cm->mi_rows * cm->mi_cols * MI_SIZE * MI_SIZE);
|
|
ref_coeff[pli] = aom_memalign(
|
|
32, sizeof(*ref_coeff) * cm->mi_rows * cm->mi_cols * MI_SIZE * MI_SIZE);
|
|
xdec[pli] = xd->plane[pli].subsampling_x;
|
|
ydec[pli] = xd->plane[pli].subsampling_y;
|
|
bsize[pli] = ydec[pli] ? (xdec[pli] ? BLOCK_4X4 : BLOCK_8X4)
|
|
: (xdec[pli] ? BLOCK_4X8 : BLOCK_8X8);
|
|
stride[pli] = cm->mi_cols << MI_SIZE_LOG2;
|
|
mi_wide_l2[pli] = MI_SIZE_LOG2 - xd->plane[pli].subsampling_x;
|
|
mi_high_l2[pli] = MI_SIZE_LOG2 - xd->plane[pli].subsampling_y;
|
|
|
|
const int frame_height =
|
|
(cm->mi_rows * MI_SIZE) >> xd->plane[pli].subsampling_y;
|
|
const int frame_width =
|
|
(cm->mi_cols * MI_SIZE) >> xd->plane[pli].subsampling_x;
|
|
|
|
for (r = 0; r < frame_height; ++r) {
|
|
for (c = 0; c < frame_width; ++c) {
|
|
#if CONFIG_HIGHBITDEPTH
|
|
if (cm->use_highbitdepth) {
|
|
src[pli][r * stride[pli] + c] = CONVERT_TO_SHORTPTR(
|
|
xd->plane[pli].dst.buf)[r * xd->plane[pli].dst.stride + c];
|
|
ref_coeff[pli][r * stride[pli] + c] =
|
|
CONVERT_TO_SHORTPTR(ref_buffer)[r * ref_stride + c];
|
|
} else {
|
|
#endif
|
|
src[pli][r * stride[pli] + c] =
|
|
xd->plane[pli].dst.buf[r * xd->plane[pli].dst.stride + c];
|
|
ref_coeff[pli][r * stride[pli] + c] = ref_buffer[r * ref_stride + c];
|
|
#if CONFIG_HIGHBITDEPTH
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
}
|
|
in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER;
|
|
sb_count = 0;
|
|
for (fbr = 0; fbr < nvfb; ++fbr) {
|
|
for (fbc = 0; fbc < nhfb; ++fbc) {
|
|
int nvb, nhb;
|
|
int gi;
|
|
int dirinit = 0;
|
|
nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc);
|
|
nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr);
|
|
cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride +
|
|
MI_SIZE_64X64 * fbc]
|
|
->mbmi.cdef_strength = -1;
|
|
if (sb_all_skip(cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64)) continue;
|
|
cdef_count = sb_compute_cdef_list(cm, fbr * MI_SIZE_64X64,
|
|
fbc * MI_SIZE_64X64, dlist, 1);
|
|
for (pli = 0; pli < nplanes; pli++) {
|
|
for (i = 0; i < CDEF_INBUF_SIZE; i++) inbuf[i] = CDEF_VERY_LARGE;
|
|
for (gi = 0; gi < total_strengths; gi++) {
|
|
int threshold;
|
|
uint64_t curr_mse;
|
|
int sec_strength;
|
|
threshold = gi / CDEF_SEC_STRENGTHS;
|
|
if (fast) threshold = priconv[threshold];
|
|
if (pli > 0 && !chroma_cdef) threshold = 0;
|
|
/* We avoid filtering the pixels for which some of the pixels to
|
|
average
|
|
are outside the frame. We could change the filter instead, but it
|
|
would add special cases for any future vectorization. */
|
|
int yoff = CDEF_VBORDER * (fbr != 0);
|
|
int xoff = CDEF_HBORDER * (fbc != 0);
|
|
int ysize = (nvb << mi_high_l2[pli]) +
|
|
CDEF_VBORDER * (fbr != nvfb - 1) + yoff;
|
|
int xsize = (nhb << mi_wide_l2[pli]) +
|
|
CDEF_HBORDER * (fbc != nhfb - 1) + xoff;
|
|
sec_strength = gi % CDEF_SEC_STRENGTHS;
|
|
#if CONFIG_CDEF_SINGLEPASS
|
|
copy_sb16_16(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
|
|
src[pli],
|
|
(fbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff,
|
|
(fbc * MI_SIZE_64X64 << mi_wide_l2[pli]) - xoff,
|
|
stride[pli], ysize, xsize);
|
|
cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in, xdec[pli], ydec[pli],
|
|
dir, &dirinit, var, pli, dlist, cdef_count, threshold,
|
|
sec_strength + (sec_strength == 3), pri_damping,
|
|
sec_damping, coeff_shift);
|
|
#else
|
|
if (sec_strength == 0)
|
|
copy_sb16_16(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
|
|
src[pli],
|
|
(fbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff,
|
|
(fbc * MI_SIZE_64X64 << mi_wide_l2[pli]) - xoff,
|
|
stride[pli], ysize, xsize);
|
|
cdef_filter_fb(sec_strength ? NULL : (uint8_t *)in, CDEF_BSTRIDE,
|
|
tmp_dst, in, xdec[pli], ydec[pli], dir, &dirinit, var,
|
|
pli, dlist, cdef_count, threshold,
|
|
sec_strength + (sec_strength == 3), sec_damping,
|
|
pri_damping, coeff_shift, sec_strength != 0, 1);
|
|
#endif
|
|
curr_mse = compute_cdef_dist(
|
|
ref_coeff[pli] +
|
|
(fbr * MI_SIZE_64X64 << mi_high_l2[pli]) * stride[pli] +
|
|
(fbc * MI_SIZE_64X64 << mi_wide_l2[pli]),
|
|
stride[pli], tmp_dst, dlist, cdef_count, bsize[pli], coeff_shift,
|
|
pli);
|
|
if (pli < 2)
|
|
mse[pli][sb_count][gi] = curr_mse;
|
|
else
|
|
mse[1][sb_count][gi] += curr_mse;
|
|
sb_index[sb_count] =
|
|
MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc;
|
|
}
|
|
}
|
|
sb_count++;
|
|
}
|
|
}
|
|
nb_strength_bits = 0;
|
|
/* Search for different number of signalling bits. */
|
|
for (i = 0; i <= 3; i++) {
|
|
int j;
|
|
int best_lev0[CDEF_MAX_STRENGTHS];
|
|
int best_lev1[CDEF_MAX_STRENGTHS] = { 0 };
|
|
nb_strengths = 1 << i;
|
|
if (nplanes >= 3)
|
|
tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths,
|
|
mse, sb_count, fast);
|
|
else
|
|
tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count,
|
|
fast);
|
|
/* Count superblock signalling cost. */
|
|
tot_mse += (uint64_t)(sb_count * lambda * i);
|
|
/* Count header signalling cost. */
|
|
tot_mse += (uint64_t)(nb_strengths * lambda * CDEF_STRENGTH_BITS);
|
|
if (tot_mse < best_tot_mse) {
|
|
best_tot_mse = tot_mse;
|
|
nb_strength_bits = i;
|
|
for (j = 0; j < 1 << nb_strength_bits; j++) {
|
|
cm->cdef_strengths[j] = best_lev0[j];
|
|
cm->cdef_uv_strengths[j] = best_lev1[j];
|
|
}
|
|
}
|
|
}
|
|
nb_strengths = 1 << nb_strength_bits;
|
|
|
|
cm->cdef_bits = nb_strength_bits;
|
|
cm->nb_cdef_strengths = nb_strengths;
|
|
for (i = 0; i < sb_count; i++) {
|
|
int gi;
|
|
int best_gi;
|
|
uint64_t best_mse = (uint64_t)1 << 63;
|
|
best_gi = 0;
|
|
for (gi = 0; gi < cm->nb_cdef_strengths; gi++) {
|
|
uint64_t curr = mse[0][i][cm->cdef_strengths[gi]];
|
|
if (nplanes >= 3) curr += mse[1][i][cm->cdef_uv_strengths[gi]];
|
|
if (curr < best_mse) {
|
|
best_gi = gi;
|
|
best_mse = curr;
|
|
}
|
|
}
|
|
selected_strength[i] = best_gi;
|
|
cm->mi_grid_visible[sb_index[i]]->mbmi.cdef_strength = best_gi;
|
|
}
|
|
|
|
if (fast) {
|
|
for (int j = 0; j < nb_strengths; j++) {
|
|
cm->cdef_strengths[j] =
|
|
priconv[cm->cdef_strengths[j] / CDEF_SEC_STRENGTHS] *
|
|
CDEF_SEC_STRENGTHS +
|
|
(cm->cdef_strengths[j] % CDEF_SEC_STRENGTHS);
|
|
cm->cdef_uv_strengths[j] =
|
|
priconv[cm->cdef_uv_strengths[j] / CDEF_SEC_STRENGTHS] *
|
|
CDEF_SEC_STRENGTHS +
|
|
(cm->cdef_uv_strengths[j] % CDEF_SEC_STRENGTHS);
|
|
}
|
|
}
|
|
cm->cdef_pri_damping = pri_damping;
|
|
cm->cdef_sec_damping = sec_damping;
|
|
aom_free(mse[0]);
|
|
aom_free(mse[1]);
|
|
for (pli = 0; pli < nplanes; pli++) {
|
|
aom_free(src[pli]);
|
|
aom_free(ref_coeff[pli]);
|
|
}
|
|
aom_free(sb_index);
|
|
aom_free(selected_strength);
|
|
}
|