FFmpeg/libavcodec/jpeg2000dwt.c
Michael Niedermayer 0416b5e033 avcodec/jpeg2000dwt: Replace /2 by >>1
Divisions can be slow if the compiler fails to replace them by shifts

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
2015-06-23 18:59:00 +02:00

741 lines
20 KiB
C

/*
* Discrete wavelet transform
* Copyright (c) 2007 Kamil Nowosad
* Copyright (c) 2013 Nicolas Bertrand <nicoinattendu@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* Discrete wavelet transform
*/
#include "libavutil/avassert.h"
#include "libavutil/common.h"
#include "libavutil/mem.h"
#include "jpeg2000dwt.h"
#include "internal.h"
/* Lifting coefficients of the 9/7 DWT.
 * Parameters are single-precision floats. */
#define F_LFTG_ALPHA 1.586134342059924f
#define F_LFTG_BETA 0.052980118572961f
#define F_LFTG_GAMMA 0.882911075530934f
#define F_LFTG_DELTA 0.443506852043971f
/* The same lifting parameters in 16.16 fixed-point format.
 * Computed as param = (float param) * (1 << 16), rounded to an integer.
 * I_LFTG_K and I_LFTG_X are scale factors in the same format; their float
 * counterparts F_LFTG_K / F_LFTG_X are used below but not defined in this
 * file (presumably they come from jpeg2000dwt.h). */
#define I_LFTG_ALPHA 103949ll
#define I_LFTG_BETA 3472ll
#define I_LFTG_GAMMA 57862ll
#define I_LFTG_DELTA 29066ll
#define I_LFTG_K 80621ll
#define I_LFTG_X 53274ll
/* Extra fractional bits: the integer 9/7 transforms scale every sample up by
 * 1 << I_PRESHIFT before filtering and round the scaling away afterwards. */
#define I_PRESHIFT 8
/**
 * Mirror two samples past each end of the interval [i0, i1) of p.
 *
 * Writes p[i0 - 2], p[i0 - 1], p[i1] and p[i1 + 1]; the caller must make
 * sure those elements exist.
 */
static inline void extend53(int *p, int i0, int i1)
{
    int k;

    /* Left and right mirrors stay interleaved: on a two-sample line the
     * second round reads values written by the first one. */
    for (k = 1; k <= 2; k++) {
        p[i0 - k]     = p[i0 + k];
        p[i1 + k - 1] = p[i1 - k - 1];
    }
}
/**
 * Mirror four samples past each end of the interval [i0, i1) of a float line.
 *
 * Writes p[i0 - 4] .. p[i0 - 1] and p[i1] .. p[i1 + 3]; the caller must make
 * sure those elements exist.
 */
static inline void extend97_float(float *p, int i0, int i1)
{
    float *head = p + i0; /* first sample of the line        */
    float *tail = p + i1; /* one past the last sample        */
    int k;

    /* Left/right writes alternate exactly as the mirror distance grows, so
     * short lines see the same chained reflections as before. */
    for (k = 0; k < 4; k++) {
        head[-1 - k] = head[1 + k];
        tail[k]      = tail[-2 - k];
    }
}
/**
 * Mirror four samples past each end of the interval [i0, i1) of an integer
 * line.
 *
 * Writes p[i0 - 4] .. p[i0 - 1] and p[i1] .. p[i1 + 3]; the caller must make
 * sure those elements exist.
 */
static inline void extend97_int(int32_t *p, int i0, int i1)
{
    int dist = 1;

    /* One left and one right reflection per distance, nearest first. */
    while (dist <= 4) {
        p[i0 - dist]     = p[i0 + dist];
        p[i1 - 1 + dist] = p[i1 - 1 - dist];
        dist++;
    }
}
/**
 * Forward 1-D 5/3 lifting on the samples p[i0] .. p[i1 - 1], in place.
 *
 * A line of at most one sample is not filtered; the sample is only doubled
 * when it sits at coordinate 1. Otherwise the line is mirrored with
 * extend53() and two lifting steps are applied: first to the odd-indexed
 * samples, then to the even-indexed ones.
 *
 * @param p  line buffer; must be addressable from i0 - 2 to i1 + 1
 * @param i0 index of the first sample
 * @param i1 index one past the last sample
 */
static void sd_1d53(int *p, int i0, int i1)
{
    int i;

    if (i1 <= i0 + 1) {
        /* Multiply instead of shifting: left-shifting a negative value is
         * undefined behaviour in C. */
        if (i0 == 1)
            p[1] *= 2;
        return;
    }

    extend53(p, i0, i1);

    /* lifting step on the odd-indexed samples */
    for (i = ((i0+1)>>1) - 1; i < (i1+1)>>1; i++)
        p[2*i+1] -= (p[2*i] + p[2*i+2]) >> 1;
    /* lifting step on the even-indexed samples */
    for (i = ((i0+1)>>1); i < (i1+1)>>1; i++)
        p[2*i] += (p[2*i-1] + p[2*i+1] + 2) >> 2;
}
/**
 * Forward 5/3 transform of the buffer t, in place.
 *
 * Levels are visited from ndeclevels - 1 down to 0. On every level each
 * column and then each row of the level's region is copied into the line
 * buffer, filtered with sd_1d53() and written back deinterleaved: samples at
 * even line-buffer positions first, samples at odd positions after them.
 *
 * @param s initialised context; i_linebuf is used as scratch space
 * @param t samples, row stride taken from linelen[ndeclevels - 1][0]
 */
static void dwt_encode53(DWTContext *s, int *t)
{
    const int stride = s->linelen[s->ndeclevels - 1][0];
    /* leave room in front of index 0 for the mirrored samples */
    int *const line  = s->i_linebuf + 3;
    int lev;

    for (lev = s->ndeclevels - 1; lev >= 0; lev--) {
        const int width  = s->linelen[lev][0];
        const int height = s->linelen[lev][1];
        const int par_h  = s->mod[lev][0];
        const int par_v  = s->mod[lev][1];
        int *buf;
        int x, y, n, out;

        /* vertical pass, one column at a time */
        buf = line + par_v;
        for (x = 0; x < width; x++) {
            for (n = 0; n < height; n++)
                buf[n] = t[stride * n + x];

            sd_1d53(line, par_v, par_v + height);

            /* write back deinterleaved */
            out = 0;
            for (n = par_v; n < height; n += 2, out++)
                t[stride * out + x] = buf[n];
            for (n = 1 - par_v; n < height; n += 2, out++)
                t[stride * out + x] = buf[n];
        }

        /* horizontal pass, one row at a time */
        buf = line + par_h;
        for (y = 0; y < height; y++) {
            int *row = t + stride * y;

            for (n = 0; n < width; n++)
                buf[n] = row[n];

            sd_1d53(line, par_h, par_h + width);

            /* write back deinterleaved */
            out = 0;
            for (n = par_h; n < width; n += 2, out++)
                row[out] = buf[n];
            for (n = 1 - par_h; n < width; n += 2, out++)
                row[out] = buf[n];
        }
    }
}
/**
 * Forward 1-D 9/7 lifting (float) on the samples p[i0] .. p[i1 - 1], in place.
 *
 * A line of at most one sample is only scaled. Otherwise the line is
 * mirrored with extend97_float() and four lifting steps are applied,
 * alternating between odd- and even-indexed samples.
 *
 * NOTE(review): the lifting factors below are truncated double literals, not
 * the F_LFTG_* constants the inverse transform uses. They are kept as is
 * because replacing them changes the numerical output.
 *
 * @param p  line buffer; must be addressable from i0 - 4 to i1 + 3
 * @param i0 index of the first sample
 * @param i1 index one past the last sample
 */
static void sd_1d97_float(float *p, int i0, int i1)
{
    int first, last, k;

    if (i1 <= i0 + 1) {
        if (i0 == 1)
            p[1] *= F_LFTG_X * 2;
        else
            p[0] *= F_LFTG_K;
        return;
    }

    extend97_float(p, i0, i1);

    /* loop limits expressed as sample indices instead of pair indices */
    first = 2 * ((i0 + 1) >> 1);
    last  = 2 * ((i1 + 1) >> 1);

    for (k = first - 3; k <= last + 1; k += 2)
        p[k] -= 1.586134 * (p[k - 1] + p[k + 1]);
    for (k = first - 2; k <= last; k += 2)
        p[k] -= 0.052980 * (p[k - 1] + p[k + 1]);
    for (k = first - 1; k <= last - 1; k += 2)
        p[k] += 0.882911 * (p[k - 1] + p[k + 1]);
    for (k = first; k <= last - 2; k += 2)
        p[k] += 0.443506 * (p[k - 1] + p[k + 1]);
}
/**
 * Forward 9/7 float transform of the buffer t, in place.
 *
 * Levels are visited from ndeclevels - 1 down to 0. On every level each row
 * and then each column of the level's region is copied into the line buffer,
 * filtered with sd_1d97_float() and written back deinterleaved: samples at
 * even line-buffer positions first, samples at odd positions after them.
 *
 * @param s initialised context; f_linebuf is used as scratch space
 * @param t samples, row stride taken from linelen[ndeclevels - 1][0]
 */
static void dwt_encode97_float(DWTContext *s, float *t)
{
    const int stride  = s->linelen[s->ndeclevels - 1][0];
    /* leave room in front of index 0 for the mirrored samples */
    float *const line = s->f_linebuf + 5;
    int lev;

    for (lev = s->ndeclevels - 1; lev >= 0; lev--) {
        const int width  = s->linelen[lev][0];
        const int height = s->linelen[lev][1];
        const int par_h  = s->mod[lev][0];
        const int par_v  = s->mod[lev][1];
        float *buf;
        int x, y, n, out;

        /* horizontal pass, one row at a time */
        buf = line + par_h;
        for (y = 0; y < height; y++) {
            float *row = t + stride * y;

            for (n = 0; n < width; n++)
                buf[n] = row[n];

            sd_1d97_float(line, par_h, par_h + width);

            /* write back deinterleaved */
            out = 0;
            for (n = par_h; n < width; n += 2, out++)
                row[out] = buf[n];
            for (n = 1 - par_h; n < width; n += 2, out++)
                row[out] = buf[n];
        }

        /* vertical pass, one column at a time */
        buf = line + par_v;
        for (x = 0; x < width; x++) {
            for (n = 0; n < height; n++)
                buf[n] = t[stride * n + x];

            sd_1d97_float(line, par_v, par_v + height);

            /* write back deinterleaved */
            out = 0;
            for (n = par_v; n < height; n += 2, out++)
                t[stride * out + x] = buf[n];
            for (n = 1 - par_v; n < height; n += 2, out++)
                t[stride * out + x] = buf[n];
        }
    }
}
/**
 * Forward 1-D 9/7 lifting (16.16 fixed point) on the samples
 * p[i0] .. p[i1 - 1], in place.
 *
 * A line of at most one sample is only scaled. Otherwise the line is
 * mirrored with extend97_int() and four lifting steps are applied,
 * alternating between odd- and even-indexed samples. Each product is rounded
 * by adding half before the right shift.
 *
 * @param p  line buffer; must be addressable from i0 - 4 to i1 + 3
 * @param i0 index of the first sample
 * @param i1 index one past the last sample
 */
static void sd_1d97_int(int *p, int i0, int i1)
{
    int first, last, k;

    if (i1 <= i0 + 1) {
        if (i0 == 1)
            p[1] = (p[1] * I_LFTG_X + (1<<14)) >> 15;
        else
            p[0] = (p[0] * I_LFTG_K + (1<<15)) >> 16;
        return;
    }

    extend97_int(p, i0, i1);

    /* loop limits expressed as sample indices instead of pair indices */
    first = 2 * ((i0 + 1) >> 1);
    last  = 2 * ((i1 + 1) >> 1);

    for (k = first - 3; k <= last + 1; k += 2)
        p[k] -= (I_LFTG_ALPHA * (p[k - 1] + p[k + 1]) + (1 << 15)) >> 16;
    for (k = first - 2; k <= last; k += 2)
        p[k] -= (I_LFTG_BETA  * (p[k - 1] + p[k + 1]) + (1 << 15)) >> 16;
    for (k = first - 1; k <= last - 1; k += 2)
        p[k] += (I_LFTG_GAMMA * (p[k - 1] + p[k + 1]) + (1 << 15)) >> 16;
    for (k = first; k <= last - 2; k += 2)
        p[k] += (I_LFTG_DELTA * (p[k - 1] + p[k + 1]) + (1 << 15)) >> 16;
}
/**
 * Forward 9/7 integer transform of the buffer t, in place.
 *
 * All w * h samples are first scaled up by 1 << I_PRESHIFT to gain
 * fractional precision. Levels are then visited from ndeclevels - 1 down to
 * 0: every column and then every row is filtered with sd_1d97_int() and
 * written back deinterleaved. The samples of the first half are additionally
 * scaled by I_LFTG_X. Finally the pre-scaling is rounded away again.
 *
 * @param s initialised context; i_linebuf is used as scratch space
 * @param t samples, row stride taken from linelen[ndeclevels - 1][0]
 */
static void dwt_encode97_int(DWTContext *s, int *t)
{
    int lev;
    int w = s->linelen[s->ndeclevels-1][0];
    int h = s->linelen[s->ndeclevels-1][1];
    int i;
    int *line = s->i_linebuf;
    /* leave room in front of index 0 for the mirrored samples */
    line += 5;

    /* Multiply instead of shifting: left-shifting a negative sample is
     * undefined behaviour in C. */
    for (i = 0; i < w * h; i++)
        t[i] *= 1 << I_PRESHIFT;

    for (lev = s->ndeclevels-1; lev >= 0; lev--){
        int lh = s->linelen[lev][0],
            lv = s->linelen[lev][1],
            mh = s->mod[lev][0],
            mv = s->mod[lev][1],
            lp;
        int *l;

        // VER_SD
        l = line + mv;
        for (lp = 0; lp < lh; lp++) {
            int j = 0;

            for (i = 0; i < lv; i++)
                l[i] = t[w*i + lp];

            sd_1d97_int(line, mv, mv + lv);

            // copy back and deinterleave
            for (i =   mv; i < lv; i+=2, j++)
                t[w*j + lp] = ((l[i] * I_LFTG_X) + (1 << 15)) >> 16;
            for (i = 1-mv; i < lv; i+=2, j++)
                t[w*j + lp] = l[i];
        }

        // HOR_SD
        l = line + mh;
        for (lp = 0; lp < lv; lp++){
            int j = 0;

            for (i = 0; i < lh; i++)
                l[i] = t[w*lp + i];

            sd_1d97_int(line, mh, mh + lh);

            // copy back and deinterleave
            for (i =   mh; i < lh; i+=2, j++)
                t[w*lp + j] = ((l[i] * I_LFTG_X) + (1 << 15)) >> 16;
            for (i = 1-mh; i < lh; i+=2, j++)
                t[w*lp + j] = l[i];
        }
    }

    /* drop the extra precision again, rounding to nearest */
    for (i = 0; i < w * h; i++)
        t[i] = (t[i] + ((1<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
}
/**
 * Inverse 1-D 5/3 lifting on the samples p[i0] .. p[i1 - 1], in place.
 *
 * A line of at most one sample is not filtered; the sample is only halved
 * when it sits at coordinate 1. Otherwise the line is mirrored with
 * extend53() and the two lifting steps are undone: first on the even-indexed
 * samples, then on the odd-indexed ones.
 *
 * The neighbour sums are formed in unsigned arithmetic: this runs on decoded
 * coefficients, and a signed sum of two large values would overflow, which
 * is undefined behaviour. For sums that fit in an int the result is
 * unchanged.
 *
 * @param p  line buffer; must be addressable from i0 - 2 to i1 + 1
 * @param i0 index of the first sample
 * @param i1 index one past the last sample
 */
static void sr_1d53(int *p, int i0, int i1)
{
    int i;

    if (i1 <= i0 + 1) {
        if (i0 == 1)
            p[1] >>= 1;
        return;
    }

    extend53(p, i0, i1);

    /* undo the lifting step on the even-indexed samples */
    for (i = (i0 >> 1); i < (i1 >> 1) + 1; i++)
        p[2 * i] -= (int)(p[2 * i - 1] + (unsigned)p[2 * i + 1] + 2) >> 2;
    /* undo the lifting step on the odd-indexed samples */
    for (i = (i0 >> 1); i < (i1 >> 1); i++)
        p[2 * i + 1] += (int)(p[2 * i] + (unsigned)p[2 * i + 2]) >> 1;
}
/**
 * Inverse 5/3 transform of the buffer t, in place.
 *
 * Levels are visited from 0 up to ndeclevels - 1. On every level each row
 * and then each column of the level's region is copied into the line buffer
 * with its two halves interleaved again, filtered with sr_1d53() and written
 * back in natural order.
 *
 * @param s initialised context; i_linebuf is used as scratch space
 * @param t coefficients, row stride taken from linelen[ndeclevels - 1][0]
 */
static void dwt_decode53(DWTContext *s, int *t)
{
    const int stride    = s->linelen[s->ndeclevels - 1][0];
    /* leave room in front of index 0 for the mirrored samples */
    int32_t *const line = s->i_linebuf + 3;
    int lev;

    for (lev = 0; lev < s->ndeclevels; lev++) {
        const int width  = s->linelen[lev][0];
        const int height = s->linelen[lev][1];
        const int par_h  = s->mod[lev][0];
        const int par_v  = s->mod[lev][1];
        int *buf;
        int x, y, n, in;

        /* horizontal pass, one row at a time */
        buf = line + par_h;
        for (y = 0; y < height; y++) {
            int *row = t + stride * y;

            /* first half goes to every second slot, second half fills the gaps */
            in = 0;
            for (n = par_h; n < width; n += 2, in++)
                buf[n] = row[in];
            for (n = 1 - par_h; n < width; n += 2, in++)
                buf[n] = row[in];

            sr_1d53(line, par_h, par_h + width);

            for (n = 0; n < width; n++)
                row[n] = buf[n];
        }

        /* vertical pass, one column at a time */
        buf = line + par_v;
        for (x = 0; x < width; x++) {
            /* first half goes to every second slot, second half fills the gaps */
            in = 0;
            for (n = par_v; n < height; n += 2, in++)
                buf[n] = t[stride * in + x];
            for (n = 1 - par_v; n < height; n += 2, in++)
                buf[n] = t[stride * in + x];

            sr_1d53(line, par_v, par_v + height);

            for (n = 0; n < height; n++)
                t[stride * n + x] = buf[n];
        }
    }
}
/**
 * Inverse 1-D 9/7 lifting (float) on the samples p[i0] .. p[i1 - 1], in place.
 *
 * A line of at most one sample is only scaled. Otherwise the line is
 * mirrored with extend97_float() and the four lifting steps are undone in
 * the order delta, gamma, beta, alpha.
 *
 * @param p  line buffer; must be addressable from i0 - 4 to i1 + 3
 * @param i0 index of the first sample
 * @param i1 index one past the last sample
 */
static void sr_1d97_float(float *p, int i0, int i1)
{
    int first, last, k;

    if (i1 <= i0 + 1) {
        if (i0 == 1)
            p[1] *= F_LFTG_K/2;
        else
            p[0] *= F_LFTG_X;
        return;
    }

    extend97_float(p, i0, i1);

    /* loop limits expressed as sample indices instead of pair indices */
    first = 2 * (i0 >> 1);
    last  = 2 * (i1 >> 1);

    /* undo delta on the even samples */
    for (k = first - 2; k <= last + 2; k += 2)
        p[k] -= F_LFTG_DELTA * (p[k - 1] + p[k + 1]);
    /* undo gamma on the odd samples */
    for (k = first - 1; k <= last + 1; k += 2)
        p[k] -= F_LFTG_GAMMA * (p[k - 1] + p[k + 1]);
    /* undo beta on the even samples */
    for (k = first; k <= last; k += 2)
        p[k] += F_LFTG_BETA * (p[k - 1] + p[k + 1]);
    /* undo alpha on the odd samples */
    for (k = first + 1; k <= last - 1; k += 2)
        p[k] += F_LFTG_ALPHA * (p[k - 1] + p[k + 1]);
}
/**
 * Inverse 9/7 float transform of the buffer t, in place.
 *
 * Levels are visited from 0 up to ndeclevels - 1. On every level each row
 * and then each column of the level's region is copied into the line buffer
 * with its two halves interleaved again, filtered with sr_1d97_float() and
 * written back in natural order.
 *
 * @param s initialised context; f_linebuf is used as scratch space
 * @param t coefficients, row stride taken from linelen[ndeclevels - 1][0]
 */
static void dwt_decode97_float(DWTContext *s, float *t)
{
    const int stride  = s->linelen[s->ndeclevels - 1][0];
    /* index 0 of the line sits 5 elements into the buffer so that the
     * extend function can write in front of it */
    float *const line = s->f_linebuf + 5;
    int lev;

    for (lev = 0; lev < s->ndeclevels; lev++) {
        const int width  = s->linelen[lev][0];
        const int height = s->linelen[lev][1];
        const int par_h  = s->mod[lev][0];
        const int par_v  = s->mod[lev][1];
        float *buf;
        int x, y, n, in;

        /* horizontal pass, one row at a time */
        buf = line + par_h;
        for (y = 0; y < height; y++) {
            float *row = t + stride * y;

            /* first half goes to every second slot, second half fills the gaps */
            in = 0;
            for (n = par_h; n < width; n += 2, in++)
                buf[n] = row[in];
            for (n = 1 - par_h; n < width; n += 2, in++)
                buf[n] = row[in];

            sr_1d97_float(line, par_h, par_h + width);

            for (n = 0; n < width; n++)
                row[n] = buf[n];
        }

        /* vertical pass, one column at a time */
        buf = line + par_v;
        for (x = 0; x < width; x++) {
            /* first half goes to every second slot, second half fills the gaps */
            in = 0;
            for (n = par_v; n < height; n += 2, in++)
                buf[n] = t[stride * in + x];
            for (n = 1 - par_v; n < height; n += 2, in++)
                buf[n] = t[stride * in + x];

            sr_1d97_float(line, par_v, par_v + height);

            for (n = 0; n < height; n++)
                t[stride * n + x] = buf[n];
        }
    }
}
/**
 * Inverse 1-D 9/7 lifting (16.16 fixed point) on the samples
 * p[i0] .. p[i1 - 1], in place.
 *
 * A line of at most one sample is only scaled. Otherwise the line is
 * mirrored with extend97_int() and the four lifting steps are undone in the
 * order delta, gamma, beta, alpha.
 *
 * The neighbour sums are formed in 64 bits: this runs on decoded
 * coefficients, and adding two large 32-bit values as int would overflow
 * before the 64-bit multiplication, which is undefined behaviour. For sums
 * that fit in an int the result is unchanged.
 *
 * @param p  line buffer; must be addressable from i0 - 4 to i1 + 3
 * @param i0 index of the first sample
 * @param i1 index one past the last sample
 */
static void sr_1d97_int(int32_t *p, int i0, int i1)
{
    int i;

    if (i1 <= i0 + 1) {
        if (i0 == 1)
            p[1] = (p[1] * I_LFTG_K + (1<<16)) >> 17;
        else
            p[0] = (p[0] * I_LFTG_X + (1<<15)) >> 16;
        return;
    }

    extend97_int(p, i0, i1);

    /* undo delta on the even samples */
    for (i = (i0 >> 1) - 1; i < (i1 >> 1) + 2; i++)
        p[2 * i]     -= (I_LFTG_DELTA * (p[2 * i - 1] + (int64_t)p[2 * i + 1]) + (1 << 15)) >> 16;
    /* undo gamma on the odd samples */
    for (i = (i0 >> 1) - 1; i < (i1 >> 1) + 1; i++)
        p[2 * i + 1] -= (I_LFTG_GAMMA * (p[2 * i]     + (int64_t)p[2 * i + 2]) + (1 << 15)) >> 16;
    /* undo beta on the even samples */
    for (i = (i0 >> 1); i < (i1 >> 1) + 1; i++)
        p[2 * i]     += (I_LFTG_BETA  * (p[2 * i - 1] + (int64_t)p[2 * i + 1]) + (1 << 15)) >> 16;
    /* undo alpha on the odd samples */
    for (i = (i0 >> 1); i < (i1 >> 1); i++)
        p[2 * i + 1] += (I_LFTG_ALPHA * (p[2 * i]     + (int64_t)p[2 * i + 2]) + (1 << 15)) >> 16;
}
/**
 * Inverse 9/7 integer transform of the buffer t, in place.
 *
 * All w * h coefficients are first scaled up by 1 << I_PRESHIFT to gain
 * fractional precision. Levels are then visited from 0 up to ndeclevels - 1:
 * every row and then every column is re-interleaved into the line buffer
 * (the first half scaled by I_LFTG_K on the way), filtered with
 * sr_1d97_int() and written back in natural order. Finally the pre-scaling
 * is rounded away again.
 *
 * The pre-scaling and the final rounding are done in 64-bit arithmetic:
 * left-shifting a negative value, or overflowing an int, is undefined
 * behaviour, and the coefficients come from the bitstream.
 *
 * @param s initialised context; i_linebuf is used as scratch space
 * @param t coefficients, row stride taken from linelen[ndeclevels - 1][0]
 */
static void dwt_decode97_int(DWTContext *s, int32_t *t)
{
    int lev;
    int w       = s->linelen[s->ndeclevels - 1][0];
    int h       = s->linelen[s->ndeclevels - 1][1];
    int i;
    int32_t *line = s->i_linebuf;
    int32_t *data = t;
    /* position at index 0 of line range [0-5,w+5] cf. extend function */
    line += 5;

    for (i = 0; i < w * h; i++)
        data[i] *= 1LL << I_PRESHIFT;

    for (lev = 0; lev < s->ndeclevels; lev++) {
        int lh = s->linelen[lev][0],
            lv = s->linelen[lev][1],
            mh = s->mod[lev][0],
            mv = s->mod[lev][1],
            lp;
        int32_t *l;

        // HOR_SD
        l = line + mh;
        for (lp = 0; lp < lv; lp++) {
            int j = 0;
            // rescale with interleaving
            for (i = mh; i < lh; i += 2, j++)
                l[i] = ((data[w * lp + j] * I_LFTG_K) + (1 << 15)) >> 16;
            for (i = 1 - mh; i < lh; i += 2, j++)
                l[i] = data[w * lp + j];

            sr_1d97_int(line, mh, mh + lh);

            for (i = 0; i < lh; i++)
                data[w * lp + i] = l[i];
        }

        // VER_SD
        l = line + mv;
        for (lp = 0; lp < lh; lp++) {
            int j = 0;
            // rescale with interleaving
            for (i = mv; i < lv; i += 2, j++)
                l[i] = ((data[w * j + lp] * I_LFTG_K) + (1 << 15)) >> 16;
            for (i = 1 - mv; i < lv; i += 2, j++)
                l[i] = data[w * j + lp];

            sr_1d97_int(line, mv, mv + lv);

            for (i = 0; i < lv; i++)
                data[w * i + lp] = l[i];
        }
    }

    /* drop the extra precision again, rounding to nearest */
    for (i = 0; i < w * h; i++)
        data[i] = (data[i] + ((1LL<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
}
/**
 * Set up a DWT context for a region and allocate its line buffer.
 *
 * For every level, from decomp_levels - 1 down to 0, the length of the
 * region in both dimensions and the parity of its start coordinate are
 * stored; the borders are then halved (rounding up) for the next level.
 *
 * NOTE(review): decomp_levels is not range-checked against the size of
 * s->linelen / s->mod here — the caller presumably guarantees it; confirm.
 *
 * @param s             context to fill in
 * @param border        border[d][0] / border[d][1] are the start / end
 *                      coordinate in dimension d
 * @param decomp_levels number of decomposition levels
 * @param type          FF_DWT97, FF_DWT97_INT or FF_DWT53
 * @return 0 on success, AVERROR(ENOMEM) if the line buffer cannot be
 *         allocated, -1 for an unknown type
 */
int ff_jpeg2000_dwt_init(DWTContext *s, uint16_t border[2][2],
                         int decomp_levels, int type)
{
    int lo[2], hi[2];
    int dim, lev, maxlen;

    s->ndeclevels = decomp_levels;
    s->type       = type;

    for (dim = 0; dim < 2; dim++) {
        lo[dim] = border[dim][0];
        hi[dim] = border[dim][1];
    }

    /* the longest line decides the size of the scratch buffer */
    maxlen = FFMAX(hi[0] - lo[0], hi[1] - lo[1]);

    for (lev = decomp_levels - 1; lev >= 0; lev--) {
        for (dim = 0; dim < 2; dim++) {
            s->linelen[lev][dim] = hi[dim] - lo[dim];
            s->mod[lev][dim]     = lo[dim] & 1;
            lo[dim] = (lo[dim] + 1) >> 1;
            hi[dim] = (hi[dim] + 1) >> 1;
        }
    }

    /* the extra elements hold the samples mirrored around the line */
    switch (type) {
    case FF_DWT97:
        s->f_linebuf = av_malloc_array((maxlen + 12), sizeof(*s->f_linebuf));
        return s->f_linebuf ? 0 : AVERROR(ENOMEM);
    case FF_DWT97_INT:
        s->i_linebuf = av_malloc_array((maxlen + 12), sizeof(*s->i_linebuf));
        return s->i_linebuf ? 0 : AVERROR(ENOMEM);
    case FF_DWT53:
        s->i_linebuf = av_malloc_array((maxlen +  6), sizeof(*s->i_linebuf));
        return s->i_linebuf ? 0 : AVERROR(ENOMEM);
    default:
        return -1;
    }
}
/**
 * Run the forward transform selected by s->type on the buffer t, in place.
 *
 * @param s initialised context
 * @param t samples; float for FF_DWT97, int for FF_DWT97_INT and FF_DWT53
 * @return 0 on success (including the no-op case of zero decomposition
 *         levels), -1 for an unknown transform type
 */
int ff_dwt_encode(DWTContext *s, void *t)
{
    /* With no decomposition levels there is nothing to transform, and the
     * per-type encoders would read linelen[ndeclevels - 1], i.e. index -1. */
    if (s->ndeclevels == 0)
        return 0;

    switch(s->type){
        case FF_DWT97:
            dwt_encode97_float(s, t); break;
        case FF_DWT97_INT:
            dwt_encode97_int(s, t); break;
        case FF_DWT53:
            dwt_encode53(s, t); break;
        default:
            return -1;
    }
    return 0;
}
/**
 * Run the inverse transform selected by s->type on the buffer t, in place.
 *
 * @param s initialised context
 * @param t coefficients; float for FF_DWT97, int for FF_DWT97_INT and
 *          FF_DWT53
 * @return 0 on success (including the no-op case of zero decomposition
 *         levels), -1 for an unknown transform type
 */
int ff_dwt_decode(DWTContext *s, void *t)
{
    /* With no decomposition levels there is nothing to transform, and the
     * per-type decoders would read linelen[ndeclevels - 1], i.e. index -1. */
    if (s->ndeclevels == 0)
        return 0;

    switch (s->type) {
    case FF_DWT97:
        dwt_decode97_float(s, t);
        break;
    case FF_DWT97_INT:
        dwt_decode97_int(s, t);
        break;
    case FF_DWT53:
        dwt_decode53(s, t);
        break;
    default:
        return -1;
    }
    return 0;
}
/**
 * Release both line buffers of the context.
 *
 * av_freep() is applied to each buffer pointer, whichever of the two was
 * allocated by ff_jpeg2000_dwt_init().
 */
void ff_dwt_destroy(DWTContext *s)
{
    av_freep(&s->i_linebuf);
    av_freep(&s->f_linebuf);
}
#ifdef TEST
#include "libavutil/lfg.h"
#define MAX_W 256
/**
 * Round-trip test for the integer transforms: encode and decode array in
 * place, then compare all MAX_W * MAX_W elements against ref.
 *
 * On success array is restored from ref and a result line with the
 * accumulated squared error is printed. The context is released on every
 * exit path.
 *
 * @param max_diff largest absolute per-sample difference tolerated
 * @return 0 on success, 1 if init/encode/decode failed, 2 on a mismatch
 */
static int test_dwt(int *array, int *ref, uint16_t border[2][2], int decomp_levels, int type, int max_diff) {
    int ret, j;
    int status = 0;
    DWTContext s1={{{0}}}, *s= &s1;
    int64_t err2 = 0;

    ret = ff_jpeg2000_dwt_init(s, border, decomp_levels, type);
    if (ret < 0) {
        fprintf(stderr, "ff_jpeg2000_dwt_init failed\n");
        status = 1;
        goto end;
    }
    ret = ff_dwt_encode(s, array);
    if (ret < 0) {
        fprintf(stderr, "ff_dwt_encode failed\n");
        status = 1;
        goto end;
    }
    ret = ff_dwt_decode(s, array);
    if (ret < 0) {
        fprintf(stderr, "ff_dwt_decode failed\n");
        status = 1;
        goto end;
    }
    for (j = 0; j<MAX_W * MAX_W; j++) {
        if (FFABS(array[j] - ref[j]) > max_diff) {
            fprintf(stderr, "missmatch at %d (%d != %d) decomp:%d border %d %d %d %d\n",
                    j, array[j], ref[j],decomp_levels, border[0][0], border[0][1], border[1][0], border[1][1]);
            status = 2;
            goto end;
        }
        err2 += (array[j] - ref[j]) * (array[j] - ref[j]);
        /* restore the input for the next test */
        array[j] = ref[j];
    }

    printf("%s, decomp:%2d border %3d %3d %3d %3d milli-err2:%9"PRId64"\n",
           type == FF_DWT53 ? "5/3i" : "9/7i",
           decomp_levels, border[0][0], border[0][1], border[1][0], border[1][1],
           1000*err2 / ((border[0][1] - border[0][0])*(border[1][1] - border[1][0])));

end:
    /* the context starts zeroed, so this is safe even if init failed */
    ff_dwt_destroy(s);
    return status;
}
/**
 * Round-trip test for the float 9/7 transform: encode and decode array in
 * place, then compare all MAX_W * MAX_W elements against ref.
 *
 * On success array is restored from ref and a result line with the
 * accumulated squared error is printed. The context is released on every
 * exit path.
 *
 * @param max_diff largest absolute per-sample difference tolerated
 * @return 0 on success, 1 if init/encode/decode failed, 2 on a mismatch
 */
static int test_dwtf(float *array, float *ref, uint16_t border[2][2], int decomp_levels, float max_diff) {
    int ret, j;
    int status = 0;
    DWTContext s1={{{0}}}, *s= &s1;
    double err2 = 0;

    ret = ff_jpeg2000_dwt_init(s, border, decomp_levels, FF_DWT97);
    if (ret < 0) {
        fprintf(stderr, "ff_jpeg2000_dwt_init failed\n");
        status = 1;
        goto end;
    }
    ret = ff_dwt_encode(s, array);
    if (ret < 0) {
        fprintf(stderr, "ff_dwt_encode failed\n");
        status = 1;
        goto end;
    }
    ret = ff_dwt_decode(s, array);
    if (ret < 0) {
        fprintf(stderr, "ff_dwt_decode failed\n");
        status = 1;
        goto end;
    }
    for (j = 0; j<MAX_W * MAX_W; j++) {
        if (FFABS(array[j] - ref[j]) > max_diff) {
            fprintf(stderr, "missmatch at %d (%f != %f) decomp:%d border %d %d %d %d\n",
                    j, array[j], ref[j],decomp_levels, border[0][0], border[0][1], border[1][0], border[1][1]);
            status = 2;
            goto end;
        }
        err2 += (array[j] - ref[j]) * (array[j] - ref[j]);
        /* restore the input for the next test */
        array[j] = ref[j];
    }

    printf("9/7f, decomp:%2d border %3d %3d %3d %3d err2:%20.3f\n",
           decomp_levels, border[0][0], border[0][1], border[1][0], border[1][1],
           err2 / ((border[0][1] - border[0][0])*(border[1][1] - border[1][0])));

end:
    /* the context starts zeroed, so this is safe even if init failed */
    ff_dwt_destroy(s);
    return status;
}
/* Test data, MAX_W * MAX_W samples each. array / arrayf are transformed in
 * place by the tests; ref / reff keep the untouched values they are compared
 * against and restored from. */
static int array[MAX_W * MAX_W];
static int ref [MAX_W * MAX_W];
static float arrayf[MAX_W * MAX_W];
static float reff [MAX_W * MAX_W];
/**
 * Self-test driver: fill the buffers with pseudo-random samples, then run
 * the 5/3, integer 9/7 and float 9/7 round-trip tests on up to 100 randomly
 * chosen borders and decomposition depths.
 *
 * @return 0 if every test passed, otherwise the first non-zero test result
 */
int main(void) {
    AVLFG prng;
    uint16_t border[2][2];
    int round, idx, dim, side;
    int ret, decomp_levels;

    av_lfg_init(&prng, 1);

    /* all four buffers start out with the same sample values */
    for (idx = 0; idx < MAX_W * MAX_W; idx++) {
        int sample = av_lfg_get(&prng) % 2048;

        ref[idx]    = sample;
        array[idx]  = sample;
        reff[idx]   = sample;
        arrayf[idx] = sample;
    }

    for (round = 0; round < 100; round++) {
        for (dim = 0; dim < 2; dim++)
            for (side = 0; side < 2; side++)
                border[dim][side] = av_lfg_get(&prng) % MAX_W;

        /* skip empty or inverted regions */
        if (border[0][0] >= border[0][1] || border[1][0] >= border[1][1])
            continue;

        decomp_levels = av_lfg_get(&prng) % FF_DWT_MAX_DECLVLS;

        /* the 5/3 transform must reconstruct the input exactly */
        ret = test_dwt(array, ref, border, decomp_levels, FF_DWT53, 0);
        if (ret)
            return ret;

        ret = test_dwt(array, ref, border, decomp_levels, FF_DWT97_INT, FFMIN(7+5*decomp_levels, 15+3*decomp_levels));
        if (ret)
            return ret;

        ret = test_dwtf(arrayf, reff, border, decomp_levels, 0.05);
        if (ret)
            return ret;
    }

    return 0;
}
#endif