mirror of
https://github.com/xenia-project/FFmpeg.git
synced 2025-01-09 13:21:09 +00:00
avfilter/avf_showcqt: use frequency domain windowing
faster initialization and less code slightly different result computationally from previous coeffclamp option is ignored Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
This commit is contained in:
parent
36e1665d3d
commit
5b48dd75d5
@ -24,8 +24,6 @@
|
|||||||
#include "libavutil/channel_layout.h"
|
#include "libavutil/channel_layout.h"
|
||||||
#include "libavutil/opt.h"
|
#include "libavutil/opt.h"
|
||||||
#include "libavutil/xga_font_data.h"
|
#include "libavutil/xga_font_data.h"
|
||||||
#include "libavutil/qsort.h"
|
|
||||||
#include "libavutil/time.h"
|
|
||||||
#include "libavutil/eval.h"
|
#include "libavutil/eval.h"
|
||||||
#include "avfilter.h"
|
#include "avfilter.h"
|
||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
@ -49,7 +47,6 @@
|
|||||||
#define SPECTOGRAM_HEIGHT ((VIDEO_HEIGHT-FONT_HEIGHT)/2)
|
#define SPECTOGRAM_HEIGHT ((VIDEO_HEIGHT-FONT_HEIGHT)/2)
|
||||||
#define SPECTOGRAM_START (VIDEO_HEIGHT-SPECTOGRAM_HEIGHT)
|
#define SPECTOGRAM_START (VIDEO_HEIGHT-SPECTOGRAM_HEIGHT)
|
||||||
#define BASE_FREQ 20.051392800492
|
#define BASE_FREQ 20.051392800492
|
||||||
#define COEFF_CLAMP 1.0e-4
|
|
||||||
#define TLENGTH_MIN 0.001
|
#define TLENGTH_MIN 0.001
|
||||||
#define TLENGTH_DEFAULT "384/f*tc/(384/f+tc)"
|
#define TLENGTH_DEFAULT "384/f*tc/(384/f+tc)"
|
||||||
#define VOLUME_MIN 1e-10
|
#define VOLUME_MIN 1e-10
|
||||||
@ -59,9 +56,9 @@
|
|||||||
"r(1-ld(1)) + b(ld(1))"
|
"r(1-ld(1)) + b(ld(1))"
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
FFTSample value;
|
FFTSample *values;
|
||||||
int index;
|
int start, len;
|
||||||
} SparseCoeff;
|
} Coeffs;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
const AVClass *class;
|
const AVClass *class;
|
||||||
@ -70,11 +67,9 @@ typedef struct {
|
|||||||
FFTComplex *fft_data;
|
FFTComplex *fft_data;
|
||||||
FFTComplex *fft_result;
|
FFTComplex *fft_result;
|
||||||
uint8_t *spectogram;
|
uint8_t *spectogram;
|
||||||
SparseCoeff *coeff_sort;
|
Coeffs coeffs[VIDEO_WIDTH];
|
||||||
SparseCoeff *coeffs[VIDEO_WIDTH];
|
|
||||||
uint8_t *font_alpha;
|
uint8_t *font_alpha;
|
||||||
char *fontfile; /* using freetype */
|
char *fontfile; /* using freetype */
|
||||||
int coeffs_len[VIDEO_WIDTH];
|
|
||||||
uint8_t fontcolor_value[VIDEO_WIDTH*3]; /* result of fontcolor option */
|
uint8_t fontcolor_value[VIDEO_WIDTH*3]; /* result of fontcolor option */
|
||||||
int64_t frame_count;
|
int64_t frame_count;
|
||||||
int spectogram_count;
|
int spectogram_count;
|
||||||
@ -124,10 +119,9 @@ static av_cold void uninit(AVFilterContext *ctx)
|
|||||||
av_fft_end(s->fft_context);
|
av_fft_end(s->fft_context);
|
||||||
s->fft_context = NULL;
|
s->fft_context = NULL;
|
||||||
for (k = 0; k < VIDEO_WIDTH; k++)
|
for (k = 0; k < VIDEO_WIDTH; k++)
|
||||||
av_freep(&s->coeffs[k]);
|
av_freep(&s->coeffs[k].values);
|
||||||
av_freep(&s->fft_data);
|
av_freep(&s->fft_data);
|
||||||
av_freep(&s->fft_result);
|
av_freep(&s->fft_result);
|
||||||
av_freep(&s->coeff_sort);
|
|
||||||
av_freep(&s->spectogram);
|
av_freep(&s->spectogram);
|
||||||
av_freep(&s->font_alpha);
|
av_freep(&s->font_alpha);
|
||||||
av_frame_free(&s->outpicref);
|
av_frame_free(&s->outpicref);
|
||||||
@ -305,14 +299,6 @@ static double b_func(void *p, double x)
|
|||||||
return (int)(x*255.0+0.5);
|
return (int)(x*255.0+0.5);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int qsort_sparsecoeff(const SparseCoeff *a, const SparseCoeff *b)
|
|
||||||
{
|
|
||||||
if (fabsf(a->value) >= fabsf(b->value))
|
|
||||||
return 1;
|
|
||||||
else
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int config_output(AVFilterLink *outlink)
|
static int config_output(AVFilterLink *outlink)
|
||||||
{
|
{
|
||||||
AVFilterContext *ctx = outlink->src;
|
AVFilterContext *ctx = outlink->src;
|
||||||
@ -325,11 +311,10 @@ static int config_output(AVFilterLink *outlink)
|
|||||||
static const char * const expr_fontcolor_func_names[] = { "midi", "r", "g", "b", NULL };
|
static const char * const expr_fontcolor_func_names[] = { "midi", "r", "g", "b", NULL };
|
||||||
static double (* const expr_funcs[])(void *, double) = { a_weighting, b_weighting, c_weighting, NULL };
|
static double (* const expr_funcs[])(void *, double) = { a_weighting, b_weighting, c_weighting, NULL };
|
||||||
static double (* const expr_fontcolor_funcs[])(void *, double) = { midi, r_func, g_func, b_func, NULL };
|
static double (* const expr_fontcolor_funcs[])(void *, double) = { midi, r_func, g_func, b_func, NULL };
|
||||||
int fft_len, k, x, y, ret;
|
int fft_len, k, x, ret;
|
||||||
int num_coeffs = 0;
|
int num_coeffs = 0;
|
||||||
int rate = inlink->sample_rate;
|
int rate = inlink->sample_rate;
|
||||||
double max_len = rate * (double) s->timeclamp;
|
double max_len = rate * (double) s->timeclamp;
|
||||||
int64_t start_time, end_time;
|
|
||||||
int video_scale = s->fullhd ? 2 : 1;
|
int video_scale = s->fullhd ? 2 : 1;
|
||||||
int video_width = (VIDEO_WIDTH/2) * video_scale;
|
int video_width = (VIDEO_WIDTH/2) * video_scale;
|
||||||
int video_height = (VIDEO_HEIGHT/2) * video_scale;
|
int video_height = (VIDEO_HEIGHT/2) * video_scale;
|
||||||
@ -344,11 +329,10 @@ static int config_output(AVFilterLink *outlink)
|
|||||||
}
|
}
|
||||||
|
|
||||||
s->fft_data = av_malloc_array(fft_len, sizeof(*s->fft_data));
|
s->fft_data = av_malloc_array(fft_len, sizeof(*s->fft_data));
|
||||||
s->coeff_sort = av_malloc_array(fft_len, sizeof(*s->coeff_sort));
|
|
||||||
s->fft_result = av_malloc_array(fft_len + 1, sizeof(*s->fft_result));
|
s->fft_result = av_malloc_array(fft_len + 1, sizeof(*s->fft_result));
|
||||||
s->fft_context = av_fft_init(s->fft_bits, 0);
|
s->fft_context = av_fft_init(s->fft_bits, 0);
|
||||||
|
|
||||||
if (!s->fft_data || !s->coeff_sort || !s->fft_result || !s->fft_context)
|
if (!s->fft_data || !s->fft_result || !s->fft_context)
|
||||||
return AVERROR(ENOMEM);
|
return AVERROR(ENOMEM);
|
||||||
|
|
||||||
#if CONFIG_LIBFREETYPE
|
#if CONFIG_LIBFREETYPE
|
||||||
@ -359,8 +343,6 @@ static int config_output(AVFilterLink *outlink)
|
|||||||
s->font_alpha = NULL;
|
s->font_alpha = NULL;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
av_log(ctx, AV_LOG_INFO, "Calculating spectral kernel, please wait\n");
|
|
||||||
start_time = av_gettime_relative();
|
|
||||||
ret = av_expr_parse(&tlength_expr, s->tlength, expr_vars, NULL, NULL, NULL, NULL, 0, ctx);
|
ret = av_expr_parse(&tlength_expr, s->tlength, expr_vars, NULL, NULL, NULL, NULL, 0, ctx);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
goto eval_error;
|
goto eval_error;
|
||||||
@ -376,22 +358,10 @@ static int config_output(AVFilterLink *outlink)
|
|||||||
goto eval_error;
|
goto eval_error;
|
||||||
|
|
||||||
for (k = 0; k < VIDEO_WIDTH; k++) {
|
for (k = 0; k < VIDEO_WIDTH; k++) {
|
||||||
int hlen = fft_len >> 1;
|
|
||||||
float total = 0;
|
|
||||||
float partial = 0;
|
|
||||||
double freq = BASE_FREQ * exp2(k * (1.0/192.0));
|
double freq = BASE_FREQ * exp2(k * (1.0/192.0));
|
||||||
double tlen, tlength, volume;
|
double flen, center, tlength, volume;
|
||||||
|
int start, end;
|
||||||
double expr_vars_val[] = { s->timeclamp, s->timeclamp, freq, freq, freq, 0 };
|
double expr_vars_val[] = { s->timeclamp, s->timeclamp, freq, freq, freq, 0 };
|
||||||
/* a window function from Albert H. Nuttall,
|
|
||||||
* "Some Windows with Very Good Sidelobe Behavior"
|
|
||||||
* -93.32 dB peak sidelobe and 18 dB/octave asymptotic decay
|
|
||||||
* coefficient normalized to a0 = 1 */
|
|
||||||
double a0 = 0.355768;
|
|
||||||
double a1 = 0.487396/a0;
|
|
||||||
double a2 = 0.144232/a0;
|
|
||||||
double a3 = 0.012604/a0;
|
|
||||||
double sv_step, cv_step, sv, cv;
|
|
||||||
double sw_step, cw_step, sw, cw, w;
|
|
||||||
|
|
||||||
tlength = av_expr_eval(tlength_expr, expr_vars_val, NULL);
|
tlength = av_expr_eval(tlength_expr, expr_vars_val, NULL);
|
||||||
if (isnan(tlength)) {
|
if (isnan(tlength)) {
|
||||||
@ -424,75 +394,31 @@ static int config_output(AVFilterLink *outlink)
|
|||||||
fontcolor_value += 3;
|
fontcolor_value += 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
tlen = tlength * rate;
|
/* direct frequency domain windowing */
|
||||||
s->fft_data[0].re = 0;
|
flen = 8.0 * fft_len / (tlength * rate);
|
||||||
s->fft_data[0].im = 0;
|
center = freq * fft_len / rate;
|
||||||
s->fft_data[hlen].re = (1.0 + a1 + a2 + a3) * (1.0/tlen) * volume * (1.0/fft_len);
|
start = FFMAX(0, ceil(center - 0.5 * flen));
|
||||||
s->fft_data[hlen].im = 0;
|
end = FFMIN(fft_len, floor(center + 0.5 * flen));
|
||||||
sv_step = sv = sin(2.0*M_PI*freq*(1.0/rate));
|
s->coeffs[k].len = end - start + 1;
|
||||||
cv_step = cv = cos(2.0*M_PI*freq*(1.0/rate));
|
s->coeffs[k].start = start;
|
||||||
/* also optimizing window func */
|
num_coeffs += s->coeffs[k].len;
|
||||||
sw_step = sw = sin(2.0*M_PI*(1.0/tlen));
|
s->coeffs[k].values = av_malloc_array(s->coeffs[k].len, sizeof(*s->coeffs[k].values));
|
||||||
cw_step = cw = cos(2.0*M_PI*(1.0/tlen));
|
if (!s->coeffs[k].values) {
|
||||||
for (x = 1; x < 0.5 * tlen; x++) {
|
|
||||||
double cv_tmp, cw_tmp;
|
|
||||||
double cw2, cw3, sw2;
|
|
||||||
|
|
||||||
cw2 = cw * cw - sw * sw;
|
|
||||||
sw2 = cw * sw + sw * cw;
|
|
||||||
cw3 = cw * cw2 - sw * sw2;
|
|
||||||
w = (1.0 + a1 * cw + a2 * cw2 + a3 * cw3) * (1.0/tlen) * volume * (1.0/fft_len);
|
|
||||||
s->fft_data[hlen + x].re = w * cv;
|
|
||||||
s->fft_data[hlen + x].im = w * sv;
|
|
||||||
s->fft_data[hlen - x].re = s->fft_data[hlen + x].re;
|
|
||||||
s->fft_data[hlen - x].im = -s->fft_data[hlen + x].im;
|
|
||||||
|
|
||||||
cv_tmp = cv * cv_step - sv * sv_step;
|
|
||||||
sv = sv * cv_step + cv * sv_step;
|
|
||||||
cv = cv_tmp;
|
|
||||||
cw_tmp = cw * cw_step - sw * sw_step;
|
|
||||||
sw = sw * cw_step + cw * sw_step;
|
|
||||||
cw = cw_tmp;
|
|
||||||
}
|
|
||||||
for (; x < hlen; x++) {
|
|
||||||
s->fft_data[hlen + x].re = 0;
|
|
||||||
s->fft_data[hlen + x].im = 0;
|
|
||||||
s->fft_data[hlen - x].re = 0;
|
|
||||||
s->fft_data[hlen - x].im = 0;
|
|
||||||
}
|
|
||||||
av_fft_permute(s->fft_context, s->fft_data);
|
|
||||||
av_fft_calc(s->fft_context, s->fft_data);
|
|
||||||
|
|
||||||
for (x = 0; x < fft_len; x++) {
|
|
||||||
s->coeff_sort[x].index = x;
|
|
||||||
s->coeff_sort[x].value = s->fft_data[x].re;
|
|
||||||
}
|
|
||||||
|
|
||||||
AV_QSORT(s->coeff_sort, fft_len, SparseCoeff, qsort_sparsecoeff);
|
|
||||||
for (x = 0; x < fft_len; x++)
|
|
||||||
total += fabsf(s->coeff_sort[x].value);
|
|
||||||
|
|
||||||
for (x = 0; x < fft_len; x++) {
|
|
||||||
partial += fabsf(s->coeff_sort[x].value);
|
|
||||||
if (partial > total * s->coeffclamp * COEFF_CLAMP) {
|
|
||||||
s->coeffs_len[k] = fft_len - x;
|
|
||||||
num_coeffs += s->coeffs_len[k];
|
|
||||||
s->coeffs[k] = av_malloc_array(s->coeffs_len[k], sizeof(*s->coeffs[k]));
|
|
||||||
if (!s->coeffs[k]) {
|
|
||||||
ret = AVERROR(ENOMEM);
|
ret = AVERROR(ENOMEM);
|
||||||
goto eval_error;
|
goto eval_error;
|
||||||
}
|
}
|
||||||
for (y = 0; y < s->coeffs_len[k]; y++)
|
for (x = start; x <= end; x++) {
|
||||||
s->coeffs[k][y] = s->coeff_sort[x+y];
|
int sign = (x & 1) ? (-1) : 1;
|
||||||
break;
|
double u = 2.0 * M_PI * (x - center) * (1.0/flen);
|
||||||
}
|
/* nuttall window */
|
||||||
|
double w = 0.355768 + 0.487396 * cos(u) + 0.144232 * cos(2*u) + 0.012604 * cos(3*u);
|
||||||
|
s->coeffs[k].values[x-start] = sign * volume * (1.0/fft_len) * w;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
av_expr_free(fontcolor_expr);
|
av_expr_free(fontcolor_expr);
|
||||||
av_expr_free(volume_expr);
|
av_expr_free(volume_expr);
|
||||||
av_expr_free(tlength_expr);
|
av_expr_free(tlength_expr);
|
||||||
end_time = av_gettime_relative();
|
av_log(ctx, AV_LOG_INFO, "fft_len=%u, num_coeffs=%u\n", fft_len, num_coeffs);
|
||||||
av_log(ctx, AV_LOG_INFO, "Elapsed time %.6f s (fft_len=%u, num_coeffs=%u)\n", 1e-6 * (end_time-start_time), fft_len, num_coeffs);
|
|
||||||
|
|
||||||
outlink->w = video_width;
|
outlink->w = video_width;
|
||||||
outlink->h = video_height;
|
outlink->h = video_height;
|
||||||
@ -552,9 +478,9 @@ static int plot_cqt(AVFilterLink *inlink)
|
|||||||
FFTComplex w = {0,0};
|
FFTComplex w = {0,0};
|
||||||
FFTComplex l, r;
|
FFTComplex l, r;
|
||||||
|
|
||||||
for (u = 0; u < s->coeffs_len[x]; u++) {
|
for (u = 0; u < s->coeffs[x].len; u++) {
|
||||||
FFTSample value = s->coeffs[x][u].value;
|
FFTSample value = s->coeffs[x].values[u];
|
||||||
int index = s->coeffs[x][u].index;
|
int index = s->coeffs[x].start + u;
|
||||||
v.re += value * s->fft_result[index].re;
|
v.re += value * s->fft_result[index].re;
|
||||||
v.im += value * s->fft_result[index].im;
|
v.im += value * s->fft_result[index].im;
|
||||||
w.re += value * s->fft_result[fft_len - index].re;
|
w.re += value * s->fft_result[fft_len - index].re;
|
||||||
|
Loading…
Reference in New Issue
Block a user