avfilter: add loudnorm

Signed-off-by: Kyle Swanson <k@ylo.ph>
Kyle Swanson 2016-05-11 13:30:14 -05:00 committed by Paul B Mahol
parent 42ee137a0a
commit c0c378009b
8 changed files with 972 additions and 1 deletion

Changelog

@@ -35,6 +35,7 @@ version <next>:
- Generic OpenMAX IL encoder with support for Raspberry Pi
- IFF ANIM demuxer & decoder
- Direct Stream Transfer (DST) decoder
- loudnorm filter
version 3.0:
- Common Encryption (CENC) MP4 encoding and decoding support

MAINTAINERS

@@ -358,6 +358,7 @@ Filters:
af_compand.c Paul B Mahol
af_firequalizer.c Muhammad Faiz
af_ladspa.c Paul B Mahol
af_loudnorm.c Kyle Swanson
af_pan.c Nicolas George
af_sidechaincompress.c Paul B Mahol
af_silenceremove.c Paul B Mahol

configure

@@ -226,6 +226,8 @@ External library support:
--enable-libcdio enable audio CD grabbing with libcdio [no]
--enable-libdc1394 enable IIDC-1394 grabbing using libdc1394
and libraw1394 [no]
--enable-libebur128 enable libebur128 for EBU R128 measurement,
needed for loudnorm filter [no]
--enable-libfaac enable AAC encoding via libfaac [no]
--enable-libfdk-aac enable AAC de/encoding via libfdk-aac [no]
--enable-libflite enable flite (voice synthesis) support via libflite [no]
@@ -1472,6 +1474,7 @@ EXTERNAL_LIBRARY_LIST="
libcdio
libcelt
libdc1394
libebur128
libfaac
libfdk_aac
libflite
@@ -2987,6 +2990,7 @@ hqdn3d_filter_deps="gpl"
interlace_filter_deps="gpl"
kerndeint_filter_deps="gpl"
ladspa_filter_deps="ladspa dlopen"
loudnorm_filter_deps="libebur128"
mcdeint_filter_deps="avcodec gpl"
movie_filter_deps="avcodec avformat"
mpdecimate_filter_deps="gpl"
@@ -5593,6 +5597,7 @@ enabled libcelt && require libcelt celt/celt.h celt_decode -lcelt0 &&
{ check_lib celt/celt.h celt_decoder_create_custom -lcelt0 ||
die "ERROR: libcelt must be installed and version must be >= 0.11.0."; }
enabled libcaca && require_pkg_config caca caca.h caca_create_canvas
enabled libebur128 && require ebur128 ebur128.h ebur128_relative_threshold -lebur128
enabled libfaac && require2 libfaac "stdint.h faac.h" faacEncGetVersion -lfaac
enabled libfdk_aac && { use_pkg_config fdk-aac "fdk-aac/aacenc_lib.h" aacEncOpen ||
{ require libfdk_aac fdk-aac/aacenc_lib.h aacEncOpen -lfdk-aac &&

doc/filters.texi

@@ -2711,6 +2711,61 @@ Modify the @var{N}-th control value.
If the specified value is not valid, it is ignored and prior one is kept.
@end table
@section loudnorm
EBU R128 loudness normalization. Includes both dynamic and linear normalization modes.
Supports both single-pass (livestreams, files) and double-pass (files) operation.
The algorithm can target integrated loudness (IL), loudness range (LRA), and maximum true peak (TP).
To enable compilation of this filter you need to configure FFmpeg with
@code{--enable-libebur128}.
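A minimal configure invocation enabling the dependency might be:
@example
./configure --enable-libebur128
@end example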
The filter accepts the following options:
@table @option
@item I, i
Set integrated loudness target.
Range is -70.0 to -5.0. Default value is -24.0.
@item LRA, lra
Set loudness range target.
Range is 1.0 to 20.0. Default value is 7.0.
@item TP, tp
Set maximum true peak.
Range is -9.0 to +0.0. Default value is -2.0.
@item measured_I, measured_i
Measured IL of input file.
Range is -99.0 to +0.0.
@item measured_LRA, measured_lra
Measured LRA of input file.
Range is 0.0 to 99.0.
@item measured_TP, measured_tp
Measured true peak of input file.
Range is -99.0 to +99.0.
@item measured_thresh
Measured threshold of input file.
Range is -99.0 to +0.0.
@item offset
Set offset gain. Gain is applied before the true-peak limiter.
Range is -99.0 to +99.0. Default is +0.0.
@item linear
Normalize linearly if possible.
measured_I, measured_LRA, measured_TP, and measured_thresh must also
be specified in order to use this mode.
Options are true or false. Default is true.
@item print_format
Set print format for stats. Options are summary, json, or none.
Default value is none.
@end table
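For example, a single-pass run with typical streaming targets could look like this (file names and target values are illustrative):
@example
ffmpeg -i input.wav -af loudnorm=I=-16:TP=-1.5:LRA=11:print_format=summary output.wav
@end example
For double-pass use, a first run with @code{print_format=json} and the null muxer measures the input; the reported @code{input_i}, @code{input_tp}, @code{input_lra} and @code{input_thresh} values are then passed back as the corresponding @code{measured_*} options. The numbers below are placeholders, not real measurements:
@example
ffmpeg -i input.wav -af loudnorm=I=-16:TP=-1.5:LRA=11:print_format=json -f null -
ffmpeg -i input.wav -af loudnorm=I=-16:TP=-1.5:LRA=11:measured_I=-27.2:measured_TP=-4.1:measured_LRA=18.1:measured_thresh=-37.7:linear=true output.wav
@end example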
@section lowpass
Apply a low-pass filter with 3dB point frequency.

libavfilter/Makefile

@@ -89,6 +89,7 @@ OBJS-$(CONFIG_FLANGER_FILTER) += af_flanger.o generate_wave_table
OBJS-$(CONFIG_HIGHPASS_FILTER) += af_biquads.o
OBJS-$(CONFIG_JOIN_FILTER) += af_join.o
OBJS-$(CONFIG_LADSPA_FILTER) += af_ladspa.o
OBJS-$(CONFIG_LOUDNORM_FILTER) += af_loudnorm.o
OBJS-$(CONFIG_LOWPASS_FILTER) += af_biquads.o
OBJS-$(CONFIG_PAN_FILTER) += af_pan.o
OBJS-$(CONFIG_REPLAYGAIN_FILTER) += af_replaygain.o

libavfilter/af_loudnorm.c

@@ -0,0 +1,907 @@
/*
* Copyright (c) 2016 Kyle Swanson <k@ylo.ph>.
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* http://k.ylo.ph/2016/04/04/loudnorm.html */
#include "libavutil/opt.h"
#include "avfilter.h"
#include "internal.h"
#include "audio.h"
#include <ebur128.h>
enum FrameType {
FIRST_FRAME,
INNER_FRAME,
FINAL_FRAME,
LINEAR_MODE,
FRAME_NB
};
enum LimiterState {
OUT,
ATTACK,
SUSTAIN,
RELEASE,
STATE_NB
};
enum PrintFormat {
NONE,
JSON,
SUMMARY,
PF_NB
};
typedef struct LoudNormContext {
const AVClass *class;
double target_i;
double target_lra;
double target_tp;
double measured_i;
double measured_lra;
double measured_tp;
double measured_thresh;
double offset;
int linear;
enum PrintFormat print_format;
double *buf;
int buf_size;
int buf_index;
int prev_buf_index;
double delta[30];
double weights[21];
double prev_delta;
int index;
double gain_reduction[2];
double *limiter_buf;
double *prev_smp;
int limiter_buf_index;
int limiter_buf_size;
enum LimiterState limiter_state;
int peak_index;
int env_index;
int env_cnt;
int attack_length;
int release_length;
int64_t pts;
enum FrameType frame_type;
int above_threshold;
int prev_nb_samples;
int channels;
ebur128_state *r128_in;
ebur128_state *r128_out;
} LoudNormContext;
#define OFFSET(x) offsetof(LoudNormContext, x)
#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
static const AVOption loudnorm_options[] = {
{ "I", "set integrated loudness target", OFFSET(target_i), AV_OPT_TYPE_DOUBLE, {.dbl = -24.}, -70., -5., FLAGS },
{ "i", "set integrated loudness target", OFFSET(target_i), AV_OPT_TYPE_DOUBLE, {.dbl = -24.}, -70., -5., FLAGS },
{ "LRA", "set loudness range target", OFFSET(target_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 7.}, 1., 20., FLAGS },
{ "lra", "set loudness range target", OFFSET(target_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 7.}, 1., 20., FLAGS },
{ "TP", "set maximum true peak", OFFSET(target_tp), AV_OPT_TYPE_DOUBLE, {.dbl = -2.}, -9., 0., FLAGS },
{ "tp", "set maximum true peak", OFFSET(target_tp), AV_OPT_TYPE_DOUBLE, {.dbl = -2.}, -9., 0., FLAGS },
{ "measured_I", "measured IL of input file", OFFSET(measured_i), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 0., FLAGS },
{ "measured_i", "measured IL of input file", OFFSET(measured_i), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 0., FLAGS },
{ "measured_LRA", "measured LRA of input file", OFFSET(measured_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, 0., 99., FLAGS },
{ "measured_lra", "measured LRA of input file", OFFSET(measured_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, 0., 99., FLAGS },
{ "measured_TP", "measured true peak of input file", OFFSET(measured_tp), AV_OPT_TYPE_DOUBLE, {.dbl = 99.}, -99., 99., FLAGS },
{ "measured_tp", "measured true peak of input file", OFFSET(measured_tp), AV_OPT_TYPE_DOUBLE, {.dbl = 99.}, -99., 99., FLAGS },
{ "measured_thresh", "measured threshold of input file", OFFSET(measured_thresh), AV_OPT_TYPE_DOUBLE, {.dbl = -70.}, -99., 0., FLAGS },
{ "offset", "set offset gain", OFFSET(offset), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 99., FLAGS },
{ "linear", "normalize linearly if possible", OFFSET(linear), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS },
{ "print_format", "set print format for stats", OFFSET(print_format), AV_OPT_TYPE_INT, {.i64 = NONE}, NONE, PF_NB -1, FLAGS, "print_format" },
{ "none", 0, 0, AV_OPT_TYPE_CONST, {.i64 = NONE}, 0, 0, FLAGS, "print_format" },
{ "json", 0, 0, AV_OPT_TYPE_CONST, {.i64 = JSON}, 0, 0, FLAGS, "print_format" },
{ "summary", 0, 0, AV_OPT_TYPE_CONST, {.i64 = SUMMARY}, 0, 0, FLAGS, "print_format" },
{ NULL }
};
AVFILTER_DEFINE_CLASS(loudnorm);
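/* number of samples corresponding to frame_len_msec at the given sample rate, rounded up to an even count */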
static inline int frame_size(int sample_rate, int frame_len_msec)
{
const int frame_size = round((double)sample_rate * (frame_len_msec / 1000.0));
return frame_size + (frame_size % 2);
}
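/* build a 21-tap Gaussian window (sigma = 3.5) normalized to unit sum; it smooths the ring of per-frame gain factors in gaussian_filter() */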
static void init_gaussian_filter(LoudNormContext *s)
{
double total_weight = 0.0;
const double sigma = 3.5;
double adjust;
int i;
const int offset = 21 / 2;
const double c1 = 1.0 / (sigma * sqrt(2.0 * M_PI));
const double c2 = 2.0 * pow(sigma, 2.0);
for (i = 0; i < 21; i++) {
const int x = i - offset;
s->weights[i] = c1 * exp(-(pow(x, 2.0) / c2));
total_weight += s->weights[i];
}
adjust = 1.0 / total_weight;
for (i = 0; i < 21; i++)
s->weights[i] *= adjust;
}
static double gaussian_filter(LoudNormContext *s, int index)
{
double result = 0.;
int i;
index = index - 10 > 0 ? index - 10 : index + 20;
for (i = 0; i < 21; i++)
result += s->delta[((index + i) < 30) ? (index + i) : (index + i - 30)] * s->weights[i];
return result;
}
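/* scan ahead in the limiter ring buffer for the next local peak above the true-peak ceiling; its position (in samples) and magnitude are returned via peak_delta and peak_value, with peak_delta = -1 when no peak is found */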
static void detect_peak(LoudNormContext *s, int offset, int nb_samples, int channels, int *peak_delta, double *peak_value)
{
int n, c, i, index;
double ceiling;
double *buf;
*peak_delta = -1;
buf = s->limiter_buf;
ceiling = s->target_tp;
index = s->limiter_buf_index + (offset * channels) + (1920 * channels);
if (index >= s->limiter_buf_size)
index -= s->limiter_buf_size;
if (s->frame_type == FIRST_FRAME) {
for (c = 0; c < channels; c++)
s->prev_smp[c] = fabs(buf[index + c - channels]);
}
for (n = 0; n < nb_samples; n++) {
for (c = 0; c < channels; c++) {
double this, next, max_peak;
this = fabs(buf[(index + c) < s->limiter_buf_size ? (index + c) : (index + c - s->limiter_buf_size)]);
next = fabs(buf[(index + c + channels) < s->limiter_buf_size ? (index + c + channels) : (index + c + channels - s->limiter_buf_size)]);
if ((s->prev_smp[c] <= this) && (next <= this) && (this > ceiling) && (n > 0)) {
int detected;
detected = 1;
for (i = 2; i < 12; i++) {
next = fabs(buf[(index + c + (i * channels)) < s->limiter_buf_size ? (index + c + (i * channels)) : (index + c + (i * channels) - s->limiter_buf_size)]);
if (next > this) {
detected = 0;
break;
}
}
if (!detected)
continue;
for (c = 0; c < channels; c++) {
if (c == 0 || fabs(buf[index + c]) > max_peak)
max_peak = fabs(buf[index + c]);
s->prev_smp[c] = fabs(buf[(index + c) < s->limiter_buf_size ? (index + c) : (index + c - s->limiter_buf_size)]);
}
*peak_delta = n;
s->peak_index = index;
*peak_value = max_peak;
return;
}
s->prev_smp[c] = this;
}
index += channels;
if (index >= s->limiter_buf_size)
index -= s->limiter_buf_size;
}
}
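/* look-ahead true-peak limiter: a state machine (OUT/ATTACK/SUSTAIN/RELEASE) shapes a gain envelope around each detected peak so the output stays at or below target_tp */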
static void true_peak_limiter(LoudNormContext *s, double *out, int nb_samples, int channels)
{
int n, c, index, peak_delta, smp_cnt;
double ceiling, peak_value;
double *buf;
buf = s->limiter_buf;
ceiling = s->target_tp;
index = s->limiter_buf_index;
smp_cnt = 0;
if (s->frame_type == FIRST_FRAME) {
double max;
max = 0.;
for (n = 0; n < 1920; n++) {
for (c = 0; c < channels; c++) {
max = fabs(buf[c]) > max ? fabs(buf[c]) : max;
}
buf += channels;
}
if (max > ceiling) {
s->gain_reduction[1] = ceiling / max;
s->limiter_state = SUSTAIN;
buf = s->limiter_buf;
for (n = 0; n < 1920; n++) {
for (c = 0; c < channels; c++) {
double env;
env = s->gain_reduction[1];
buf[c] *= env;
}
buf += channels;
}
}
buf = s->limiter_buf;
}
do {
switch(s->limiter_state) {
case OUT:
detect_peak(s, smp_cnt, nb_samples - smp_cnt, channels, &peak_delta, &peak_value);
if (peak_delta != -1) {
s->env_cnt = 0;
smp_cnt += (peak_delta - s->attack_length);
s->gain_reduction[0] = 1.;
s->gain_reduction[1] = ceiling / peak_value;
s->limiter_state = ATTACK;
s->env_index = s->peak_index - (s->attack_length * channels);
if (s->env_index < 0)
s->env_index += s->limiter_buf_size;
s->env_index += (s->env_cnt * channels);
if (s->env_index > s->limiter_buf_size)
s->env_index -= s->limiter_buf_size;
} else {
smp_cnt = nb_samples;
}
break;
case ATTACK:
for (; s->env_cnt < s->attack_length; s->env_cnt++) {
for (c = 0; c < channels; c++) {
double env;
env = s->gain_reduction[0] - ((double) s->env_cnt / (s->attack_length - 1) * (s->gain_reduction[0] - s->gain_reduction[1]));
buf[s->env_index + c] *= env;
}
s->env_index += channels;
if (s->env_index >= s->limiter_buf_size)
s->env_index -= s->limiter_buf_size;
smp_cnt++;
if (smp_cnt >= nb_samples) {
s->env_cnt++;
break;
}
}
if (smp_cnt < nb_samples) {
s->env_cnt = 0;
s->attack_length = 1920;
s->limiter_state = SUSTAIN;
}
break;
case SUSTAIN:
detect_peak(s, smp_cnt, nb_samples, channels, &peak_delta, &peak_value);
if (peak_delta == -1) {
s->limiter_state = RELEASE;
s->gain_reduction[0] = s->gain_reduction[1];
s->gain_reduction[1] = 1.;
s->env_cnt = 0;
break;
} else {
double gain_reduction;
gain_reduction = ceiling / peak_value;
if (gain_reduction < s->gain_reduction[1]) {
s->limiter_state = ATTACK;
s->attack_length = peak_delta;
if (s->attack_length <= 1)
s->attack_length = 2;
s->gain_reduction[0] = s->gain_reduction[1];
s->gain_reduction[1] = gain_reduction;
s->env_cnt = 0;
break;
}
for (s->env_cnt = 0; s->env_cnt < peak_delta; s->env_cnt++) {
for (c = 0; c < channels; c++) {
double env;
env = s->gain_reduction[1];
buf[s->env_index + c] *= env;
}
s->env_index += channels;
if (s->env_index >= s->limiter_buf_size)
s->env_index -= s->limiter_buf_size;
smp_cnt++;
if (smp_cnt >= nb_samples) {
s->env_cnt++;
break;
}
}
}
break;
case RELEASE:
for (; s->env_cnt < s->release_length; s->env_cnt++) {
for (c = 0; c < channels; c++) {
double env;
env = s->gain_reduction[0] + (((double) s->env_cnt / (s->release_length - 1)) * (s->gain_reduction[1] - s->gain_reduction[0]));
buf[s->env_index + c] *= env;
}
s->env_index += channels;
if (s->env_index >= s->limiter_buf_size)
s->env_index -= s->limiter_buf_size;
smp_cnt++;
if (smp_cnt >= nb_samples) {
s->env_cnt++;
break;
}
}
if (smp_cnt < nb_samples) {
s->env_cnt = 0;
s->limiter_state = OUT;
}
break;
}
} while (smp_cnt < nb_samples);
for (n = 0; n < nb_samples; n++) {
for (c = 0; c < channels; c++) {
out[c] = buf[index + c];
if (fabs(out[c]) > ceiling) {
out[c] = ceiling * (out[c] < 0 ? -1 : 1);
}
}
out += channels;
index += channels;
if (index >= s->limiter_buf_size)
index -= s->limiter_buf_size;
}
}
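/* per-frame processing: feed the input to the R128 analyzer, then apply either the smoothed dynamic gain plus the true-peak limiter or, in linear mode, a fixed gain offset */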
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
AVFilterContext *ctx = inlink->dst;
LoudNormContext *s = ctx->priv;
AVFilterLink *outlink = ctx->outputs[0];
AVFrame *out;
const double *src;
double *dst;
double *buf;
double *limiter_buf;
int i, n, c, subframe_length, src_index;
double gain, gain_next, env_global, env_shortterm,
global, shortterm, lra, relative_threshold;
if (av_frame_is_writable(in)) {
out = in;
} else {
out = ff_get_audio_buffer(inlink, in->nb_samples);
if (!out) {
av_frame_free(&in);
return AVERROR(ENOMEM);
}
av_frame_copy_props(out, in);
}
out->pts = s->pts;
src = (const double *)in->data[0];
dst = (double *)out->data[0];
buf = s->buf;
limiter_buf = s->limiter_buf;
ebur128_add_frames_double(s->r128_in, src, in->nb_samples);
if (s->frame_type == FIRST_FRAME && in->nb_samples < frame_size(inlink->sample_rate, 3000)) {
double offset, offset_tp, true_peak;
ebur128_loudness_global(s->r128_in, &global);
for (c = 0; c < inlink->channels; c++) {
double tmp;
ebur128_sample_peak(s->r128_in, c, &tmp);
if (c == 0 || tmp > true_peak)
true_peak = tmp;
}
offset = s->target_i - global;
offset_tp = true_peak + offset;
s->offset = offset_tp < s->target_tp ? offset : s->target_tp - true_peak;
s->offset = pow(10., s->offset / 20.);
s->frame_type = LINEAR_MODE;
}
switch (s->frame_type) {
case FIRST_FRAME:
for (n = 0; n < in->nb_samples; n++) {
for (c = 0; c < inlink->channels; c++) {
buf[s->buf_index + c] = src[c];
}
src += inlink->channels;
s->buf_index += inlink->channels;
}
ebur128_loudness_shortterm(s->r128_in, &shortterm);
if (shortterm < s->measured_thresh) {
s->above_threshold = 0;
env_shortterm = shortterm <= -70. ? 0. : s->target_i - s->measured_i;
} else {
s->above_threshold = 1;
env_shortterm = shortterm <= -70. ? 0. : s->target_i - shortterm;
}
for (n = 0; n < 30; n++)
s->delta[n] = pow(10., env_shortterm / 20.);
s->prev_delta = s->delta[s->index];
s->buf_index =
s->limiter_buf_index = 0;
for (n = 0; n < (s->limiter_buf_size / inlink->channels); n++) {
for (c = 0; c < inlink->channels; c++) {
limiter_buf[s->limiter_buf_index + c] = buf[s->buf_index + c] * s->delta[s->index] * s->offset;
}
s->limiter_buf_index += inlink->channels;
if (s->limiter_buf_index >= s->limiter_buf_size)
s->limiter_buf_index -= s->limiter_buf_size;
s->buf_index += inlink->channels;
}
subframe_length = frame_size(inlink->sample_rate, 100);
true_peak_limiter(s, dst, subframe_length, inlink->channels);
ebur128_add_frames_double(s->r128_out, dst, subframe_length);
s->pts +=
out->nb_samples =
inlink->min_samples =
inlink->max_samples =
inlink->partial_buf_size = subframe_length;
s->frame_type = INNER_FRAME;
break;
case INNER_FRAME:
gain = gaussian_filter(s, s->index + 10 < 30 ? s->index + 10 : s->index + 10 - 30);
gain_next = gaussian_filter(s, s->index + 11 < 30 ? s->index + 11 : s->index + 11 - 30);
for (n = 0; n < in->nb_samples; n++) {
for (c = 0; c < inlink->channels; c++) {
buf[s->prev_buf_index + c] = src[c];
limiter_buf[s->limiter_buf_index + c] = buf[s->buf_index + c] * (gain + (((double) n / in->nb_samples) * (gain_next - gain))) * s->offset;
}
src += inlink->channels;
s->limiter_buf_index += inlink->channels;
if (s->limiter_buf_index >= s->limiter_buf_size)
s->limiter_buf_index -= s->limiter_buf_size;
s->prev_buf_index += inlink->channels;
if (s->prev_buf_index >= s->buf_size)
s->prev_buf_index -= s->buf_size;
s->buf_index += inlink->channels;
if (s->buf_index >= s->buf_size)
s->buf_index -= s->buf_size;
}
subframe_length = (frame_size(inlink->sample_rate, 100) - in->nb_samples) * inlink->channels;
s->limiter_buf_index = s->limiter_buf_index + subframe_length < s->limiter_buf_size ? s->limiter_buf_index + subframe_length : s->limiter_buf_index + subframe_length - s->limiter_buf_size;
true_peak_limiter(s, dst, in->nb_samples, inlink->channels);
ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
ebur128_loudness_range(s->r128_in, &lra);
ebur128_loudness_global(s->r128_in, &global);
ebur128_loudness_shortterm(s->r128_in, &shortterm);
ebur128_relative_threshold(s->r128_in, &relative_threshold);
if (s->above_threshold == 0) {
double shortterm_out;
if (shortterm > s->measured_thresh)
s->prev_delta *= 1.0058;
ebur128_loudness_shortterm(s->r128_out, &shortterm_out);
if (shortterm_out >= s->target_i)
s->above_threshold = 1;
}
if (shortterm < relative_threshold || shortterm <= -70. || s->above_threshold == 0) {
s->delta[s->index] = s->prev_delta;
} else {
env_global = fabs(shortterm - global) < (s->target_lra / 2.) ? shortterm - global : (s->target_lra / 2.) * ((shortterm - global) < 0 ? -1 : 1);
env_shortterm = s->target_i - shortterm;
s->delta[s->index] = pow(10., (env_global + env_shortterm) / 20.);
}
s->prev_delta = s->delta[s->index];
s->index++;
if (s->index >= 30)
s->index -= 30;
s->prev_nb_samples = in->nb_samples;
s->pts += in->nb_samples;
break;
case FINAL_FRAME:
gain = gaussian_filter(s, s->index + 10 < 30 ? s->index + 10 : s->index + 10 - 30);
s->limiter_buf_index = 0;
src_index = 0;
for (n = 0; n < s->limiter_buf_size / inlink->channels; n++) {
for (c = 0; c < inlink->channels; c++) {
s->limiter_buf[s->limiter_buf_index + c] = src[src_index + c] * gain * s->offset;
}
src_index += inlink->channels;
s->limiter_buf_index += inlink->channels;
if (s->limiter_buf_index >= s->limiter_buf_size)
s->limiter_buf_index -= s->limiter_buf_size;
}
subframe_length = frame_size(inlink->sample_rate, 100);
for (i = 0; i < in->nb_samples / subframe_length; i++) {
true_peak_limiter(s, dst, subframe_length, inlink->channels);
for (n = 0; n < subframe_length; n++) {
for (c = 0; c < inlink->channels; c++) {
if (src_index < (in->nb_samples * inlink->channels)) {
limiter_buf[s->limiter_buf_index + c] = src[src_index + c] * gain * s->offset;
} else {
limiter_buf[s->limiter_buf_index + c] = 0.;
}
}
if (src_index < (in->nb_samples * inlink->channels))
src_index += inlink->channels;
s->limiter_buf_index += inlink->channels;
if (s->limiter_buf_index >= s->limiter_buf_size)
s->limiter_buf_index -= s->limiter_buf_size;
}
dst += (subframe_length * inlink->channels);
}
dst = (double *)out->data[0];
ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
break;
case LINEAR_MODE:
for (n = 0; n < in->nb_samples; n++) {
for (c = 0; c < inlink->channels; c++) {
dst[c] = src[c] * s->offset;
}
src += inlink->channels;
dst += inlink->channels;
}
dst = (double *)out->data[0];
ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
s->pts += in->nb_samples;
break;
}
if (in != out)
av_frame_free(&in);
return ff_filter_frame(outlink, out);
}
static int request_frame(AVFilterLink *outlink)
{
int ret;
AVFilterContext *ctx = outlink->src;
AVFilterLink *inlink = ctx->inputs[0];
LoudNormContext *s = ctx->priv;
ret = ff_request_frame(inlink);
if (ret == AVERROR_EOF && s->frame_type == INNER_FRAME) {
double *src;
double *buf;
int nb_samples, n, c, offset;
AVFrame *frame;
nb_samples = (s->buf_size / inlink->channels) - s->prev_nb_samples;
nb_samples -= (frame_size(inlink->sample_rate, 100) - s->prev_nb_samples);
frame = ff_get_audio_buffer(outlink, nb_samples);
if (!frame)
return AVERROR(ENOMEM);
frame->nb_samples = nb_samples;
buf = s->buf;
src = (double *)frame->data[0];
offset = ((s->limiter_buf_size / inlink->channels) - s->prev_nb_samples) * inlink->channels;
offset -= (frame_size(inlink->sample_rate, 100) - s->prev_nb_samples) * inlink->channels;
s->buf_index = s->buf_index - offset < 0 ? s->buf_index - offset + s->buf_size : s->buf_index - offset;
for (n = 0; n < nb_samples; n++) {
for (c = 0; c < inlink->channels; c++) {
src[c] = buf[s->buf_index + c];
}
src += inlink->channels;
s->buf_index += inlink->channels;
if (s->buf_index >= s->buf_size)
s->buf_index -= s->buf_size;
}
s->frame_type = FINAL_FRAME;
ret = filter_frame(inlink, frame);
}
return ret;
}
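/* negotiate interleaved double samples and force a 192 kHz processing rate so inter-sample (true) peaks can be detected; the filtergraph inserts resampling automatically when the input rate differs */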
static int query_formats(AVFilterContext *ctx)
{
AVFilterFormats *formats;
AVFilterChannelLayouts *layouts;
AVFilterLink *inlink = ctx->inputs[0];
AVFilterLink *outlink = ctx->outputs[0];
static const int input_srate[] = {192000, -1};
static const enum AVSampleFormat sample_fmts[] = {
AV_SAMPLE_FMT_DBL,
AV_SAMPLE_FMT_NONE
};
int ret;
layouts = ff_all_channel_counts();
if (!layouts)
return AVERROR(ENOMEM);
ret = ff_set_common_channel_layouts(ctx, layouts);
if (ret < 0)
return ret;
formats = ff_make_format_list(sample_fmts);
if (!formats)
return AVERROR(ENOMEM);
ret = ff_set_common_formats(ctx, formats);
if (ret < 0)
return ret;
formats = ff_make_format_list(input_srate);
if (!formats)
return AVERROR(ENOMEM);
ret = ff_formats_ref(formats, &inlink->out_samplerates);
if (ret < 0)
return ret;
ret = ff_formats_ref(formats, &outlink->in_samplerates);
if (ret < 0)
return ret;
return 0;
}
static int config_input(AVFilterLink *inlink)
{
AVFilterContext *ctx = inlink->dst;
LoudNormContext *s = ctx->priv;
s->r128_in = ebur128_init(inlink->channels, inlink->sample_rate, EBUR128_MODE_I | EBUR128_MODE_S | EBUR128_MODE_LRA | EBUR128_MODE_SAMPLE_PEAK);
if (!s->r128_in)
return AVERROR(ENOMEM);
s->r128_out = ebur128_init(inlink->channels, inlink->sample_rate, EBUR128_MODE_I | EBUR128_MODE_S | EBUR128_MODE_LRA | EBUR128_MODE_SAMPLE_PEAK);
if (!s->r128_out)
return AVERROR(ENOMEM);
s->buf_size = frame_size(inlink->sample_rate, 3000) * inlink->channels;
s->buf = av_malloc_array(s->buf_size, sizeof(*s->buf));
if (!s->buf)
return AVERROR(ENOMEM);
s->limiter_buf_size = frame_size(inlink->sample_rate, 210) * inlink->channels;
s->limiter_buf = av_malloc_array(s->buf_size, sizeof(*s->limiter_buf));
if (!s->limiter_buf)
return AVERROR(ENOMEM);
s->prev_smp = av_malloc_array(inlink->channels, sizeof(*s->prev_smp));
if (!s->prev_smp)
return AVERROR(ENOMEM);
init_gaussian_filter(s);
s->frame_type = FIRST_FRAME;
if (s->linear) {
double offset, offset_tp;
offset = s->target_i - s->measured_i;
offset_tp = s->measured_tp + offset;
if (s->measured_tp != 99 && s->measured_thresh != -70 && s->measured_lra != 0 && s->measured_i != 0) {
if ((offset_tp <= s->target_tp) && (s->measured_lra <= s->target_lra)) {
s->frame_type = LINEAR_MODE;
s->offset = offset;
}
}
}
if (s->frame_type != LINEAR_MODE) {
inlink->min_samples =
inlink->max_samples =
inlink->partial_buf_size = frame_size(inlink->sample_rate, 3000);
}
s->pts =
s->buf_index =
s->prev_buf_index =
s->limiter_buf_index = 0;
s->channels = inlink->channels;
s->index = 1;
s->limiter_state = OUT;
s->offset = pow(10., s->offset / 20.);
s->target_tp = pow(10., s->target_tp / 20.);
s->attack_length = frame_size(inlink->sample_rate, 10);
s->release_length = frame_size(inlink->sample_rate, 100);
return 0;
}
static av_cold void uninit(AVFilterContext *ctx)
{
LoudNormContext *s = ctx->priv;
double i_in, i_out, lra_in, lra_out, thresh_in, thresh_out, tp_in, tp_out;
int c;
ebur128_loudness_range(s->r128_in, &lra_in);
ebur128_loudness_global(s->r128_in, &i_in);
ebur128_relative_threshold(s->r128_in, &thresh_in);
for (c = 0; c < s->channels; c++) {
double tmp;
ebur128_sample_peak(s->r128_in, c, &tmp);
if ((c == 0) || (tmp > tp_in))
tp_in = tmp;
}
ebur128_loudness_range(s->r128_out, &lra_out);
ebur128_loudness_global(s->r128_out, &i_out);
ebur128_relative_threshold(s->r128_out, &thresh_out);
for (c = 0; c < s->channels; c++) {
double tmp;
ebur128_sample_peak(s->r128_out, c, &tmp);
if ((c == 0) || (tmp > tp_out))
tp_out = tmp;
}
switch(s->print_format) {
case NONE:
break;
case JSON:
av_log(ctx, AV_LOG_INFO,
"\n{\n"
"\t\"input_i\" : \"%.2f\",\n"
"\t\"input_tp\" : \"%.2f\",\n"
"\t\"input_lra\" : \"%.2f\",\n"
"\t\"input_thresh\" : \"%.2f\",\n"
"\t\"output_i\" : \"%.2f\",\n"
"\t\"output_tp\" : \"%+.2f\",\n"
"\t\"output_lra\" : \"%.2f\",\n"
"\t\"output_thresh\" : \"%.2f\",\n"
"\t\"normalization_type\" : \"%s\",\n"
"\t\"target_offset\" : \"%.2f\"\n"
"}\n",
i_in,
20. * log10(tp_in),
lra_in,
thresh_in,
i_out,
20. * log10(tp_out),
lra_out,
thresh_out,
s->frame_type == LINEAR_MODE ? "linear" : "dynamic",
s->target_i - i_out
);
break;
case SUMMARY:
av_log(ctx, AV_LOG_INFO,
"\n"
"Input Integrated: %+6.1f LUFS\n"
"Input True Peak: %+6.1f dBTP\n"
"Input LRA: %6.1f LU\n"
"Input Threshold: %+6.1f LUFS\n"
"\n"
"Output Integrated: %+6.1f LUFS\n"
"Output True Peak: %+6.1f dBTP\n"
"Output LRA: %6.1f LU\n"
"Output Threshold: %+6.1f LUFS\n"
"\n"
"Normalization Type: %s\n"
"Target Offset: %+6.1f LU\n",
i_in,
20. * log10(tp_in),
lra_in,
thresh_in,
i_out,
20. * log10(tp_out),
lra_out,
thresh_out,
s->frame_type == LINEAR_MODE ? "Linear" : "Dynamic",
s->target_i - i_out
);
break;
}
ebur128_destroy(&s->r128_in);
ebur128_destroy(&s->r128_out);
av_freep(&s->limiter_buf);
av_freep(&s->prev_smp);
av_freep(&s->buf);
}
static const AVFilterPad avfilter_af_loudnorm_inputs[] = {
{
.name = "default",
.type = AVMEDIA_TYPE_AUDIO,
.config_props = config_input,
.filter_frame = filter_frame,
},
{ NULL }
};
static const AVFilterPad avfilter_af_loudnorm_outputs[] = {
{
.name = "default",
.request_frame = request_frame,
.type = AVMEDIA_TYPE_AUDIO,
},
{ NULL }
};
AVFilter ff_af_loudnorm = {
.name = "loudnorm",
.description = NULL_IF_CONFIG_SMALL("EBU R128 loudness normalization"),
.priv_size = sizeof(LoudNormContext),
.priv_class = &loudnorm_class,
.query_formats = query_formats,
.uninit = uninit,
.inputs = avfilter_af_loudnorm_inputs,
.outputs = avfilter_af_loudnorm_outputs,
};

libavfilter/allfilters.c

@@ -108,6 +108,7 @@ void avfilter_register_all(void)
REGISTER_FILTER(HIGHPASS, highpass, af);
REGISTER_FILTER(JOIN, join, af);
REGISTER_FILTER(LADSPA, ladspa, af);
REGISTER_FILTER(LOUDNORM, loudnorm, af);
REGISTER_FILTER(LOWPASS, lowpass, af);
REGISTER_FILTER(PAN, pan, af);
REGISTER_FILTER(REPLAYGAIN, replaygain, af);

libavfilter/version.h

@@ -30,7 +30,7 @@
#include "libavutil/version.h"
#define LIBAVFILTER_VERSION_MAJOR 6
-#define LIBAVFILTER_VERSION_MINOR 45
+#define LIBAVFILTER_VERSION_MINOR 46
#define LIBAVFILTER_VERSION_MICRO 100
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \