diff --git a/Changelog b/Changelog index cd73c6d623..14e01f3870 100644 --- a/Changelog +++ b/Changelog @@ -50,6 +50,7 @@ version next: - edge detection filter - framestep filter - ffmpeg -shortest option is now per-output file +- volume measurement filter version 0.11: diff --git a/doc/filters.texi b/doc/filters.texi index 5793100682..8847990eea 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -690,6 +690,46 @@ volume=-12dB @end example @end itemize +@section volumedetect + +Detect the volume of the input audio. + +The filter has no parameters. The input is not modified. Statistics about +the volume will be printed in the log when the input stream end is reached. + +In particular it will show the mean volume (root mean square), maximum +volume (on a per-sample basis), and the beginning of a histogram of the +registered volume values (from the maximum value to a cumulative 1/1000 of +the samples). + +All volumes are in decibels relative to the maximum PCM value. + +Here is an excerpt of the output: +@example +[Parsed_volumedetect_0 @@ 0xa23120] mean_volume: -27 dB +[Parsed_volumedetect_0 @@ 0xa23120] max_volume: -4 dB +[Parsed_volumedetect_0 @@ 0xa23120] histogram_4db: 6 +[Parsed_volumedetect_0 @@ 0xa23120] histogram_5db: 62 +[Parsed_volumedetect_0 @@ 0xa23120] histogram_6db: 286 +[Parsed_volumedetect_0 @@ 0xa23120] histogram_7db: 1042 +[Parsed_volumedetect_0 @@ 0xa23120] histogram_8db: 2551 +[Parsed_volumedetect_0 @@ 0xa23120] histogram_9db: 4609 +[Parsed_volumedetect_0 @@ 0xa23120] histogram_10db: 8409 +@end example + +It means that: +@itemize +@item +The mean square energy is approximately -27 dB, or 10^-2.7. +@item +The largest sample is at -4 dB, or more precisely between -4 dB and -5 dB. +@item +There are 6 samples at -4 dB, 62 at -5 dB, 286 at -6 dB, etc. +@end itemize + +In other words, raising the volume by +4 dB does not cause any clipping, +raising it by +5 dB causes clipping for 6 samples, etc. 
+ @section asyncts Synchronize audio data with timestamps by squeezing/stretching it and/or dropping samples/adding silence when needed. diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 916e54aac8..af4fde637e 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -67,6 +67,7 @@ OBJS-$(CONFIG_PAN_FILTER) += af_pan.o OBJS-$(CONFIG_RESAMPLE_FILTER) += af_resample.o OBJS-$(CONFIG_SILENCEDETECT_FILTER) += af_silencedetect.o OBJS-$(CONFIG_VOLUME_FILTER) += af_volume.o +OBJS-$(CONFIG_VOLUMEDETECT_FILTER) += af_volumedetect.o OBJS-$(CONFIG_AEVALSRC_FILTER) += asrc_aevalsrc.o OBJS-$(CONFIG_ANULLSRC_FILTER) += asrc_anullsrc.o
diff --git a/libavfilter/af_volumedetect.c b/libavfilter/af_volumedetect.c
new file mode 100644
index 0000000000..caf8559889
--- /dev/null
+++ b/libavfilter/af_volumedetect.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2012 Nicolas George
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/audioconvert.h"
+#include "libavutil/avassert.h"
+#include "audio.h"
+#include "avfilter.h"
+#include "internal.h"
+
+/**
+ * Private context of the volumedetect filter.
+ */
+typedef struct {
+    /**
+     * Number of samples at each PCM value.
+     * histogram[0x8000 + i] is the number of samples at value i.
+     * The extra element is there for symmetry.
+     */
+    uint64_t histogram[0x10001];
+} VolDetectContext;
+
+/**
+ * Restrict the filter to signed 16-bit samples, packed or planar.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if the format list cannot be built
+ */
+static int query_formats(AVFilterContext *ctx)
+{
+    enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_S16,
+        AV_SAMPLE_FMT_S16P,
+        AV_SAMPLE_FMT_NONE
+    };
+    AVFilterFormats *formats;
+
+    if (!(formats = ff_make_format_list(sample_fmts)))
+        return AVERROR(ENOMEM);
+    ff_set_common_formats(ctx, formats);
+
+    return 0;
+}
+
+/**
+ * Add every sample of the incoming buffer to the histogram, then pass the
+ * buffer, unmodified, to the output link.
+ *
+ * @return the value returned by ff_filter_samples() on the output link
+ */
+static int filter_samples(AVFilterLink *inlink, AVFilterBufferRef *samples)
+{
+    AVFilterContext *ctx = inlink->dst;
+    VolDetectContext *vd = ctx->priv;
+    int64_t layout  = samples->audio->channel_layout;
+    int nb_samples  = samples->audio->nb_samples;
+    int nb_channels = av_get_channel_layout_nb_channels(layout);
+    /* Planar audio: one plane per channel, nb_samples samples in each. */
+    int nb_planes   = nb_channels;
+    int plane, i;
+    int16_t *pcm;
+
+    if (!av_sample_fmt_is_planar(samples->format)) {
+        /* Packed audio: a single plane holding all channels interleaved. */
+        nb_samples *= nb_channels;
+        nb_planes = 1;
+    }
+    for (plane = 0; plane < nb_planes; plane++) {
+        pcm = (int16_t *)samples->extended_data[plane];
+        for (i = 0; i < nb_samples; i++)
+            vd->histogram[pcm[i] + 0x8000]++;
+    }
+
+    return ff_filter_samples(inlink->dst->outputs[0], samples);
+}
+
+/* Value returned by logdb() for 0; also the last index of histdb[]. */
+#define MAX_DB 91
+
+/**
+ * Convert a squared sample value to decibels below full scale, where full
+ * scale is 0x8000 * 0x8000.
+ *
+ * @return -10 * log10(v / (0x8000 * 0x8000)), or MAX_DB if v is 0
+ */
+static inline double logdb(uint64_t v)
+{
+    double d = v / (double)(0x8000 * 0x8000);
+    if (!v)
+        return MAX_DB;
+    return log(d) * -4.3429448190325182765112891891660508229; /* -10/log(10) */
+}
+
+/**
+ * Log the sample count, the mean volume, the maximum volume and the top of
+ * the per-dB histogram, at AV_LOG_INFO level.
+ */
+static void print_stats(AVFilterContext *ctx)
+{
+    VolDetectContext *vd = ctx->priv;
+    int i, max_volume, shift;
+    uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0;
+    uint64_t histdb[MAX_DB + 1] = { 0 };
+
+    for (i = 0; i < 0x10000; i++)
+        nb_samples += vd->histogram[i];
+    av_log(ctx, AV_LOG_INFO, "n_samples: %"PRIu64"\n", nb_samples);
+    if (!nb_samples)
+        return;
+
+    /* If nb_samples > 1<<34, there is a risk of overflow in the
+       multiplication or the sum: shift all histogram values to avoid that.
+       The total number of samples must be recomputed to avoid rounding
+       errors. */
+    shift = av_log2(nb_samples >> 33);
+    for (i = 0; i < 0x10000; i++) {
+        nb_samples_shift += vd->histogram[i] >> shift;
+        power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift);
+    }
+    if (!nb_samples_shift)
+        return;
+    power = (power + nb_samples_shift / 2) / nb_samples_shift;
+    av_assert0(power <= 0x8000 * 0x8000);
+    av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power));
+
+    /* Largest absolute sample value present in the histogram. */
+    max_volume = 0x8000;
+    while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] &&
+                             !vd->histogram[0x8000 - max_volume])
+        max_volume--;
+    av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume));
+
+    /* Regroup the samples in 1 dB bins, then print the bins from the loudest
+       non-empty one until they add up to 1/1000 of all samples. */
+    for (i = 0; i < 0x10000; i++)
+        histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i];
+    for (i = 0; i <= MAX_DB && !histdb[i]; i++);
+    for (; i <= MAX_DB && sum < nb_samples / 1000; i++) {
+        av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRIu64"\n", i, histdb[i]);
+        sum += histdb[i];
+    }
+}
+
+/**
+ * Forward the frame request to the input; print the statistics once the
+ * input reports AVERROR_EOF.
+ */
+static int request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    int ret = ff_request_frame(ctx->inputs[0]);
+    if (ret == AVERROR_EOF)
+        print_stats(ctx);
+    return ret;
+}
+
+AVFilter avfilter_af_volumedetect = {
+    .name          = "volumedetect",
+    .description   = NULL_IF_CONFIG_SMALL("Detect audio volume."),
+
+    .priv_size     = sizeof(VolDetectContext),
+    .query_formats = query_formats,
+
+    .inputs    = (const AVFilterPad[]) {
+        { .name             = "default",
+          .type             = AVMEDIA_TYPE_AUDIO,
+          .get_audio_buffer = ff_null_get_audio_buffer,
+          .filter_samples   = filter_samples,
+          .min_perms        = AV_PERM_READ, },
+        { .name = NULL }
+    },
+    .outputs   = (const AVFilterPad[]) {
+        { .name          = "default",
+          .type          = AVMEDIA_TYPE_AUDIO,
+          .request_frame = request_frame, },
+        { .name = NULL }
+    },
+};
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index a9344c2137..6defed4cf6 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -57,6 +57,7 @@ void avfilter_register_all(void) REGISTER_FILTER (PAN, pan, af); REGISTER_FILTER 
(SILENCEDETECT, silencedetect, af); REGISTER_FILTER (VOLUME, volume, af); + REGISTER_FILTER (VOLUMEDETECT,volumedetect,af); REGISTER_FILTER (RESAMPLE, resample, af); REGISTER_FILTER (AEVALSRC, aevalsrc, asrc);