From f5a2d285f94585e6336838fb1efc9c28cad93142 Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Thu, 6 Jan 2011 22:34:12 +0000 Subject: [PATCH] Export dca profile information. Patch by Anssi Hannula anssi d hannula a iki d fi Originally committed as revision 26250 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/avcodec.h | 10 +- libavcodec/dca.c | 354 ++++++++++++++++++++++++++++++++++++++++++- libavcodec/options.c | 5 + 3 files changed, 364 insertions(+), 5 deletions(-) diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 828b07ac24..7850e1da07 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -32,8 +32,8 @@ #include "libavutil/cpu.h" #define LIBAVCODEC_VERSION_MAJOR 52 -#define LIBAVCODEC_VERSION_MINOR 102 -#define LIBAVCODEC_VERSION_MICRO 0 +#define LIBAVCODEC_VERSION_MINOR 103 +#define LIBAVCODEC_VERSION_MICRO 1 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ LIBAVCODEC_VERSION_MINOR, \ @@ -2238,6 +2238,12 @@ typedef struct AVCodecContext { #define FF_PROFILE_AAC_SSR 2 #define FF_PROFILE_AAC_LTP 3 +#define FF_PROFILE_DTS 20 +#define FF_PROFILE_DTS_ES 30 +#define FF_PROFILE_DTS_96_24 40 +#define FF_PROFILE_DTS_HD_HRA 50 +#define FF_PROFILE_DTS_HD_MA 60 + #define FF_PROFILE_H264_BASELINE 66 #define FF_PROFILE_H264_MAIN 77 #define FF_PROFILE_H264_EXTENDED 88 diff --git a/libavcodec/dca.c b/libavcodec/dca.c index 83f529ba1a..868fe2f2b5 100644 --- a/libavcodec/dca.c +++ b/libavcodec/dca.c @@ -26,6 +26,7 @@ #include #include +#include "libavutil/common.h" #include "libavutil/intmath.h" #include "libavutil/intreadwrite.h" #include "libavcore/audioconvert.h" @@ -64,6 +65,39 @@ enum DCAMode { DCA_4F2R }; +/* these are unconfirmed but should be mostly correct */ +enum DCAExSSSpeakerMask { + DCA_EXSS_FRONT_CENTER = 0x0001, + DCA_EXSS_FRONT_LEFT_RIGHT = 0x0002, + DCA_EXSS_SIDE_REAR_LEFT_RIGHT = 0x0004, + DCA_EXSS_LFE = 0x0008, + DCA_EXSS_REAR_CENTER = 0x0010, + DCA_EXSS_FRONT_HIGH_LEFT_RIGHT = 0x0020, + DCA_EXSS_REAR_LEFT_RIGHT = 0x0040, + DCA_EXSS_FRONT_HIGH_CENTER = 0x0080, + DCA_EXSS_OVERHEAD = 0x0100, + DCA_EXSS_CENTER_LEFT_RIGHT = 0x0200, + DCA_EXSS_WIDE_LEFT_RIGHT = 0x0400, + DCA_EXSS_SIDE_LEFT_RIGHT = 0x0800, + DCA_EXSS_LFE2 = 0x1000, + DCA_EXSS_SIDE_HIGH_LEFT_RIGHT = 0x2000, + DCA_EXSS_REAR_HIGH_CENTER = 0x4000, + DCA_EXSS_REAR_HIGH_LEFT_RIGHT = 0x8000, +}; + +enum DCAExtensionMask { + DCA_EXT_CORE = 0x001, ///< core in core substream + DCA_EXT_XXCH = 0x002, ///< XXCh channels extension in core substream + DCA_EXT_X96 = 0x004, ///< 96/24 extension in core substream + DCA_EXT_XCH = 0x008, ///< XCh channel extension in core substream + DCA_EXT_EXSS_CORE = 0x010, ///< core in ExSS (extension substream) + DCA_EXT_EXSS_XBR = 0x020, ///< extended bitrate extension in ExSS + DCA_EXT_EXSS_XXCH = 0x040, ///< XXCh channels extension in ExSS + DCA_EXT_EXSS_X96 = 0x080, ///< 96/24 extension in ExSS + DCA_EXT_EXSS_LBR = 0x100, ///< low bitrate component in ExSS + DCA_EXT_EXSS_XLL = 0x200, ///< lossless extension in ExSS +}; + /* Tables for mapping dts channel configurations to libavcodec multichannel api. * Some compromises have been made for special configurations. Most configurations * are never used so complete accuracy is not needed. @@ -183,6 +217,7 @@ static const int8_t dca_channel_reorder_nolfe_xch[][9] = { #define HEADER_SIZE 14 #define DCA_MAX_FRAME_SIZE 16384 +#define DCA_MAX_EXSS_HEADER_SIZE 4096 /** Bit allocation */ typedef struct { @@ -280,7 +315,7 @@ typedef struct { DECLARE_ALIGNED(16, float, samples)[(DCA_PRIM_CHANNELS_MAX+1)*256]; const float *samples_chanptr[DCA_PRIM_CHANNELS_MAX+1]; - uint8_t dca_buffer[DCA_MAX_FRAME_SIZE]; + uint8_t dca_buffer[DCA_MAX_FRAME_SIZE + DCA_MAX_EXSS_HEADER_SIZE]; int dca_buffer_size; ///< how much data is in the dca_buffer const int8_t* channel_order_tab; ///< channel reordering table, lfe and non lfe @@ -293,6 +328,18 @@ typedef struct { int xch_present; int xch_base_channel; ///< index of first (only) channel containing XCH data + /* Other detected extensions in the core substream */ + int xxch_present; + int x96_present; + + /* ExSS header parser */ + int static_fields; ///< static fields present + int mix_metadata; ///< mixing metadata present + int num_mix_configs; ///< number of mix out configurations + int mix_config_num_ch[4]; ///< number of channels in each mix out configuration + + int profile; + int debug_flag; ///< used for suppressing repeated error messages output DSPContext dsp; FFTContext imdct; @@ -1270,6 +1317,278 @@ static int dca_convert_bitstream(const uint8_t * src, int src_size, uint8_t * ds } } +/** + * Return the number of channels in an ExSS speaker mask (HD) + */ +static int dca_exss_mask2count(int mask) +{ + /* count bits that mean speaker pairs twice */ + return av_popcount(mask) + + av_popcount(mask & ( + DCA_EXSS_CENTER_LEFT_RIGHT + | DCA_EXSS_FRONT_LEFT_RIGHT + | DCA_EXSS_FRONT_HIGH_LEFT_RIGHT + | DCA_EXSS_WIDE_LEFT_RIGHT + | DCA_EXSS_SIDE_LEFT_RIGHT + | DCA_EXSS_SIDE_HIGH_LEFT_RIGHT + | DCA_EXSS_SIDE_REAR_LEFT_RIGHT + | DCA_EXSS_REAR_LEFT_RIGHT + | DCA_EXSS_REAR_HIGH_LEFT_RIGHT + )); +} + +/** + * Skip mixing coefficients of a single mix out configuration (HD) + */ +static void dca_exss_skip_mix_coeffs(GetBitContext *gb, int channels, int out_ch) +{ + for (int i = 0; i < channels; i++) { + int mix_map_mask = get_bits(gb, out_ch); + int num_coeffs = av_popcount(mix_map_mask); + skip_bits_long(gb, num_coeffs * 6); + } +} + +/** + * Parse extension substream asset header (HD) + */ +static int dca_exss_parse_asset_header(DCAContext *s) +{ + int header_pos = get_bits_count(&s->gb); + int header_size; + int channels; + int embedded_stereo = 0; + int embedded_6ch = 0; + int drc_code_present; + int extensions_mask; + int i, j; + + if (get_bits_left(&s->gb) < 16) + return -1; + + /* We will parse just enough to get to the extensions bitmask with which + * we can set the profile value. */ + + header_size = get_bits(&s->gb, 9) + 1; + skip_bits(&s->gb, 3); // asset index + + if (s->static_fields) { + if (get_bits1(&s->gb)) + skip_bits(&s->gb, 4); // asset type descriptor + if (get_bits1(&s->gb)) + skip_bits_long(&s->gb, 24); // language descriptor + + if (get_bits1(&s->gb)) { + /* How can one fit 1024 bytes of text here if the maximum value + * for the asset header size field above was 512 bytes? */ + int text_length = get_bits(&s->gb, 10) + 1; + if (get_bits_left(&s->gb) < text_length * 8) + return -1; + skip_bits_long(&s->gb, text_length * 8); // info text + } + + skip_bits(&s->gb, 5); // bit resolution - 1 + skip_bits(&s->gb, 4); // max sample rate code + channels = get_bits(&s->gb, 8) + 1; + + if (get_bits1(&s->gb)) { // 1-to-1 channels to speakers + int spkr_remap_sets; + int spkr_mask_size = 16; + int num_spkrs[7]; + + if (channels > 2) + embedded_stereo = get_bits1(&s->gb); + if (channels > 6) + embedded_6ch = get_bits1(&s->gb); + + if (get_bits1(&s->gb)) { + spkr_mask_size = (get_bits(&s->gb, 2) + 1) << 2; + skip_bits(&s->gb, spkr_mask_size); // spkr activity mask + } + + spkr_remap_sets = get_bits(&s->gb, 3); + + for (i = 0; i < spkr_remap_sets; i++) { + /* std layout mask for each remap set */ + num_spkrs[i] = dca_exss_mask2count(get_bits(&s->gb, spkr_mask_size)); + } + + for (i = 0; i < spkr_remap_sets; i++) { + int num_dec_ch_remaps = get_bits(&s->gb, 5) + 1; + if (get_bits_left(&s->gb) < 0) + return -1; + + for (j = 0; j < num_spkrs[i]; j++) { + int remap_dec_ch_mask = get_bits_long(&s->gb, num_dec_ch_remaps); + int num_dec_ch = av_popcount(remap_dec_ch_mask); + skip_bits_long(&s->gb, num_dec_ch * 5); // remap codes + } + } + + } else { + skip_bits(&s->gb, 3); // representation type + } + } + + drc_code_present = get_bits1(&s->gb); + if (drc_code_present) + get_bits(&s->gb, 8); // drc code + + if (get_bits1(&s->gb)) + skip_bits(&s->gb, 5); // dialog normalization code + + if (drc_code_present && embedded_stereo) + get_bits(&s->gb, 8); // drc stereo code + + if (s->mix_metadata && get_bits1(&s->gb)) { + skip_bits(&s->gb, 1); // external mix + skip_bits(&s->gb, 6); // post mix gain code + + if (get_bits(&s->gb, 2) != 3) // mixer drc code + skip_bits(&s->gb, 3); // drc limit + else + skip_bits(&s->gb, 8); // custom drc code + + if (get_bits1(&s->gb)) // channel specific scaling + for (i = 0; i < s->num_mix_configs; i++) + skip_bits_long(&s->gb, s->mix_config_num_ch[i] * 6); // scale codes + else + skip_bits_long(&s->gb, s->num_mix_configs * 6); // scale codes + + for (i = 0; i < s->num_mix_configs; i++) { + if (get_bits_left(&s->gb) < 0) + return -1; + dca_exss_skip_mix_coeffs(&s->gb, channels, s->mix_config_num_ch[i]); + if (embedded_6ch) + dca_exss_skip_mix_coeffs(&s->gb, 6, s->mix_config_num_ch[i]); + if (embedded_stereo) + dca_exss_skip_mix_coeffs(&s->gb, 2, s->mix_config_num_ch[i]); + } + } + + switch (get_bits(&s->gb, 2)) { + case 0: extensions_mask = get_bits(&s->gb, 12); break; + case 1: extensions_mask = DCA_EXT_EXSS_XLL; break; + case 2: extensions_mask = DCA_EXT_EXSS_LBR; break; + case 3: extensions_mask = 0; /* aux coding */ break; + } + + /* not parsed further, we were only interested in the extensions mask */ + + if (get_bits_left(&s->gb) < 0) + return -1; + + if (get_bits_count(&s->gb) - header_pos > header_size * 8) { + av_log(s->avctx, AV_LOG_WARNING, "Asset header size mismatch.\n"); + return -1; + } + skip_bits_long(&s->gb, header_pos + header_size * 8 - get_bits_count(&s->gb)); + + if (extensions_mask & DCA_EXT_EXSS_XLL) + s->profile = FF_PROFILE_DTS_HD_MA; + else if (extensions_mask & DCA_EXT_EXSS_XBR) + s->profile = FF_PROFILE_DTS_HD_HRA; + else if (extensions_mask & DCA_EXT_EXSS_X96) + s->profile = FF_PROFILE_DTS_96_24; + else if (extensions_mask & DCA_EXT_EXSS_XXCH) + s->profile = FFMAX(s->profile, FF_PROFILE_DTS_ES); + + if (!(extensions_mask & DCA_EXT_CORE)) + av_log(s->avctx, AV_LOG_WARNING, "DTS core detection mismatch.\n"); + if (!!(extensions_mask & DCA_EXT_XCH) != s->xch_present) + av_log(s->avctx, AV_LOG_WARNING, "DTS XCh detection mismatch.\n"); + if (!!(extensions_mask & DCA_EXT_XXCH) != s->xxch_present) + av_log(s->avctx, AV_LOG_WARNING, "DTS XXCh detection mismatch.\n"); + if (!!(extensions_mask & DCA_EXT_X96) != s->x96_present) + av_log(s->avctx, AV_LOG_WARNING, "DTS X96 detection mismatch.\n"); + + return 0; +} + +/** + * Parse extension substream header (HD) + */ +static void dca_exss_parse_header(DCAContext *s) +{ + int ss_index; + int blownup; + int header_size; + int hd_size; + int num_audiop = 1; + int num_assets = 1; + int active_ss_mask[8]; + int i, j; + + if (get_bits_left(&s->gb) < 52) + return; + + skip_bits(&s->gb, 8); // user data + ss_index = get_bits(&s->gb, 2); + + blownup = get_bits1(&s->gb); + header_size = get_bits(&s->gb, 8 + 4 * blownup) + 1; + hd_size = get_bits_long(&s->gb, 16 + 4 * blownup) + 1; + + s->static_fields = get_bits1(&s->gb); + if (s->static_fields) { + skip_bits(&s->gb, 2); // reference clock code + skip_bits(&s->gb, 3); // frame duration code + + if (get_bits1(&s->gb)) + skip_bits_long(&s->gb, 36); // timestamp + + /* a single stream can contain multiple audio assets that can be + * combined to form multiple audio presentations */ + + num_audiop = get_bits(&s->gb, 3) + 1; + if (num_audiop > 1) { + av_log_ask_for_sample(s->avctx, "Multiple DTS-HD audio presentations."); + /* ignore such streams for now */ + return; + } + + num_assets = get_bits(&s->gb, 3) + 1; + if (num_assets > 1) { + av_log_ask_for_sample(s->avctx, "Multiple DTS-HD audio assets."); + /* ignore such streams for now */ + return; + } + + for (i = 0; i < num_audiop; i++) + active_ss_mask[i] = get_bits(&s->gb, ss_index + 1); + + for (i = 0; i < num_audiop; i++) + for (j = 0; j <= ss_index; j++) + if (active_ss_mask[i] & (1 << j)) + skip_bits(&s->gb, 8); // active asset mask + + s->mix_metadata = get_bits1(&s->gb); + if (s->mix_metadata) { + int mix_out_mask_size; + + skip_bits(&s->gb, 2); // adjustment level + mix_out_mask_size = (get_bits(&s->gb, 2) + 1) << 2; + s->num_mix_configs = get_bits(&s->gb, 2) + 1; + + for (i = 0; i < s->num_mix_configs; i++) { + int mix_out_mask = get_bits(&s->gb, mix_out_mask_size); + s->mix_config_num_ch[i] = dca_exss_mask2count(mix_out_mask); + } + } + } + + for (i = 0; i < num_assets; i++) + skip_bits_long(&s->gb, 16 + 4 * blownup); // asset size + + for (i = 0; i < num_assets; i++) { + if (dca_exss_parse_asset_header(s)) + return; + } + + /* not parsed further, we were only interested in the extensions mask + * from the asset header */ +} + /** * Main frame decoding function * FIXME add arguments @@ -1287,10 +1606,15 @@ static int dca_decode_frame(AVCodecContext * avctx, int16_t *samples = data; DCAContext *s = avctx->priv_data; int channels; + int core_ss_end; s->xch_present = 0; - s->dca_buffer_size = dca_convert_bitstream(buf, buf_size, s->dca_buffer, DCA_MAX_FRAME_SIZE); + s->x96_present = 0; + s->xxch_present = 0; + + s->dca_buffer_size = dca_convert_bitstream(buf, buf_size, s->dca_buffer, + DCA_MAX_FRAME_SIZE + DCA_MAX_EXSS_HEADER_SIZE); if (s->dca_buffer_size == -1) { av_log(avctx, AV_LOG_ERROR, "Not a valid DCA frame\n"); return -1; @@ -1306,6 +1630,8 @@ static int dca_decode_frame(AVCodecContext * avctx, avctx->sample_rate = s->sample_rate; avctx->bit_rate = s->bit_rate; + s->profile = FF_PROFILE_DTS; + for (i = 0; i < (s->sample_blocks / 8); i++) { dca_decode_block(s, 0, i); } @@ -1316,7 +1642,9 @@ static int dca_decode_frame(AVCodecContext * avctx, /* extensions start at 32-bit boundaries into bitstream */ skip_bits_long(&s->gb, (-get_bits_count(&s->gb)) & 31); - while(get_bits_left(&s->gb) >= 32) { + core_ss_end = FFMIN(s->frame_size, s->dca_buffer_size) * 8; + + while(core_ss_end - get_bits_count(&s->gb) >= 32) { uint32_t bits = get_bits_long(&s->gb, 32); switch(bits) { @@ -1334,6 +1662,8 @@ static int dca_decode_frame(AVCodecContext * avctx, /* skip length-to-end-of-frame field for the moment */ skip_bits(&s->gb, 10); + s->profile = FFMAX(s->profile, FF_PROFILE_DTS_ES); + /* extension amode should == 1, number of channels in extension */ /* AFAIK XCh is not used for more channels */ if ((ext_amode = get_bits(&s->gb, 4)) != 1) { @@ -1352,6 +1682,14 @@ static int dca_decode_frame(AVCodecContext * avctx, s->xch_present = 1; break; } + case 0x47004a03: + /* XXCh: extended channels */ + /* usually found either in core or HD part in DTS-HD HRA streams, + * but not in DTS-ES which contains XCh extensions instead */ + s->xxch_present = 1; + s->profile = FFMAX(s->profile, FF_PROFILE_DTS_ES); + break; + case 0x1d95f262: { int fsize96 = show_bits(&s->gb, 12) + 1; if (s->frame_size != (get_bits_count(&s->gb) >> 3) - 4 + fsize96) @@ -1361,6 +1699,9 @@ static int dca_decode_frame(AVCodecContext * avctx, skip_bits(&s->gb, 12); av_log(avctx, AV_LOG_DEBUG, "FSIZE96 = %d bytes\n", fsize96); av_log(avctx, AV_LOG_DEBUG, "REVNO = %d\n", get_bits(&s->gb, 4)); + + s->x96_present = 1; + s->profile = FFMAX(s->profile, FF_PROFILE_DTS_96_24); break; } } @@ -1368,6 +1709,13 @@ static int dca_decode_frame(AVCodecContext * avctx, skip_bits_long(&s->gb, (-get_bits_count(&s->gb)) & 31); } + /* check for ExSS (HD part) */ + if (s->dca_buffer_size - s->frame_size > 32 + && get_bits_long(&s->gb, 32) == DCA_HD_MARKER) + dca_exss_parse_header(s); + + avctx->profile = s->profile; + channels = s->prim_channels + !!s->lfe; if (s->amode<16) { diff --git a/libavcodec/options.c b/libavcodec/options.c index e31a007891..369ee5d334 100644 --- a/libavcodec/options.c +++ b/libavcodec/options.c @@ -331,6 +331,11 @@ static const AVOption options[]={ {"aac_low", NULL, 0, FF_OPT_TYPE_CONST, FF_PROFILE_AAC_LOW, INT_MIN, INT_MAX, A|E, "profile"}, {"aac_ssr", NULL, 0, FF_OPT_TYPE_CONST, FF_PROFILE_AAC_SSR, INT_MIN, INT_MAX, A|E, "profile"}, {"aac_ltp", NULL, 0, FF_OPT_TYPE_CONST, FF_PROFILE_AAC_LTP, INT_MIN, INT_MAX, A|E, "profile"}, +{"dts", NULL, 0, FF_OPT_TYPE_CONST, FF_PROFILE_DTS, INT_MIN, INT_MAX, A|E, "profile"}, +{"dts_es", NULL, 0, FF_OPT_TYPE_CONST, FF_PROFILE_DTS_ES, INT_MIN, INT_MAX, A|E, "profile"}, +{"dts_96_24", NULL, 0, FF_OPT_TYPE_CONST, FF_PROFILE_DTS_96_24, INT_MIN, INT_MAX, A|E, "profile"}, +{"dts_hd_hra", NULL, 0, FF_OPT_TYPE_CONST, FF_PROFILE_DTS_HD_HRA, INT_MIN, INT_MAX, A|E, "profile"}, +{"dts_hd_ma", NULL, 0, FF_OPT_TYPE_CONST, FF_PROFILE_DTS_HD_MA, INT_MIN, INT_MAX, A|E, "profile"}, {"level", NULL, OFFSET(level), FF_OPT_TYPE_INT, FF_LEVEL_UNKNOWN, INT_MIN, INT_MAX, V|A|E, "level"}, {"unknown", NULL, 0, FF_OPT_TYPE_CONST, FF_LEVEL_UNKNOWN, INT_MIN, INT_MAX, V|A|E, "level"}, {"lowres", "decode at 1= 1/2, 2=1/4, 3=1/8 resolutions", OFFSET(lowres), FF_OPT_TYPE_INT, 0, 0, INT_MAX, V|A|D},