aacenc: support extended channel layouts using PCEs

This commit implements support for PCEs (Program Configuration Elements) in the
AAC encoder, and as such allows for encoding of channel layouts not present
in the presets defined by the spec (which only lists the 8 most common ones).

This has been a highly requested feature, and it makes this the first open source
encoder to support this many layouts.

Many thanks to pkviet <pkv.stream@gmail.com> who implemented support for and
verified all channel layouts.
This commit is contained in:
Rostislav Pehlivanov 2016-10-03 19:53:11 +01:00
parent 0a771e6b32
commit fbf295e2bd
4 changed files with 361 additions and 8 deletions

View File

@ -10,6 +10,7 @@ version <next>:
- Raw AMR-NB and AMR-WB demuxers
- TiVo ty/ty+ demuxer
- Intel QSV-accelerated MJPEG encoding
- PCE support for extended channel layouts in the AAC encoder
version 3.4:

View File

@ -50,6 +50,40 @@
static AVOnce aac_table_init = AV_ONCE_INIT;
/**
 * Write a Program Configuration Element for the encoder's channel layout.
 *
 * The element counts, pairing flags and element indices are taken from the
 * AACPCEInfo stored in the encoder context; every optional part that this
 * encoder does not use is written as zero.
 *
 * @param pb    bit writer that receives the PCE
 * @param avctx codec context; its priv_data must be the AACEncContext
 */
static void put_pce(PutBitContext *pb, AVCodecContext *avctx)
{
    AACEncContext *enc = avctx->priv_data;
    const AACPCEInfo *info = &enc->pce;
    int group, ele;

    /* Fixed header: 4 zero bits, then profile and sample rate index. */
    put_bits(pb, 4, 0);
    put_bits(pb, 2, avctx->profile);
    put_bits(pb, 4, enc->samplerate_index);

    /* Element counts per group. */
    put_bits(pb, 4, info->num_ele[0]); /* front group */
    put_bits(pb, 4, info->num_ele[1]); /* side group */
    put_bits(pb, 4, info->num_ele[2]); /* back group */
    put_bits(pb, 2, info->num_ele[3]); /* LFE group */
    put_bits(pb, 3, 0);                /* assoc data: none */
    put_bits(pb, 4, 0);                /* CCs: none */

    /* Three flags, all cleared. */
    put_bits(pb, 1, 0); /* stereo mixdown */
    put_bits(pb, 1, 0); /* mono mixdown */
    put_bits(pb, 1, 0); /* third flag; presumably matrix mixdown - confirm against the spec */

    /* Per-element description, group by group (front, side, back, LFE). */
    for (group = 0; group < 4; group++) {
        /* Only front/side/back elements carry a pairing bit; LFE does not. */
        const int has_pairing = group != 3;

        for (ele = 0; ele < info->num_ele[group]; ele++) {
            if (has_pairing)
                put_bits(pb, 1, info->pairing[group][ele]);
            put_bits(pb, 4, info->index[group][ele]);
        }
    }

    /* Pad to a byte boundary, then write one zero byte. */
    avpriv_align_put_bits(pb);
    put_bits(pb, 8, 0);
}
/**
* Make AAC audio config object.
* @see 1.6.2.1 "Syntax - AudioSpecificConfig"
@ -58,7 +92,7 @@ static void put_audio_specific_config(AVCodecContext *avctx)
{
PutBitContext pb;
AACEncContext *s = avctx->priv_data;
int channels = s->channels - (s->channels == 8 ? 1 : 0);
int channels = (!s->needs_pce)*(s->channels - (s->channels == 8 ? 1 : 0));
init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
put_bits(&pb, 5, s->profile+1); //profile
@ -68,6 +102,8 @@ static void put_audio_specific_config(AVCodecContext *avctx)
put_bits(&pb, 1, 0); //frame length - 1024 samples
put_bits(&pb, 1, 0); //does not depend on core coder
put_bits(&pb, 1, 0); //is not extension
if (s->needs_pce)
put_pce(&pb, avctx);
//Explicitly Mark SBR absent
put_bits(&pb, 11, 0x2b7); //sync extension
@ -488,7 +524,7 @@ static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
{
int ch;
int end = 2048 + (frame ? frame->nb_samples : 0);
const uint8_t *channel_map = aac_chan_maps[s->channels - 1];
const uint8_t *channel_map = s->reorder_map;
/* copy and remap input samples */
for (ch = 0; ch < s->channels; ch++) {
@ -920,16 +956,36 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
/* Constants */
s->last_frame_pb_count = 0;
avctx->extradata_size = 5;
avctx->extradata_size = 20;
avctx->frame_size = 1024;
avctx->initial_padding = 1024;
s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
/* Channel map and unspecified bitrate guessing */
s->channels = avctx->channels;
ERROR_IF(s->channels > AAC_MAX_CHANNELS || s->channels == 7,
"Unsupported number of channels: %d\n", s->channels);
s->chan_map = aac_chan_configs[s->channels-1];
s->needs_pce = 1;
for (i = 0; i < FF_ARRAY_ELEMS(aac_normal_chan_layouts); i++) {
if (avctx->channel_layout == aac_normal_chan_layouts[i]) {
s->needs_pce = s->options.pce;
break;
}
}
if (s->needs_pce) {
for (i = 0; i < FF_ARRAY_ELEMS(aac_pce_configs); i++)
if (avctx->channel_layout == aac_pce_configs[i].layout)
break;
ERROR_IF(i == FF_ARRAY_ELEMS(aac_pce_configs), "Unsupported channel layout\n");
av_log(avctx, AV_LOG_INFO, "Using a PCE to encode channel layout\n");
s->pce = aac_pce_configs[i];
s->reorder_map = s->pce.reorder_map;
s->chan_map = s->pce.config_map;
} else {
s->reorder_map = aac_chan_maps[s->channels - 1];
s->chan_map = aac_chan_configs[s->channels - 1];
}
if (!avctx->bit_rate) {
for (i = 1; i <= s->chan_map[0]; i++) {
avctx->bit_rate += s->chan_map[i] == TYPE_CPE ? 128000 : /* Pair */
@ -1062,6 +1118,7 @@ static const AVOption aacenc_options[] = {
{"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, options.tns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
{"aac_ltp", "Long term prediction", offsetof(AACEncContext, options.ltp), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
{"aac_pred", "AAC-Main prediction", offsetof(AACEncContext, options.pred), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
{"aac_pce", "Forces the use of PCEs", offsetof(AACEncContext, options.pce), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
{NULL}
};

View File

@ -45,6 +45,7 @@ typedef struct AACEncOptions {
int pns;
int tns;
int ltp;
int pce;
int pred;
int mid_side;
int intensity_stereo;
@ -89,6 +90,286 @@ typedef struct AACQuantizeBandCostCacheEntry {
uint16_t generation;
} AACQuantizeBandCostCacheEntry;
/**
 * Description of one channel layout as a Program Configuration Element.
 *
 * One entry holds both the values written into the PCE bitstream (element
 * counts, pairing flags, element indices) and the two maps the encoder uses
 * internally for that layout (element types and channel reordering).
 */
typedef struct AACPCEInfo {
int64_t layout; ///< channel layout this entry is matched against
int num_ele[4]; ///< element count per group: front, side, back, lfe
int pairing[3][8]; ///< per-element pairing flag for front, side, back (the lfe group has none)
int index[4][8]; ///< per-element index for front, side, back, lfe
uint8_t config_map[16]; ///< configs the encoder's channel specific settings: [0] is the element count, followed by the element types
uint8_t reorder_map[16]; ///< maps channels from lavc to aac order
} AACPCEInfo;
/**
 * List of PCE (Program Configuration Element) for the channel layouts listed
 * in channel_layout.h
 *
 * For those wishing in the future to add other layouts:
 *
 * - num_ele: number of elements in each group of front, side, back, lfe channels
 *   (an element is of type SCE (single channel), CPE (channel pair) for
 *   the first 3 groups; and is LFE for LFE group).
 *
 * - pairing: 0 for an SCE element or 1 for a CPE; does not apply to LFE group
 *
 * - index: there are three independent indices for SCE, CPE and LFE;
 *   they are incremented irrespective of the group to which the element belongs;
 *   they are not reset when going from one group to another
 *
 *   Example: for 7.0 channel layout,
 *     .num_ele = { 2, 1, 1, 0 },
 *     .pairing = { { 1, 0 }, { 1 }, { 1 }, },
 *       (1 CPE and 1 SCE in the front group, 1 CPE in each of the side and
 *        back groups: 3 CPE and 1 SCE in total)
 *     .index = { { 0, 0 }, { 1 }, { 2 }, },
 *       (index is 0 for the single SCE but goes from 0 to 2 for the CPEs)
 *
 *   The index order impacts the channel ordering. But is otherwise arbitrary
 *   (the sequence could have been 2, 0, 1 instead of 0, 1, 2).
 *
 *   Spec allows for discontinuous indices, e.g. if one has a total of two SCE,
 *   SCE.0 SCE.15 is OK per spec; BUT it won't be decoded by our AAC decoder
 *   which at this time requires that indices fully cover some range starting
 *   from 0 (SCE.1 SCE.0 is OK but not SCE.0 SCE.15).
 *
 * - config_map: total number of elements (first entry) and their types
 *   (following entries). Beware, the way the types are ordered impacts the
 *   final channel ordering.
 *
 * - reorder_map: reorders the channels.
 *
 * NOTE(review): only the 2.1 and 3.1 entries use the LFE group
 * (num_ele[3] == 1 with a TYPE_LFE in config_map). The 4.1, 5.1, 6.1 and 7.1
 * entries have num_ele[3] == 0 and no TYPE_LFE; presumably their LFE channel
 * is carried as an SCE in another group - confirm this is intended.
 */
static const AACPCEInfo aac_pce_configs[] = {
{
.layout = AV_CH_LAYOUT_MONO,
.num_ele = { 1, 0, 0, 0 },
.pairing = { { 0 }, },
.index = { { 0 }, },
.config_map = { 1, TYPE_SCE, },
.reorder_map = { 0 },
},
{
.layout = AV_CH_LAYOUT_STEREO,
.num_ele = { 1, 0, 0, 0 },
.pairing = { { 1 }, },
.index = { { 0 }, },
.config_map = { 1, TYPE_CPE, },
.reorder_map = { 0, 1 },
},
{
.layout = AV_CH_LAYOUT_2POINT1,
.num_ele = { 1, 0, 0, 1 },
.pairing = { { 1 }, },
.index = { { 0 },{ 0 },{ 0 },{ 0 } },
.config_map = { 2, TYPE_CPE, TYPE_LFE },
.reorder_map = { 0, 1, 2 },
},
{
.layout = AV_CH_LAYOUT_2_1,
.num_ele = { 1, 0, 1, 0 },
.pairing = { { 1 },{ 0 },{ 0 } },
.index = { { 0 },{ 0 },{ 0 }, },
.config_map = { 2, TYPE_CPE, TYPE_SCE },
.reorder_map = { 0, 1, 2 },
},
{
.layout = AV_CH_LAYOUT_SURROUND,
.num_ele = { 2, 0, 0, 0 },
.pairing = { { 1, 0 }, },
.index = { { 0, 0 }, },
.config_map = { 2, TYPE_CPE, TYPE_SCE, },
.reorder_map = { 0, 1, 2 },
},
{
.layout = AV_CH_LAYOUT_3POINT1,
.num_ele = { 2, 0, 0, 1 },
.pairing = { { 1, 0 }, },
.index = { { 0, 0 }, { 0 }, { 0 }, { 0 }, },
.config_map = { 3, TYPE_CPE, TYPE_SCE, TYPE_LFE },
.reorder_map = { 0, 1, 2, 3 },
},
{
.layout = AV_CH_LAYOUT_4POINT0,
.num_ele = { 2, 0, 1, 0 },
.pairing = { { 1, 0 }, { 0 }, { 0 }, },
.index = { { 0, 0 }, { 0 }, { 1 } },
.config_map = { 3, TYPE_CPE, TYPE_SCE, TYPE_SCE },
.reorder_map = { 0, 1, 2, 3 },
},
{
.layout = AV_CH_LAYOUT_4POINT1,
.num_ele = { 2, 1, 1, 0 },
.pairing = { { 1, 0 }, { 0 }, { 0 }, },
.index = { { 0, 0 }, { 1 }, { 2 }, { 0 } },
.config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_SCE },
.reorder_map = { 0, 1, 2, 3, 4 },
},
{
.layout = AV_CH_LAYOUT_2_2,
.num_ele = { 1, 1, 0, 0 },
.pairing = { { 1 }, { 1 }, },
.index = { { 0 }, { 1 }, },
.config_map = { 2, TYPE_CPE, TYPE_CPE },
.reorder_map = { 0, 1, 2, 3 },
},
{
.layout = AV_CH_LAYOUT_QUAD,
.num_ele = { 1, 0, 1, 0 },
.pairing = { { 1 }, { 0 }, { 1 }, },
.index = { { 0 }, { 0 }, { 1 } },
.config_map = { 2, TYPE_CPE, TYPE_CPE },
.reorder_map = { 0, 1, 2, 3 },
},
{
.layout = AV_CH_LAYOUT_5POINT0,
.num_ele = { 2, 1, 0, 0 },
.pairing = { { 1, 0 }, { 1 }, },
.index = { { 0, 0 }, { 1 } },
.config_map = { 3, TYPE_CPE, TYPE_SCE, TYPE_CPE },
.reorder_map = { 0, 1, 2, 3, 4 },
},
{
.layout = AV_CH_LAYOUT_5POINT1,
.num_ele = { 2, 1, 1, 0 },
.pairing = { { 1, 0 }, { 0 }, { 1 }, },
.index = { { 0, 0 }, { 1 }, { 1 } },
.config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE },
.reorder_map = { 0, 1, 2, 3, 4, 5 },
},
{
.layout = AV_CH_LAYOUT_5POINT0_BACK,
.num_ele = { 2, 0, 1, 0 },
.pairing = { { 1, 0 }, { 0 }, { 1 } },
.index = { { 0, 0 }, { 0 }, { 1 } },
.config_map = { 3, TYPE_CPE, TYPE_SCE, TYPE_CPE },
.reorder_map = { 0, 1, 2, 3, 4 },
},
{
.layout = AV_CH_LAYOUT_5POINT1_BACK,
.num_ele = { 2, 1, 1, 0 },
.pairing = { { 1, 0 }, { 0 }, { 1 }, },
.index = { { 0, 0 }, { 1 }, { 1 } },
.config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE },
.reorder_map = { 0, 1, 2, 3, 4, 5 },
},
{
.layout = AV_CH_LAYOUT_6POINT0,
.num_ele = { 2, 1, 1, 0 },
.pairing = { { 1, 0 }, { 1 }, { 0 }, },
.index = { { 0, 0 }, { 1 }, { 1 } },
.config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
.reorder_map = { 0, 1, 2, 3, 4, 5 },
},
{
.layout = AV_CH_LAYOUT_6POINT0_FRONT,
.num_ele = { 2, 1, 0, 0 },
.pairing = { { 1, 1 }, { 1 } },
.index = { { 1, 0 }, { 2 }, },
.config_map = { 3, TYPE_CPE, TYPE_CPE, TYPE_CPE, },
.reorder_map = { 0, 1, 2, 3, 4, 5 },
},
{
.layout = AV_CH_LAYOUT_HEXAGONAL,
.num_ele = { 2, 0, 2, 0 },
.pairing = { { 1, 0 },{ 0 },{ 1, 0 }, },
.index = { { 0, 0 },{ 0 },{ 1, 1 } },
.config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE, },
.reorder_map = { 0, 1, 2, 3, 4, 5 },
},
{
.layout = AV_CH_LAYOUT_6POINT1,
.num_ele = { 2, 1, 2, 0 },
.pairing = { { 1, 0 },{ 0 },{ 1, 0 }, },
.index = { { 0, 0 },{ 1 },{ 1, 2 } },
.config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
.reorder_map = { 0, 1, 2, 3, 4, 5, 6 },
},
{
.layout = AV_CH_LAYOUT_6POINT1_BACK,
.num_ele = { 2, 1, 2, 0 },
.pairing = { { 1, 0 }, { 0 }, { 1, 0 }, },
.index = { { 0, 0 }, { 1 }, { 1, 2 } },
.config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
.reorder_map = { 0, 1, 2, 3, 4, 5, 6 },
},
{
.layout = AV_CH_LAYOUT_6POINT1_FRONT,
.num_ele = { 2, 1, 2, 0 },
.pairing = { { 1, 0 }, { 0 }, { 1, 0 }, },
.index = { { 0, 0 }, { 1 }, { 1, 2 } },
.config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
.reorder_map = { 0, 1, 2, 3, 4, 5, 6 },
},
{
.layout = AV_CH_LAYOUT_7POINT0,
.num_ele = { 2, 1, 1, 0 },
.pairing = { { 1, 0 }, { 1 }, { 1 }, },
.index = { { 0, 0 }, { 1 }, { 2 }, },
.config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_CPE },
.reorder_map = { 0, 1, 2, 3, 4, 5, 6 },
},
{
.layout = AV_CH_LAYOUT_7POINT0_FRONT,
.num_ele = { 2, 1, 1, 0 },
.pairing = { { 1, 0 }, { 1 }, { 1 }, },
.index = { { 0, 0 }, { 1 }, { 2 }, },
.config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_CPE },
.reorder_map = { 0, 1, 2, 3, 4, 5, 6 },
},
{
.layout = AV_CH_LAYOUT_7POINT1,
.num_ele = { 2, 1, 2, 0 },
.pairing = { { 1, 0 }, { 0 }, { 1, 1 }, },
.index = { { 0, 0 }, { 1 }, { 1, 2 }, { 0 } },
.config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_CPE },
.reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7 },
},
{
.layout = AV_CH_LAYOUT_7POINT1_WIDE,
.num_ele = { 2, 1, 2, 0 },
.pairing = { { 1, 0 }, { 0 },{ 1, 1 }, },
.index = { { 0, 0 }, { 1 }, { 1, 2 }, { 0 } },
.config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_CPE },
.reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7 },
},
{
.layout = AV_CH_LAYOUT_7POINT1_WIDE_BACK,
.num_ele = { 2, 1, 2, 0 },
.pairing = { { 1, 0 }, { 0 }, { 1, 1 }, },
.index = { { 0, 0 }, { 1 }, { 1, 2 }, { 0 } },
.config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_CPE },
.reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7 },
},
{
.layout = AV_CH_LAYOUT_OCTAGONAL,
.num_ele = { 2, 1, 2, 0 },
.pairing = { { 1, 0 }, { 1 }, { 1, 0 }, },
.index = { { 0, 0 }, { 1 }, { 2, 1 } },
.config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_SCE },
.reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7 },
},
{ /* Meant for order 2/mixed ambisonics */
.layout = AV_CH_LAYOUT_OCTAGONAL | AV_CH_TOP_CENTER,
.num_ele = { 2, 2, 2, 0 },
.pairing = { { 1, 0 }, { 1, 0 }, { 1, 0 }, },
.index = { { 0, 0 }, { 1, 1 }, { 2, 2 } },
.config_map = { 6, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
.reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7, 8 },
},
{ /* Meant for order 2/mixed ambisonics */
.layout = AV_CH_LAYOUT_6POINT0_FRONT | AV_CH_BACK_CENTER |
AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT | AV_CH_TOP_CENTER,
.num_ele = { 2, 2, 2, 0 },
.pairing = { { 1, 1 }, { 1, 0 }, { 1, 0 }, },
.index = { { 0, 1 }, { 2, 0 }, { 3, 1 } },
.config_map = { 6, TYPE_CPE, TYPE_CPE, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
.reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 },
},
{
.layout = AV_CH_LAYOUT_HEXADECAGONAL,
.num_ele = { 4, 2, 4, 0 },
.pairing = { { 1, 0, 1, 0 }, { 1, 1 }, { 1, 0, 1, 0 }, },
.index = { { 0, 0, 1, 1 }, { 2, 3 }, { 4, 2, 5, 3 } },
.config_map = { 10, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
.reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
},
};
/**
* AAC encoder context
*/
@ -99,12 +380,15 @@ typedef struct AACEncContext {
FFTContext mdct1024; ///< long (1024 samples) frame transform context
FFTContext mdct128; ///< short (128 samples) frame transform context
AVFloatDSPContext *fdsp;
float *planar_samples[8]; ///< saved preprocessed input
AACPCEInfo pce; ///< PCE data, if needed
float *planar_samples[16]; ///< saved preprocessed input
int profile; ///< copied from avctx
int needs_pce; ///< flag for non-standard layout
LPCContext lpc; ///< used by TNS
int samplerate_index; ///< MPEG-4 samplerate index
int channels; ///< channel count
const uint8_t *reorder_map; ///< lavc to aac reorder map
const uint8_t *chan_map; ///< channel configuration map
ChannelElement *cpe; ///< channel elements

View File

@ -36,13 +36,24 @@
/** Total number of codebooks, including special ones **/
#define CB_TOT_ALL 15
#define AAC_MAX_CHANNELS 8
#define AAC_MAX_CHANNELS 16
extern const uint8_t *ff_aac_swb_size_1024[];
extern const int ff_aac_swb_size_1024_len;
extern const uint8_t *ff_aac_swb_size_128[];
extern const int ff_aac_swb_size_128_len;
/*
 * Supported layouts without using a PCE.
 *
 * The encoder init code compares the requested channel layout against each
 * entry; on a match a PCE is only written when the aac_pce option forces it.
 * Any layout not listed here always takes the PCE path.
 *
 * NOTE(review): AV_CH_LAYOUT_5POINT0_BACK and AV_CH_LAYOUT_5POINT1_BACK are
 * not listed, so those layouts are always encoded with a PCE - confirm this
 * is intended.
 */
static const int64_t aac_normal_chan_layouts[7] = {
AV_CH_LAYOUT_MONO,
AV_CH_LAYOUT_STEREO,
AV_CH_LAYOUT_SURROUND,
AV_CH_LAYOUT_4POINT0,
AV_CH_LAYOUT_5POINT0,
AV_CH_LAYOUT_5POINT1,
AV_CH_LAYOUT_7POINT1,
};
/** default channel configurations */
static const uint8_t aac_chan_configs[AAC_MAX_CHANNELS][6] = {
{1, TYPE_SCE}, // 1 channel - single channel element