diff --git a/Changelog b/Changelog index 2dac228957..0348ff73fb 100644 --- a/Changelog +++ b/Changelog @@ -25,6 +25,7 @@ version : - On2 AVC (Audio for Video) decoder - support for decoding through DXVA2 in avconv - libbs2b-based stereo-to-binaural audio filter +- native Opus decoder version 10: diff --git a/configure b/configure index 1e9a54c654..c27bc75616 100755 --- a/configure +++ b/configure @@ -1822,6 +1822,7 @@ nellymoser_decoder_select="mdct sinewin" nellymoser_encoder_select="audio_frame_queue mdct sinewin" nuv_decoder_select="dsputil lzo" on2avc_decoder_select="mdct" +opus_decoder_deps="avresample" png_decoder_deps="zlib" png_encoder_deps="zlib" png_encoder_select="dsputil" @@ -4387,6 +4388,8 @@ enabled movie_filter && prepend avfilter_deps "avformat avcodec" enabled resample_filter && prepend avfilter_deps "avresample" enabled scale_filter && prepend avfilter_deps "swscale" +enabled opus_decoder && prepend avcodec_deps "avresample" + expand_deps(){ lib_deps=${1}_deps eval "deps=\$$lib_deps" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index f226484607..60c3f326bd 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -279,6 +279,9 @@ OBJS-$(CONFIG_NELLYMOSER_DECODER) += nellymoserdec.o nellymoser.o OBJS-$(CONFIG_NELLYMOSER_ENCODER) += nellymoserenc.o nellymoser.o OBJS-$(CONFIG_NUV_DECODER) += nuv.o rtjpeg.o OBJS-$(CONFIG_ON2AVC_DECODER) += on2avc.o on2avcdata.o +OBJS-$(CONFIG_OPUS_DECODER) += opusdec.o opus.o opus_celt.o \ + opus_imdct.o opus_silk.o \ + vorbis_data.o OBJS-$(CONFIG_PAF_VIDEO_DECODER) += paf.o OBJS-$(CONFIG_PAF_AUDIO_DECODER) += paf.o OBJS-$(CONFIG_PAM_DECODER) += pnmdec.o pnm.o @@ -653,6 +656,7 @@ OBJS-$(CONFIG_MPEGAUDIO_PARSER) += mpegaudio_parser.o \ mpegaudiodecheader.o mpegaudiodata.o OBJS-$(CONFIG_MPEGVIDEO_PARSER) += mpegvideo_parser.o \ mpeg12.o mpeg12data.o +OBJS-$(CONFIG_OPUS_PARSER) += opus_parser.o opus.o vorbis_data.o OBJS-$(CONFIG_PNG_PARSER) += png_parser.o OBJS-$(CONFIG_PNM_PARSER) += pnm_parser.o pnm.o 
OBJS-$(CONFIG_RV30_PARSER) += rv34_parser.o diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index 9f4fd2531e..bd74e0ba37 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -331,6 +331,7 @@ void avcodec_register_all(void) REGISTER_DECODER(MPC8, mpc8); REGISTER_ENCDEC (NELLYMOSER, nellymoser); REGISTER_DECODER(ON2AVC, on2avc); + REGISTER_DECODER(OPUS, opus); REGISTER_DECODER(PAF_AUDIO, paf_audio); REGISTER_DECODER(QCELP, qcelp); REGISTER_DECODER(QDM2, qdm2); @@ -483,6 +484,7 @@ void avcodec_register_all(void) REGISTER_PARSER(MPEG4VIDEO, mpeg4video); REGISTER_PARSER(MPEGAUDIO, mpegaudio); REGISTER_PARSER(MPEGVIDEO, mpegvideo); + REGISTER_PARSER(OPUS, opus); REGISTER_PARSER(PNG, png); REGISTER_PARSER(PNM, pnm); REGISTER_PARSER(RV30, rv30); diff --git a/libavcodec/opus.c b/libavcodec/opus.c new file mode 100644 index 0000000000..91021ce65a --- /dev/null +++ b/libavcodec/opus.c @@ -0,0 +1,428 @@ +/* + * Copyright (c) 2012 Andrew D'Addesio + * Copyright (c) 2013-2014 Mozilla Corporation + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Opus decoder/parser shared code + */ + +#include + +#include "libavutil/error.h" + +#include "opus.h" +#include "vorbis.h" + +static const uint16_t opus_frame_duration[32] = { + 480, 960, 1920, 2880, + 480, 960, 1920, 2880, + 480, 960, 1920, 2880, + 480, 960, + 480, 960, + 120, 240, 480, 960, + 120, 240, 480, 960, + 120, 240, 480, 960, + 120, 240, 480, 960, +}; + +/** + * Read a 1- or 2-byte frame length + */ +static inline int xiph_lacing_16bit(const uint8_t **ptr, const uint8_t *end) +{ + int val; + + if (*ptr >= end) + return AVERROR_INVALIDDATA; + val = *(*ptr)++; + if (val >= 252) { + if (*ptr >= end) + return AVERROR_INVALIDDATA; + val += 4 * *(*ptr)++; + } + return val; +} + +/** + * Read a multi-byte length (used for code 3 packet padding size) + */ +static inline int xiph_lacing_full(const uint8_t **ptr, const uint8_t *end) +{ + int val = 0; + int next; + + while (1) { + if (*ptr >= end || val > INT_MAX - 254) + return AVERROR_INVALIDDATA; + next = *(*ptr)++; + val += next; + if (next < 255) + break; + else + val--; + } + return val; +} + +/** + * Parse Opus packet info from raw packet data + */ +int ff_opus_parse_packet(OpusPacket *pkt, const uint8_t *buf, int buf_size, + int self_delimiting) +{ + const uint8_t *ptr = buf; + const uint8_t *end = buf + buf_size; + int padding = 0; + int frame_bytes, i; + + if (buf_size < 1) + goto fail; + + /* TOC byte */ + i = *ptr++; + pkt->code = (i ) & 0x3; + pkt->stereo = (i >> 2) & 0x1; + pkt->config = (i >> 3) & 0x1F; + + /* code 2 and code 3 packets have at least 1 byte after the TOC */ + if (pkt->code >= 2 && buf_size < 2) + goto fail; + + switch (pkt->code) { + case 0: + /* 1 frame */ + pkt->frame_count = 1; + pkt->vbr = 0; + + if (self_delimiting) { + int len = 
xiph_lacing_16bit(&ptr, end); + if (len < 0 || len > end - ptr) + goto fail; + end = ptr + len; + buf_size = end - buf; + } + + frame_bytes = end - ptr; + if (frame_bytes > MAX_FRAME_SIZE) + goto fail; + pkt->frame_offset[0] = ptr - buf; + pkt->frame_size[0] = frame_bytes; + break; + case 1: + /* 2 frames, equal size */ + pkt->frame_count = 2; + pkt->vbr = 0; + + if (self_delimiting) { + int len = xiph_lacing_16bit(&ptr, end); + if (len < 0 || 2 * len > end - ptr) + goto fail; + end = ptr + 2 * len; + buf_size = end - buf; + } + + frame_bytes = end - ptr; + if (frame_bytes & 1 || frame_bytes >> 1 > MAX_FRAME_SIZE) + goto fail; + pkt->frame_offset[0] = ptr - buf; + pkt->frame_size[0] = frame_bytes >> 1; + pkt->frame_offset[1] = pkt->frame_offset[0] + pkt->frame_size[0]; + pkt->frame_size[1] = frame_bytes >> 1; + break; + case 2: + /* 2 frames, different sizes */ + pkt->frame_count = 2; + pkt->vbr = 1; + + /* read 1st frame size */ + frame_bytes = xiph_lacing_16bit(&ptr, end); + if (frame_bytes < 0) + goto fail; + + if (self_delimiting) { + int len = xiph_lacing_16bit(&ptr, end); + if (len < 0 || len + frame_bytes > end - ptr) + goto fail; + end = ptr + frame_bytes + len; + buf_size = end - buf; + } + + pkt->frame_offset[0] = ptr - buf; + pkt->frame_size[0] = frame_bytes; + + /* calculate 2nd frame size */ + frame_bytes = end - ptr - pkt->frame_size[0]; + if (frame_bytes < 0 || frame_bytes > MAX_FRAME_SIZE) + goto fail; + pkt->frame_offset[1] = pkt->frame_offset[0] + pkt->frame_size[0]; + pkt->frame_size[1] = frame_bytes; + break; + case 3: + /* 1 to 48 frames, can be different sizes */ + i = *ptr++; + pkt->frame_count = (i ) & 0x3F; + padding = (i >> 6) & 0x01; + pkt->vbr = (i >> 7) & 0x01; + + if (pkt->frame_count == 0 || pkt->frame_count > MAX_FRAMES) + goto fail; + + /* read padding size */ + if (padding) { + padding = xiph_lacing_full(&ptr, end); + if (padding < 0) + goto fail; + } + + /* read frame sizes */ + if (pkt->vbr) { + /* for VBR, all frames except the 
final one have their size coded + in the bitstream. the last frame size is implicit. */ + int total_bytes = 0; + for (i = 0; i < pkt->frame_count - 1; i++) { + frame_bytes = xiph_lacing_16bit(&ptr, end); + if (frame_bytes < 0) + goto fail; + pkt->frame_size[i] = frame_bytes; + total_bytes += frame_bytes; + } + + if (self_delimiting) { + int len = xiph_lacing_16bit(&ptr, end); + if (len < 0 || len + total_bytes + padding > end - ptr) + goto fail; + end = ptr + total_bytes + len + padding; + buf_size = end - buf; + } + + frame_bytes = end - ptr - padding; + if (total_bytes > frame_bytes) + goto fail; + pkt->frame_offset[0] = ptr - buf; + for (i = 1; i < pkt->frame_count; i++) + pkt->frame_offset[i] = pkt->frame_offset[i-1] + pkt->frame_size[i-1]; + pkt->frame_size[pkt->frame_count-1] = frame_bytes - total_bytes; + } else { + /* for CBR, the remaining packet bytes are divided evenly between + the frames */ + if (self_delimiting) { + frame_bytes = xiph_lacing_16bit(&ptr, end); + if (frame_bytes < 0 || pkt->frame_count * frame_bytes + padding > end - ptr) + goto fail; + end = ptr + pkt->frame_count * frame_bytes + padding; + buf_size = end - buf; + } else { + frame_bytes = end - ptr - padding; + if (frame_bytes % pkt->frame_count || + frame_bytes / pkt->frame_count > MAX_FRAME_SIZE) + goto fail; + frame_bytes /= pkt->frame_count; + } + + pkt->frame_offset[0] = ptr - buf; + pkt->frame_size[0] = frame_bytes; + for (i = 1; i < pkt->frame_count; i++) { + pkt->frame_offset[i] = pkt->frame_offset[i-1] + pkt->frame_size[i-1]; + pkt->frame_size[i] = frame_bytes; + } + } + } + + pkt->packet_size = buf_size; + pkt->data_size = pkt->packet_size - padding; + + /* total packet duration cannot be larger than 120ms */ + pkt->frame_duration = opus_frame_duration[pkt->config]; + if (pkt->frame_duration * pkt->frame_count > MAX_PACKET_DUR) + goto fail; + + /* set mode and bandwidth */ + if (pkt->config < 12) { + pkt->mode = OPUS_MODE_SILK; + pkt->bandwidth = pkt->config >> 2; + } else if 
(pkt->config < 16) { + pkt->mode = OPUS_MODE_HYBRID; + pkt->bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND + (pkt->config >= 14); + } else { + pkt->mode = OPUS_MODE_CELT; + pkt->bandwidth = (pkt->config - 16) >> 2; + /* skip mediumband */ + if (pkt->bandwidth) + pkt->bandwidth++; + } + + return 0; + +fail: + memset(pkt, 0, sizeof(*pkt)); + return AVERROR_INVALIDDATA; +} + +static int channel_reorder_vorbis(int nb_channels, int channel_idx) +{ + return ff_vorbis_channel_layout_offsets[nb_channels - 1][channel_idx]; +} + +static int channel_reorder_unknown(int nb_channels, int channel_idx) +{ + return channel_idx; +} + +av_cold int ff_opus_parse_extradata(AVCodecContext *avctx, + OpusContext *s) +{ + static const uint8_t default_channel_map[2] = { 0, 1 }; + uint8_t default_extradata[19] = { + 'O', 'p', 'u', 's', 'H', 'e', 'a', 'd', + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + + int (*channel_reorder)(int, int) = channel_reorder_unknown; + + const uint8_t *extradata, *channel_map; + int extradata_size; + int version, channels, map_type, streams, stereo_streams, i, j; + uint64_t layout; + + if (!avctx->extradata) { + if (avctx->channels > 2) { + av_log(avctx, AV_LOG_ERROR, + "Multichannel configuration without extradata.\n"); + return AVERROR(EINVAL); + } + default_extradata[9] = (avctx->channels == 1) ? 
1 : 2;
+        extradata = default_extradata;
+        extradata_size = sizeof(default_extradata);
+    } else {
+        extradata = avctx->extradata;
+        extradata_size = avctx->extradata_size;
+    }
+
+    /* the fixed part of the header read below spans bytes 0..18 */
+    if (extradata_size < 19) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid extradata size: %d\n",
+               extradata_size);
+        return AVERROR_INVALIDDATA;
+    }
+
+    version = extradata[8];
+    if (version > 15) {
+        avpriv_request_sample(avctx, "Extradata version %d", version);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    avctx->delay = AV_RL16(extradata + 10);
+
+    channels = extradata[9];
+    if (!channels) {
+        av_log(avctx, AV_LOG_ERROR, "Zero channel count specified in the extradata\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* s->gain is only written when the stored gain is non-zero */
+    s->gain_i = AV_RL16(extradata + 16);
+    if (s->gain_i)
+        s->gain = pow(10, s->gain_i / (20.0 * 256));
+
+    map_type = extradata[18];
+    if (!map_type) {
+        /* mapping 0: a single stream, no channel map stored in the header */
+        if (channels > 2) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Channel mapping 0 is only specified for up to 2 channels\n");
+            return AVERROR_INVALIDDATA;
+        }
+        layout = (channels == 1) ? AV_CH_LAYOUT_MONO : AV_CH_LAYOUT_STEREO;
+        streams = 1;
+        stereo_streams = channels - 1;
+        channel_map = default_channel_map;
+    } else if (map_type == 1 || map_type == 255) {
+        /* stream counts at bytes 19/20, then one map byte per channel */
+        if (extradata_size < 21 + channels) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid extradata size: %d\n",
+                   extradata_size);
+            return AVERROR_INVALIDDATA;
+        }
+
+        streams = extradata[19];
+        stereo_streams = extradata[20];
+        if (!streams || stereo_streams > streams ||
+            streams + stereo_streams > 255) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Invalid stream/stereo stream count: %d/%d\n", streams, stereo_streams);
+            return AVERROR_INVALIDDATA;
+        }
+
+        if (map_type == 1) {
+            if (channels > 8) {
+                av_log(avctx, AV_LOG_ERROR,
+                       "Channel mapping 1 is only specified for up to 8 channels\n");
+                return AVERROR_INVALIDDATA;
+            }
+            layout = ff_vorbis_channel_layouts[channels - 1];
+            channel_reorder = channel_reorder_vorbis;
+        } else
+            layout = 0;
+
+        channel_map = extradata + 21;
+    } else {
+        avpriv_request_sample(avctx, "Mapping type %d", map_type);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    s->channel_maps = av_mallocz_array(channels, sizeof(*s->channel_maps));
+    if (!s->channel_maps)
+        return AVERROR(ENOMEM);
+
+    for (i = 0; i < channels; i++) {
+        ChannelMap *map = &s->channel_maps[i];
+        uint8_t idx = channel_map[channel_reorder(channels, i)];
+
+        if (idx == 255) {
+            map->silence = 1;
+            continue;
+        } else if (idx >= streams + stereo_streams) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Invalid channel map for output channel %d: %d\n", i, idx);
+            /* do not leave the map allocated above behind on this error path */
+            av_freep(&s->channel_maps);
+            return AVERROR_INVALIDDATA;
+        }
+
+        /* check that we didn't see this index yet */
+        map->copy = 0;
+        for (j = 0; j < i; j++)
+            if (channel_map[channel_reorder(channels, j)] == idx) {
+                map->copy = 1;
+                map->copy_idx = j;
+                break;
+            }
+
+        /* indices below 2 * stereo_streams address the two channels of a
+         * stereo stream; the remaining ones address mono streams */
+        if (idx < 2 * stereo_streams) {
+            map->stream_idx = idx / 2;
+            map->channel_idx = idx & 1;
+        } else {
+            map->stream_idx = idx - stereo_streams;
+            map->channel_idx = 0;
+        }
+    }
+
+    avctx->channels = channels;
+    avctx->channel_layout = layout;
+    
s->nb_streams = streams; + s->nb_stereo_streams = stereo_streams; + + return 0; +} diff --git a/libavcodec/opus.h b/libavcodec/opus.h new file mode 100644 index 0000000000..ab2975fa22 --- /dev/null +++ b/libavcodec/opus.h @@ -0,0 +1,429 @@ +/* + * Opus decoder/demuxer common functions + * Copyright (c) 2012 Andrew D'Addesio + * Copyright (c) 2013-2014 Mozilla Corporation + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_OPUS_H +#define AVCODEC_OPUS_H + +#include + +#include "libavutil/audio_fifo.h" +#include "libavutil/float_dsp.h" +#include "libavutil/frame.h" + +#include "libavresample/avresample.h" + +#include "avcodec.h" +#include "get_bits.h" + +#define MAX_FRAME_SIZE 1275 +#define MAX_FRAMES 48 +#define MAX_PACKET_DUR 5760 + +#define CELT_SHORT_BLOCKSIZE 120 +#define CELT_OVERLAP CELT_SHORT_BLOCKSIZE +#define CELT_MAX_LOG_BLOCKS 3 +#define CELT_MAX_FRAME_SIZE (CELT_SHORT_BLOCKSIZE * (1 << CELT_MAX_LOG_BLOCKS)) +#define CELT_MAX_BANDS 21 +#define CELT_VECTORS 11 +#define CELT_ALLOC_STEPS 6 +#define CELT_FINE_OFFSET 21 +#define CELT_MAX_FINE_BITS 8 +#define CELT_NORM_SCALE 16384 +#define CELT_QTHETA_OFFSET 4 +#define CELT_QTHETA_OFFSET_TWOPHASE 16 +#define CELT_DEEMPH_COEFF 0.85000610f +#define CELT_POSTFILTER_MINPERIOD 15 +#define 
CELT_ENERGY_SILENCE (-28.0f) + +#define SILK_HISTORY 322 +#define SILK_MAX_LPC 16 + +#define ROUND_MULL(a,b,s) (((MUL64(a, b) >> (s - 1)) + 1) >> 1) +#define ROUND_MUL16(a,b) ((MUL16(a, b) + 16384) >> 15) +#define opus_ilog(i) (av_log2(i) + !!(i)) + +enum OpusMode { + OPUS_MODE_SILK, + OPUS_MODE_HYBRID, + OPUS_MODE_CELT +}; + +enum OpusBandwidth { + OPUS_BANDWIDTH_NARROWBAND, + OPUS_BANDWIDTH_MEDIUMBAND, + OPUS_BANDWIDTH_WIDEBAND, + OPUS_BANDWIDTH_SUPERWIDEBAND, + OPUS_BANDWIDTH_FULLBAND +}; + +typedef struct RawBitsContext { + const uint8_t *position; + unsigned int bytes; + unsigned int cachelen; + unsigned int cacheval; +} RawBitsContext; + +typedef struct OpusRangeCoder { + GetBitContext gb; + RawBitsContext rb; + unsigned int range; + unsigned int value; + unsigned int total_read_bits; +} OpusRangeCoder; + +typedef struct SilkContext SilkContext; + +typedef struct CeltIMDCTContext CeltIMDCTContext; + +typedef struct CeltContext CeltContext; + +typedef struct OpusPacket { + int packet_size; /** packet size */ + int data_size; /** size of the useful data -- packet size - padding */ + int code; /** packet code: specifies the frame layout */ + int stereo; /** whether this packet is mono or stereo */ + int vbr; /** vbr flag */ + int config; /** configuration: tells the audio mode, + ** bandwidth, and frame duration */ + int frame_count; /** frame count */ + int frame_offset[MAX_FRAMES]; /** frame offsets */ + int frame_size[MAX_FRAMES]; /** frame sizes */ + int frame_duration; /** frame duration, in samples @ 48kHz */ + enum OpusMode mode; /** mode */ + enum OpusBandwidth bandwidth; /** bandwidth */ +} OpusPacket; + +typedef struct OpusStreamContext { + AVCodecContext *avctx; + int output_channels; + + OpusRangeCoder rc; + OpusRangeCoder redundancy_rc; + SilkContext *silk; + CeltContext *celt; + AVFloatDSPContext *fdsp; + + float silk_buf[2][960]; + float *silk_output[2]; + DECLARE_ALIGNED(32, float, celt_buf)[2][960]; + float *celt_output[2]; + + float 
redundancy_buf[2][960]; + float *redundancy_output[2]; + + /* data buffers for the final output data */ + float *out[2]; + int out_size; + + float *out_dummy; + int out_dummy_allocated_size; + + AVAudioResampleContext *avr; + AVAudioFifo *celt_delay; + int silk_samplerate; + /* number of samples we still want to get from the resampler */ + int delayed_samples; + + OpusPacket packet; + + int redundancy_idx; +} OpusStreamContext; + +// a mapping between an opus stream and an output channel +typedef struct ChannelMap { + int stream_idx; + int channel_idx; + + // when a single decoded channel is mapped to multiple output channels, we + // write to the first output directly and copy from it to the others + // this field is set to 1 for those copied output channels + int copy; + // this is the index of the output channel to copy from + int copy_idx; + + // this channel is silent + int silence; +} ChannelMap; + +typedef struct OpusContext { + OpusStreamContext *streams; + int nb_streams; + int nb_stereo_streams; + + AVFloatDSPContext fdsp; + int16_t gain_i; + float gain; + + ChannelMap *channel_maps; +} OpusContext; + +static av_always_inline void opus_rc_normalize(OpusRangeCoder *rc) +{ + while (rc->range <= 1<<23) { + rc->value = ((rc->value << 8) | (get_bits(&rc->gb, 8) ^ 0xFF)) & ((1u << 31) - 1); + rc->range <<= 8; + rc->total_read_bits += 8; + } +} + +static av_always_inline void opus_rc_update(OpusRangeCoder *rc, unsigned int scale, + unsigned int low, unsigned int high, + unsigned int total) +{ + rc->value -= scale * (total - high); + rc->range = low ? 
scale * (high - low) + : rc->range - scale * (total - high); + opus_rc_normalize(rc); +} + +static av_always_inline unsigned int opus_rc_getsymbol(OpusRangeCoder *rc, const uint16_t *cdf) +{ + unsigned int k, scale, total, symbol, low, high; + + total = *cdf++; + + scale = rc->range / total; + symbol = rc->value / scale + 1; + symbol = total - FFMIN(symbol, total); + + for (k = 0; cdf[k] <= symbol; k++); + high = cdf[k]; + low = k ? cdf[k-1] : 0; + + opus_rc_update(rc, scale, low, high, total); + + return k; +} + +static av_always_inline unsigned int opus_rc_p2model(OpusRangeCoder *rc, unsigned int bits) +{ + unsigned int k, scale; + scale = rc->range >> bits; // in this case, scale = symbol + + if (rc->value >= scale) { + rc->value -= scale; + rc->range -= scale; + k = 0; + } else { + rc->range = scale; + k = 1; + } + opus_rc_normalize(rc); + return k; +} + +/** + * CELT: estimate bits of entropy that have thus far been consumed for the + * current CELT frame, to integer and fractional (1/8th bit) precision + */ +static av_always_inline unsigned int opus_rc_tell(const OpusRangeCoder *rc) +{ + return rc->total_read_bits - av_log2(rc->range) - 1; +} + +static av_always_inline unsigned int opus_rc_tell_frac(const OpusRangeCoder *rc) +{ + unsigned int i, total_bits, rcbuffer, range; + + total_bits = rc->total_read_bits << 3; + rcbuffer = av_log2(rc->range) + 1; + range = rc->range >> (rcbuffer-16); + + for (i = 0; i < 3; i++) { + int bit; + range = range * range >> 15; + bit = range >> 16; + rcbuffer = rcbuffer << 1 | bit; + range >>= bit; + } + + return total_bits - rcbuffer; +} + +/** + * CELT: read 1-25 raw bits at the end of the frame, backwards byte-wise + */ +static av_always_inline unsigned int opus_getrawbits(OpusRangeCoder *rc, unsigned int count) +{ + unsigned int value = 0; + + while (rc->rb.bytes && rc->rb.cachelen < count) { + rc->rb.cacheval |= *--rc->rb.position << rc->rb.cachelen; + rc->rb.cachelen += 8; + rc->rb.bytes--; + } + + value = 
rc->rb.cacheval & ((1<<count)-1);
+    /* drop the bits just returned from the cache and account for them */
+    rc->rb.cacheval >>= count;
+    rc->rb.cachelen -= count;
+    rc->total_read_bits += count;
+
+    return value;
+}
+
+/**
+ * CELT: read a uniform distribution
+ */
+static av_always_inline unsigned int opus_rc_unimodel(OpusRangeCoder *rc, unsigned int size)
+{
+    unsigned int bits, k, scale, total;
+
+    bits = opus_ilog(size - 1);
+    total = (bits > 8) ? ((size - 1) >> (bits - 8)) + 1 : size;
+
+    scale = rc->range / total;
+    k = rc->value / scale + 1;
+    k = total - FFMIN(k, total);
+    opus_rc_update(rc, scale, k, k + 1, total);
+
+    if (bits > 8) {
+        k = k << (bits - 8) | opus_getrawbits(rc, bits - 8);
+        return FFMIN(k, size - 1);
+    } else
+        return k;
+}
+
+static av_always_inline int opus_rc_laplace(OpusRangeCoder *rc, unsigned int symbol, int decay)
+{
+    /* extends the range coder to model a Laplace distribution */
+    int value = 0;
+    unsigned int scale, low = 0, center;
+
+    scale = rc->range >> 15;
+    center = rc->value / scale + 1;
+    center = (1 << 15) - FFMIN(center, 1 << 15);
+
+    if (center >= symbol) {
+        value++;
+        low = symbol;
+        symbol = 1 + ((32768 - 32 - symbol) * (16384-decay) >> 15);
+
+        while (symbol > 1 && center >= low + 2 * symbol) {
+            value++;
+            symbol *= 2;
+            low += symbol;
+            symbol = (((symbol - 2) * decay) >> 15) + 1;
+        }
+
+        if (symbol <= 1) {
+            int distance = (center - low) >> 1;
+            value += distance;
+            low += 2 * distance;
+        }
+
+        if (center < low + symbol)
+            value *= -1;
+        else
+            low += symbol;
+    }
+
+    opus_rc_update(rc, scale, low, FFMIN(low + symbol, 32768), 32768);
+
+    return value;
+}
+
+static av_always_inline unsigned int opus_rc_stepmodel(OpusRangeCoder *rc, int k0)
+{
+    /* Use a probability of 3 up to itheta=8192 and then use 1 after */
+    unsigned int k, scale, symbol, total = (k0+1)*3 + k0;
+    scale = rc->range / total;
+    symbol = rc->value / scale + 1;
+    symbol = total - FFMIN(symbol, total);
+
+    k = (symbol < (k0+1)*3) ? symbol/3 : symbol - (k0+1)*2;
+
+    opus_rc_update(rc, scale, (k <= k0) ? 
3*(k+0) : (k-1-k0) + 3*(k0+1), + (k <= k0) ? 3*(k+1) : (k-0-k0) + 3*(k0+1), total); + return k; +} + +static av_always_inline unsigned int opus_rc_trimodel(OpusRangeCoder *rc, int qn) +{ + unsigned int k, scale, symbol, total, low, center; + + total = ((qn>>1) + 1) * ((qn>>1) + 1); + scale = rc->range / total; + center = rc->value / scale + 1; + center = total - FFMIN(center, total); + + if (center < total >> 1) { + k = (ff_sqrt(8 * center + 1) - 1) >> 1; + low = k * (k + 1) >> 1; + symbol = k + 1; + } else { + k = (2*(qn + 1) - ff_sqrt(8*(total - center - 1) + 1)) >> 1; + low = total - ((qn + 1 - k) * (qn + 2 - k) >> 1); + symbol = qn + 1 - k; + } + + opus_rc_update(rc, scale, low, low + symbol, total); + + return k; +} + +int ff_opus_parse_packet(OpusPacket *pkt, const uint8_t *buf, int buf_size, + int self_delimited); + +int ff_opus_parse_extradata(AVCodecContext *avctx, OpusContext *s); + +int ff_silk_init(AVCodecContext *avctx, SilkContext **ps, int output_channels); +void ff_silk_free(SilkContext **ps); +void ff_silk_flush(SilkContext *s); + +/** + * Decode the LP layer of one Opus frame (which may correspond to several SILK + * frames). + */ +int ff_silk_decode_superframe(SilkContext *s, OpusRangeCoder *rc, + float *output[2], + enum OpusBandwidth bandwidth, int coded_channels, + int duration_ms); + +/** + * Init an iMDCT of the length 2 * 15 * (2^N) + */ +int ff_celt_imdct_init(CeltIMDCTContext **s, int N); + +/** + * Free an iMDCT. 
+ */ +void ff_celt_imdct_uninit(CeltIMDCTContext **s); + +/** + * Calculate the middle half of the iMDCT + */ +void ff_celt_imdct_half(CeltIMDCTContext *s, float *dst, const float *src, + int src_stride, float scale); + +int ff_celt_init(AVCodecContext *avctx, CeltContext **s, int output_channels); + +void ff_celt_free(CeltContext **s); + +void ff_celt_flush(CeltContext *s); + +int ff_celt_decode_frame(CeltContext *s, OpusRangeCoder *rc, + float **output, int coded_channels, int frame_size, + int startband, int endband); + +extern const float ff_celt_window2[120]; + +#endif /* AVCODEC_OPUS_H */ diff --git a/libavcodec/opus_celt.c b/libavcodec/opus_celt.c new file mode 100644 index 0000000000..6757136a9c --- /dev/null +++ b/libavcodec/opus_celt.c @@ -0,0 +1,2220 @@ +/* + * Copyright (c) 2012 Andrew D'Addesio + * Copyright (c) 2013-2014 Mozilla Corporation + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Opus CELT decoder + */ + +#include + +#include "libavutil/float_dsp.h" + +#include "opus.h" + +enum CeltSpread { + CELT_SPREAD_NONE, + CELT_SPREAD_LIGHT, + CELT_SPREAD_NORMAL, + CELT_SPREAD_AGGRESSIVE +}; + +typedef struct CeltFrame { + float energy[CELT_MAX_BANDS]; + float prev_energy[2][CELT_MAX_BANDS]; + + uint8_t collapse_masks[CELT_MAX_BANDS]; + + /* buffer for mdct output + postfilter */ + DECLARE_ALIGNED(32, float, buf)[2048]; + + /* postfilter parameters */ + int pf_period_new; + float pf_gains_new[3]; + int pf_period; + float pf_gains[3]; + int pf_period_old; + float pf_gains_old[3]; + + float deemph_coeff; +} CeltFrame; + +struct CeltContext { + // constant values that do not change during context lifetime + AVCodecContext *avctx; + CeltIMDCTContext *imdct[4]; + AVFloatDSPContext dsp; + int output_channels; + + // values that have inter-frame effect and must be reset on flush + CeltFrame frame[2]; + uint32_t seed; + int flushed; + + // values that only affect a single frame + int coded_channels; + int framebits; + int duration; + + /* number of iMDCT blocks in the frame */ + int blocks; + /* size of each block */ + int blocksize; + + int startband; + int endband; + int codedbands; + + int anticollapse_bit; + + int intensitystereo; + int dualstereo; + enum CeltSpread spread; + + int remaining; + int remaining2; + int fine_bits [CELT_MAX_BANDS]; + int fine_priority[CELT_MAX_BANDS]; + int pulses [CELT_MAX_BANDS]; + int tf_change [CELT_MAX_BANDS]; + + DECLARE_ALIGNED(32, float, coeffs)[2][CELT_MAX_FRAME_SIZE]; + DECLARE_ALIGNED(32, float, scratch)[22 * 8]; // MAX(celt_freq_range) * 1<> 13; + x = (32767-x) + ROUND_MUL16(x, (-7651 + ROUND_MUL16(x, (8277 + ROUND_MUL16(-626, x))))); + return 1+x; +} + +static inline int 
celt_log2tan(int isin, int icos) +{ + int lc, ls; + lc = opus_ilog(icos); + ls = opus_ilog(isin); + icos <<= 15 - lc; + isin <<= 15 - ls; + return (ls << 11) - (lc << 11) + + ROUND_MUL16(isin, ROUND_MUL16(isin, -2597) + 7932) - + ROUND_MUL16(icos, ROUND_MUL16(icos, -2597) + 7932); +} + +static inline uint32_t celt_rng(CeltContext *s) +{ + s->seed = 1664525 * s->seed + 1013904223; + return s->seed; +} + +static void celt_decode_coarse_energy(CeltContext *s, OpusRangeCoder *rc) +{ + int i, j; + float prev[2] = {0}; + float alpha, beta; + const uint8_t *model; + + /* use the 2D z-transform to apply prediction in both */ + /* the time domain (alpha) and the frequency domain (beta) */ + + if (opus_rc_tell(rc)+3 <= s->framebits && opus_rc_p2model(rc, 3)) { + /* intra frame */ + alpha = 0; + beta = 1.0f - 4915.0f/32768.0f; + model = celt_coarse_energy_dist[s->duration][1]; + } else { + alpha = celt_alpha_coef[s->duration]; + beta = 1.0f - celt_beta_coef[s->duration]; + model = celt_coarse_energy_dist[s->duration][0]; + } + + for (i = 0; i < CELT_MAX_BANDS; i++) { + for (j = 0; j < s->coded_channels; j++) { + CeltFrame *frame = &s->frame[j]; + float value; + int available; + + if (i < s->startband || i >= s->endband) { + frame->energy[i] = 0.0; + continue; + } + + available = s->framebits - opus_rc_tell(rc); + if (available >= 15) { + /* decode using a Laplace distribution */ + int k = FFMIN(i, 20) << 1; + value = opus_rc_laplace(rc, model[k] << 7, model[k+1] << 6); + } else if (available >= 2) { + int x = opus_rc_getsymbol(rc, celt_model_energy_small); + value = (x>>1) ^ -(x&1); + } else if (available >= 1) { + value = -(float)opus_rc_p2model(rc, 1); + } else value = -1; + + frame->energy[i] = FFMAX(-9.0f, frame->energy[i]) * alpha + prev[j] + value; + prev[j] += beta * value; + } + } +} + +static void celt_decode_fine_energy(CeltContext *s, OpusRangeCoder *rc) +{ + int i; + for (i = s->startband; i < s->endband; i++) { + int j; + if (!s->fine_bits[i]) + continue; + + for 
(j = 0; j < s->coded_channels; j++) { + CeltFrame *frame = &s->frame[j]; + int q2; + float offset; + q2 = opus_getrawbits(rc, s->fine_bits[i]); + offset = (q2 + 0.5f) * (1 << (14 - s->fine_bits[i])) / 16384.0f - 0.5f; + frame->energy[i] += offset; + } + } +} + +static void celt_decode_final_energy(CeltContext *s, OpusRangeCoder *rc, + int bits_left) +{ + int priority, i, j; + + for (priority = 0; priority < 2; priority++) { + for (i = s->startband; i < s->endband && bits_left >= s->coded_channels; i++) { + if (s->fine_priority[i] != priority || s->fine_bits[i] >= CELT_MAX_FINE_BITS) + continue; + + for (j = 0; j < s->coded_channels; j++) { + int q2; + float offset; + q2 = opus_getrawbits(rc, 1); + offset = (q2 - 0.5f) * (1 << (14 - s->fine_bits[i] - 1)) / 16384.0f; + s->frame[j].energy[i] += offset; + bits_left--; + } + } + } +} + +static void celt_decode_tf_changes(CeltContext *s, OpusRangeCoder *rc, + int transient) +{ + int i, diff = 0, tf_select = 0, tf_changed = 0, tf_select_bit; + int consumed, bits = transient ? 2 : 4; + + consumed = opus_rc_tell(rc); + tf_select_bit = (s->duration != 0 && consumed+bits+1 <= s->framebits); + + for (i = s->startband; i < s->endband; i++) { + if (consumed+bits+tf_select_bit <= s->framebits) { + diff ^= opus_rc_p2model(rc, bits); + consumed = opus_rc_tell(rc); + tf_changed |= diff; + } + s->tf_change[i] = diff; + bits = transient ? 4 : 5; + } + + if (tf_select_bit && celt_tf_select[s->duration][transient][0][tf_changed] != + celt_tf_select[s->duration][transient][1][tf_changed]) + tf_select = opus_rc_p2model(rc, 1); + + for (i = s->startband; i < s->endband; i++) { + s->tf_change[i] = celt_tf_select[s->duration][transient][tf_select][s->tf_change[i]]; + } +} + +static void celt_decode_allocation(CeltContext *s, OpusRangeCoder *rc) +{ + // approx. 
maximum bit allocation for each band before boost/trim + int cap[CELT_MAX_BANDS]; + int boost[CELT_MAX_BANDS]; + int threshold[CELT_MAX_BANDS]; + int bits1[CELT_MAX_BANDS]; + int bits2[CELT_MAX_BANDS]; + int trim_offset[CELT_MAX_BANDS]; + + int skip_startband = s->startband; + int dynalloc = 6; + int alloctrim = 5; + int extrabits = 0; + + int skip_bit = 0; + int intensitystereo_bit = 0; + int dualstereo_bit = 0; + + int remaining, bandbits; + int low, high, total, done; + int totalbits; + int consumed; + int i, j; + + consumed = opus_rc_tell(rc); + + /* obtain spread flag */ + s->spread = CELT_SPREAD_NORMAL; + if (consumed + 4 <= s->framebits) + s->spread = opus_rc_getsymbol(rc, celt_model_spread); + + /* generate static allocation caps */ + for (i = 0; i < CELT_MAX_BANDS; i++) { + cap[i] = (celt_static_caps[s->duration][s->coded_channels - 1][i] + 64) + * celt_freq_range[i] << (s->coded_channels - 1) << s->duration >> 2; + } + + /* obtain band boost */ + totalbits = s->framebits << 3; // convert to 1/8 bits + consumed = opus_rc_tell_frac(rc); + for (i = s->startband; i < s->endband; i++) { + int quanta, band_dynalloc; + + boost[i] = 0; + + quanta = celt_freq_range[i] << (s->coded_channels - 1) << s->duration; + quanta = FFMIN(quanta << 3, FFMAX(6 << 3, quanta)); + band_dynalloc = dynalloc; + while (consumed + (band_dynalloc<<3) < totalbits && boost[i] < cap[i]) { + int add = opus_rc_p2model(rc, band_dynalloc); + consumed = opus_rc_tell_frac(rc); + if (!add) + break; + + boost[i] += quanta; + totalbits -= quanta; + band_dynalloc = 1; + } + /* dynalloc is more likely to occur if it's already been used for earlier bands */ + if (boost[i]) + dynalloc = FFMAX(2, dynalloc - 1); + } + + /* obtain allocation trim */ + if (consumed + (6 << 3) <= totalbits) + alloctrim = opus_rc_getsymbol(rc, celt_model_alloc_trim); + + /* anti-collapse bit reservation */ + totalbits = (s->framebits << 3) - opus_rc_tell_frac(rc) - 1; + s->anticollapse_bit = 0; + if (s->blocks > 1 && 
s->duration >= 2 && + totalbits >= ((s->duration + 2) << 3)) + s->anticollapse_bit = 1 << 3; + totalbits -= s->anticollapse_bit; + + /* band skip bit reservation */ + if (totalbits >= 1 << 3) + skip_bit = 1 << 3; + totalbits -= skip_bit; + + /* intensity/dual stereo bit reservation */ + if (s->coded_channels == 2) { + intensitystereo_bit = celt_log2_frac[s->endband - s->startband]; + if (intensitystereo_bit <= totalbits) { + totalbits -= intensitystereo_bit; + if (totalbits >= 1 << 3) { + dualstereo_bit = 1 << 3; + totalbits -= 1 << 3; + } + } else + intensitystereo_bit = 0; + } + + for (i = s->startband; i < s->endband; i++) { + int trim = alloctrim - 5 - s->duration; + int band = celt_freq_range[i] * (s->endband - i - 1); + int duration = s->duration + 3; + int scale = duration + s->coded_channels - 1; + + /* PVQ minimum allocation threshold, below this value the band is + * skipped */ + threshold[i] = FFMAX(3 * celt_freq_range[i] << duration >> 4, + s->coded_channels << 3); + + trim_offset[i] = trim * (band << scale) >> 6; + + if (celt_freq_range[i] << s->duration == 1) + trim_offset[i] -= s->coded_channels << 3; + } + + /* bisection */ + low = 1; + high = CELT_VECTORS - 1; + while (low <= high) { + int center = (low + high) >> 1; + done = total = 0; + + for (i = s->endband - 1; i >= s->startband; i--) { + bandbits = celt_freq_range[i] * celt_static_alloc[center][i] + << (s->coded_channels - 1) << s->duration >> 2; + + if (bandbits) + bandbits = FFMAX(0, bandbits + trim_offset[i]); + bandbits += boost[i]; + + if (bandbits >= threshold[i] || done) { + done = 1; + total += FFMIN(bandbits, cap[i]); + } else if (bandbits >= s->coded_channels << 3) + total += s->coded_channels << 3; + } + + if (total > totalbits) + high = center - 1; + else + low = center + 1; + } + high = low--; + + for (i = s->startband; i < s->endband; i++) { + bits1[i] = celt_freq_range[i] * celt_static_alloc[low][i] + << (s->coded_channels - 1) << s->duration >> 2; + bits2[i] = high >= 
CELT_VECTORS ? cap[i] : + celt_freq_range[i] * celt_static_alloc[high][i] + << (s->coded_channels - 1) << s->duration >> 2; + + if (bits1[i]) + bits1[i] = FFMAX(0, bits1[i] + trim_offset[i]); + if (bits2[i]) + bits2[i] = FFMAX(0, bits2[i] + trim_offset[i]); + if (low) + bits1[i] += boost[i]; + bits2[i] += boost[i]; + + if (boost[i]) + skip_startband = i; + bits2[i] = FFMAX(0, bits2[i] - bits1[i]); + } + + /* bisection */ + low = 0; + high = 1 << CELT_ALLOC_STEPS; + for (i = 0; i < CELT_ALLOC_STEPS; i++) { + int center = (low + high) >> 1; + done = total = 0; + + for (j = s->endband - 1; j >= s->startband; j--) { + bandbits = bits1[j] + (center * bits2[j] >> CELT_ALLOC_STEPS); + + if (bandbits >= threshold[j] || done) { + done = 1; + total += FFMIN(bandbits, cap[j]); + } else if (bandbits >= s->coded_channels << 3) + total += s->coded_channels << 3; + } + if (total > totalbits) + high = center; + else + low = center; + } + + done = total = 0; + for (i = s->endband - 1; i >= s->startband; i--) { + bandbits = bits1[i] + (low * bits2[i] >> CELT_ALLOC_STEPS); + + if (bandbits >= threshold[i] || done) + done = 1; + else + bandbits = (bandbits >= s->coded_channels << 3) ? 
+ s->coded_channels << 3 : 0; + + bandbits = FFMIN(bandbits, cap[i]); + s->pulses[i] = bandbits; + total += bandbits; + } + + /* band skipping */ + for (s->codedbands = s->endband; ; s->codedbands--) { + int allocation; + j = s->codedbands - 1; + + if (j == skip_startband) { + /* all remaining bands are not skipped */ + totalbits += skip_bit; + break; + } + + /* determine the number of bits available for coding "do not skip" markers */ + remaining = totalbits - total; + bandbits = remaining / (celt_freq_bands[j+1] - celt_freq_bands[s->startband]); + remaining -= bandbits * (celt_freq_bands[j+1] - celt_freq_bands[s->startband]); + allocation = s->pulses[j] + bandbits * celt_freq_range[j] + + FFMAX(0, remaining - (celt_freq_bands[j] - celt_freq_bands[s->startband])); + + /* a "do not skip" marker is only coded if the allocation is + above the chosen threshold */ + if (allocation >= FFMAX(threshold[j], (s->coded_channels + 1) <<3 )) { + if (opus_rc_p2model(rc, 1)) + break; + + total += 1 << 3; + allocation -= 1 << 3; + } + + /* the band is skipped, so reclaim its bits */ + total -= s->pulses[j]; + if (intensitystereo_bit) { + total -= intensitystereo_bit; + intensitystereo_bit = celt_log2_frac[j - s->startband]; + total += intensitystereo_bit; + } + + total += s->pulses[j] = (allocation >= s->coded_channels << 3) ? 
+ s->coded_channels << 3 : 0; + } + + /* obtain stereo flags */ + s->intensitystereo = 0; + s->dualstereo = 0; + if (intensitystereo_bit) + s->intensitystereo = s->startband + + opus_rc_unimodel(rc, s->codedbands + 1 - s->startband); + if (s->intensitystereo <= s->startband) + totalbits += dualstereo_bit; /* no intensity stereo means no dual stereo */ + else if (dualstereo_bit) + s->dualstereo = opus_rc_p2model(rc, 1); + + /* supply the remaining bits in this frame to lower bands */ + remaining = totalbits - total; + bandbits = remaining / (celt_freq_bands[s->codedbands] - celt_freq_bands[s->startband]); + remaining -= bandbits * (celt_freq_bands[s->codedbands] - celt_freq_bands[s->startband]); + for (i = s->startband; i < s->codedbands; i++) { + int bits = FFMIN(remaining, celt_freq_range[i]); + + s->pulses[i] += bits + bandbits * celt_freq_range[i]; + remaining -= bits; + } + + for (i = s->startband; i < s->codedbands; i++) { + int N = celt_freq_range[i] << s->duration; + int prev_extra = extrabits; + s->pulses[i] += extrabits; + + if (N > 1) { + int dof; // degrees of freedom + int temp; // dof * channels * log(dof) + int offset; // fine energy quantization offset, i.e. 
+ // extra bits assigned over the standard + // totalbits/dof + int fine_bits, max_bits; + + extrabits = FFMAX(0, s->pulses[i] - cap[i]); + s->pulses[i] -= extrabits; + + /* intensity stereo makes use of an extra degree of freedom */ + dof = N * s->coded_channels + + (s->coded_channels == 2 && N > 2 && !s->dualstereo && i < s->intensitystereo); + temp = dof * (celt_log_freq_range[i] + (s->duration<<3)); + offset = (temp >> 1) - dof * CELT_FINE_OFFSET; + if (N == 2) /* dof=2 is the only case that doesn't fit the model */ + offset += dof<<1; + + /* grant an additional bias for the first and second pulses */ + if (s->pulses[i] + offset < 2 * (dof << 3)) + offset += temp >> 2; + else if (s->pulses[i] + offset < 3 * (dof << 3)) + offset += temp >> 3; + + fine_bits = (s->pulses[i] + offset + (dof << 2)) / (dof << 3); + max_bits = FFMIN((s->pulses[i]>>3) >> (s->coded_channels - 1), + CELT_MAX_FINE_BITS); + + max_bits = FFMAX(max_bits, 0); + + s->fine_bits[i] = av_clip(fine_bits, 0, max_bits); + + /* if fine_bits was rounded down or capped, + give priority for the final fine energy pass */ + s->fine_priority[i] = (s->fine_bits[i] * (dof<<3) >= s->pulses[i] + offset); + + /* the remaining bits are assigned to PVQ */ + s->pulses[i] -= s->fine_bits[i] << (s->coded_channels - 1) << 3; + } else { + /* all bits go to fine energy except for the sign bit */ + extrabits = FFMAX(0, s->pulses[i] - (s->coded_channels << 3)); + s->pulses[i] -= extrabits; + s->fine_bits[i] = 0; + s->fine_priority[i] = 1; + } + + /* hand back a limited number of extra fine energy bits to this band */ + if (extrabits > 0) { + int fineextra = FFMIN(extrabits >> (s->coded_channels + 2), + CELT_MAX_FINE_BITS - s->fine_bits[i]); + s->fine_bits[i] += fineextra; + + fineextra <<= s->coded_channels + 2; + s->fine_priority[i] = (fineextra >= extrabits - prev_extra); + extrabits -= fineextra; + } + } + s->remaining = extrabits; + + /* skipped bands dedicate all of their bits for fine energy */ + for (; i < 
s->endband; i++) { + s->fine_bits[i] = s->pulses[i] >> (s->coded_channels - 1) >> 3; + s->pulses[i] = 0; + s->fine_priority[i] = s->fine_bits[i] < 1; + } +} + +static inline int celt_bits2pulses(const uint8_t *cache, int bits) +{ + // TODO: Find the size of cache and make it into an array in the parameters list + int i, low = 0, high; + + high = cache[0]; + bits--; + + for (i = 0; i < 6; i++) { + int center = (low + high + 1) >> 1; + if (cache[center] >= bits) + high = center; + else + low = center; + } + + return (bits - (low == 0 ? -1 : cache[low]) <= cache[high] - bits) ? low : high; +} + +static inline int celt_pulses2bits(const uint8_t *cache, int pulses) +{ + // TODO: Find the size of cache and make it into an array in the parameters list + return (pulses == 0) ? 0 : cache[pulses] + 1; +} + +static inline void celt_normalize_residual(const int * restrict iy, float * restrict X, + int N, float g) +{ + int i; + for (i = 0; i < N; i++) + X[i] = g * iy[i]; +} + +static void celt_exp_rotation1(float *X, unsigned int len, unsigned int stride, + float c, float s) +{ + float *Xptr; + int i; + + Xptr = X; + for (i = 0; i < len - stride; i++) { + float x1, x2; + x1 = Xptr[0]; + x2 = Xptr[stride]; + Xptr[stride] = c * x2 + s * x1; + *Xptr++ = c * x1 - s * x2; + } + + Xptr = &X[len - 2 * stride - 1]; + for (i = len - 2 * stride - 1; i >= 0; i--) { + float x1, x2; + x1 = Xptr[0]; + x2 = Xptr[stride]; + Xptr[stride] = c * x2 + s * x1; + *Xptr-- = c * x1 - s * x2; + } +} + +static inline void celt_exp_rotation(float *X, unsigned int len, + unsigned int stride, unsigned int K, + enum CeltSpread spread) +{ + unsigned int stride2 = 0; + float c, s; + float gain, theta; + int i; + + if (2*K >= len || spread == CELT_SPREAD_NONE) + return; + + gain = (float)len / (len + (20 - 5*spread) * K); + theta = M_PI * gain * gain / 4; + + c = cos(theta); + s = sin(theta); + + if (len >= stride << 3) { + stride2 = 1; + /* This is just a simple (equivalent) way of computing sqrt(len/stride) 
with rounding. + It's basically incrementing long as (stride2+0.5)^2 < len/stride. */ + while ((stride2 * stride2 + stride2) * stride + (stride >> 2) < len) + stride2++; + } + + /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for + extract_collapse_mask().*/ + len /= stride; + for (i = 0; i < stride; i++) { + if (stride2) + celt_exp_rotation1(X + i * len, len, stride2, s, c); + celt_exp_rotation1(X + i * len, len, 1, c, s); + } +} + +static inline unsigned int celt_extract_collapse_mask(const int *iy, + unsigned int N, + unsigned int B) +{ + unsigned int collapse_mask; + int N0; + int i, j; + + if (B <= 1) + return 1; + + /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for + exp_rotation().*/ + N0 = N/B; + collapse_mask = 0; + for (i = 0; i < B; i++) + for (j = 0; j < N0; j++) + collapse_mask |= (iy[i*N0+j]!=0)<>= 1; + for (i = 0; i < stride; i++) { + for (j = 0; j < N0; j++) { + float x0 = X[stride * (2 * j + 0) + i]; + float x1 = X[stride * (2 * j + 1) + i]; + X[stride * (2 * j + 0) + i] = (x0 + x1) * M_SQRT1_2; + X[stride * (2 * j + 1) + i] = (x0 - x1) * M_SQRT1_2; + } + } +} + +static inline int celt_compute_qn(int N, int b, int offset, int pulse_cap, + int dualstereo) +{ + int qn, qb; + int N2 = 2 * N - 1; + if (dualstereo && N == 2) + N2--; + + /* The upper limit ensures that in a stereo split with itheta==16384, we'll + * always have enough bits left over to code at least one pulse in the + * side; otherwise it would collapse, since it doesn't get folded. */ + qb = FFMIN3(b - pulse_cap - (4 << 3), (b + N2 * offset) / N2, 8 << 3); + qn = (qb < (1 << 3 >> 1)) ? 
1 : ((celt_qn_exp2[qb & 0x7] >> (14 - (qb >> 3))) + 1) >> 1 << 1; + return qn; +} + +// this code was adapted from libopus +static inline uint64_t celt_cwrsi(unsigned int N, unsigned int K, unsigned int i, int *y) +{ + uint64_t norm = 0; + uint32_t p; + int s, val; + int k0; + + while (N > 2) { + uint32_t q; + + /*Lots of pulses case:*/ + if (K >= N) { + const uint32_t *row = celt_pvq_u_row[N]; + + /* Are the pulses in this dimension negative? */ + p = row[K + 1]; + s = -(i >= p); + i -= p & s; + + /*Count how many pulses were placed in this dimension.*/ + k0 = K; + q = row[N]; + if (q > i) { + K = N; + do { + p = celt_pvq_u_row[--K][N]; + } while (p > i); + } else + for (p = row[K]; p > i; p = row[K]) + K--; + + i -= p; + val = (k0 - K + s) ^ s; + norm += val * val; + *y++ = val; + } else { /*Lots of dimensions case:*/ + /*Are there any pulses in this dimension at all?*/ + p = celt_pvq_u_row[K ][N]; + q = celt_pvq_u_row[K + 1][N]; + + if (p <= i && i < q) { + i -= p; + *y++ = 0; + } else { + /*Are the pulses in this dimension negative?*/ + s = -(i >= q); + i -= q & s; + + /*Count how many pulses were placed in this dimension.*/ + k0 = K; + do p = celt_pvq_u_row[--K][N]; + while (p > i); + + i -= p; + val = (k0 - K + s) ^ s; + norm += val * val; + *y++ = val; + } + } + N--; + } + + /* N == 2 */ + p = 2 * K + 1; + s = -(i >= p); + i -= p & s; + k0 = K; + K = (i + 1) / 2; + + if (K) + i -= 2 * K - 1; + + val = (k0 - K + s) ^ s; + norm += val * val; + *y++ = val; + + /* N==1 */ + s = -i; + val = (K + s) ^ s; + norm += val * val; + *y = val; + + return norm; +} + +static inline float celt_decode_pulses(OpusRangeCoder *rc, int *y, unsigned int N, unsigned int K) +{ + unsigned int idx; +#define CELT_PVQ_U(n, k) (celt_pvq_u_row[FFMIN(n, k)][FFMAX(n, k)]) +#define CELT_PVQ_V(n, k) (CELT_PVQ_U(n, k) + CELT_PVQ_U(n, k + 1)) + idx = opus_rc_unimodel(rc, CELT_PVQ_V(N, K)); + return celt_cwrsi(N, K, idx, y); +} + +/** Decode pulse vector and combine the result with the pitch 
vector to produce + the final normalised signal in the current band. */ +static inline unsigned int celt_alg_unquant(OpusRangeCoder *rc, float *X, + unsigned int N, unsigned int K, + enum CeltSpread spread, + unsigned int blocks, float gain) +{ + int y[176]; + + gain /= sqrtf(celt_decode_pulses(rc, y, N, K)); + celt_normalize_residual(y, X, N, gain); + celt_exp_rotation(X, N, blocks, K, spread); + return celt_extract_collapse_mask(y, N, blocks); +} + +static unsigned int celt_decode_band(CeltContext *s, OpusRangeCoder *rc, + const int band, float *X, float *Y, + int N, int b, unsigned int blocks, + float *lowband, int duration, + float *lowband_out, int level, + float gain, float *lowband_scratch, + int fill) +{ + const uint8_t *cache; + int dualstereo, split; + int imid = 0, iside = 0; + unsigned int N0 = N; + int N_B; + int N_B0; + int B0 = blocks; + int time_divide = 0; + int recombine = 0; + int inv = 0; + float mid = 0, side = 0; + int longblocks = (B0 == 1); + unsigned int cm = 0; + + N_B0 = N_B = N / blocks; + split = dualstereo = (Y != NULL); + + if (N == 1) { + /* special case for one sample */ + int i; + float *x = X; + for (i = 0; i <= dualstereo; i++) { + int sign = 0; + if (s->remaining2 >= 1<<3) { + sign = opus_getrawbits(rc, 1); + s->remaining2 -= 1 << 3; + b -= 1 << 3; + } + x[0] = sign ? 
-1.0f : 1.0f; + x = Y; + } + if (lowband_out) + lowband_out[0] = X[0]; + return 1; + } + + if (!dualstereo && level == 0) { + int tf_change = s->tf_change[band]; + int k; + if (tf_change > 0) + recombine = tf_change; + /* Band recombining to increase frequency resolution */ + + if (lowband && + (recombine || ((N_B & 1) == 0 && tf_change < 0) || B0 > 1)) { + int j; + for (j = 0; j < N; j++) + lowband_scratch[j] = lowband[j]; + lowband = lowband_scratch; + } + + for (k = 0; k < recombine; k++) { + if (lowband) + celt_haar1(lowband, N >> k, 1 << k); + fill = celt_bit_interleave[fill & 0xF] | celt_bit_interleave[fill >> 4] << 2; + } + blocks >>= recombine; + N_B <<= recombine; + + /* Increasing the time resolution */ + while ((N_B & 1) == 0 && tf_change < 0) { + if (lowband) + celt_haar1(lowband, N_B, blocks); + fill |= fill << blocks; + blocks <<= 1; + N_B >>= 1; + time_divide++; + tf_change++; + } + B0 = blocks; + N_B0 = N_B; + + /* Reorganize the samples in time order instead of frequency order */ + if (B0 > 1 && lowband) + celt_deinterleave_hadamard(s->scratch, lowband, N_B >> recombine, + B0 << recombine, longblocks); + } + + /* If we need 1.5 more bit than we can produce, split the band in two. */ + cache = celt_cache_bits + + celt_cache_index[(duration + 1) * CELT_MAX_BANDS + band]; + if (!dualstereo && duration >= 0 && b > cache[cache[0]] + 12 && N > 2) { + N >>= 1; + Y = X + N; + split = 1; + duration -= 1; + if (blocks == 1) + fill = (fill & 1) | (fill << 1); + blocks = (blocks + 1) >> 1; + } + + if (split) { + int qn; + int itheta = 0; + int mbits, sbits, delta; + int qalloc; + int pulse_cap; + int offset; + int orig_fill; + int tell; + + /* Decide on the resolution to give to the split parameter theta */ + pulse_cap = celt_log_freq_range[band] + duration * 8; + offset = (pulse_cap >> 1) - (dualstereo && N == 2 ? CELT_QTHETA_OFFSET_TWOPHASE : + CELT_QTHETA_OFFSET); + qn = (dualstereo && band >= s->intensitystereo) ? 
1 : + celt_compute_qn(N, b, offset, pulse_cap, dualstereo); + tell = opus_rc_tell_frac(rc); + if (qn != 1) { + /* Entropy coding of the angle. We use a uniform pdf for the + time split, a step for stereo, and a triangular one for the rest. */ + if (dualstereo && N > 2) + itheta = opus_rc_stepmodel(rc, qn/2); + else if (dualstereo || B0 > 1) + itheta = opus_rc_unimodel(rc, qn+1); + else + itheta = opus_rc_trimodel(rc, qn); + itheta = itheta * 16384 / qn; + /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate. + Let's do that at higher complexity */ + } else if (dualstereo) { + inv = (b > 2 << 3 && s->remaining2 > 2 << 3) ? opus_rc_p2model(rc, 2) : 0; + itheta = 0; + } + qalloc = opus_rc_tell_frac(rc) - tell; + b -= qalloc; + + orig_fill = fill; + if (itheta == 0) { + imid = 32767; + iside = 0; + fill &= (1 << blocks) - 1; + delta = -16384; + } else if (itheta == 16384) { + imid = 0; + iside = 32767; + fill &= ((1 << blocks) - 1) << blocks; + delta = 16384; + } else { + imid = celt_cos(itheta); + iside = celt_cos(16384-itheta); + /* This is the mid vs side allocation that minimizes squared error + in that band. */ + delta = ROUND_MUL16((N - 1) << 7, celt_log2tan(iside, imid)); + } + + mid = imid / 32768.0f; + side = iside / 32768.0f; + + /* This is a special case for N=2 that only works for stereo and takes + advantage of the fact that mid and side are orthogonal to encode + the side with just one bit. */ + if (N == 2 && dualstereo) { + int c; + int sign = 0; + float tmp; + float *x2, *y2; + mbits = b; + /* Only need one bit for the side */ + sbits = (itheta != 0 && itheta != 16384) ? 1 << 3 : 0; + mbits -= sbits; + c = (itheta > 8192); + s->remaining2 -= qalloc+sbits; + + x2 = c ? Y : X; + y2 = c ? X : Y; + if (sbits) + sign = opus_getrawbits(rc, 1); + sign = 1 - 2 * sign; + /* We use orig_fill here because we want to fold the side, but if + itheta==16384, we'll have cleared the low bits of fill. 
*/ + cm = celt_decode_band(s, rc, band, x2, NULL, N, mbits, blocks, + lowband, duration, lowband_out, level, gain, + lowband_scratch, orig_fill); + /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse), + and there's no need to worry about mixing with the other channel. */ + y2[0] = -sign * x2[1]; + y2[1] = sign * x2[0]; + X[0] *= mid; + X[1] *= mid; + Y[0] *= side; + Y[1] *= side; + tmp = X[0]; + X[0] = tmp - Y[0]; + Y[0] = tmp + Y[0]; + tmp = X[1]; + X[1] = tmp - Y[1]; + Y[1] = tmp + Y[1]; + } else { + /* "Normal" split code */ + float *next_lowband2 = NULL; + float *next_lowband_out1 = NULL; + int next_level = 0; + int rebalance; + + /* Give more bits to low-energy MDCTs than they would + * otherwise deserve */ + if (B0 > 1 && !dualstereo && (itheta & 0x3fff)) { + if (itheta > 8192) + /* Rough approximation for pre-echo masking */ + delta -= delta >> (4 - duration); + else + /* Corresponds to a forward-masking slope of + * 1.5 dB per 10 ms */ + delta = FFMIN(0, delta + (N << 3 >> (5 - duration))); + } + mbits = av_clip((b - delta) / 2, 0, b); + sbits = b - mbits; + s->remaining2 -= qalloc; + + if (lowband && !dualstereo) + next_lowband2 = lowband + N; /* >32-bit split case */ + + /* Only stereo needs to pass on lowband_out. + * Otherwise, it's handled at the end */ + if (dualstereo) + next_lowband_out1 = lowband_out; + else + next_level = level + 1; + + rebalance = s->remaining2; + if (mbits >= sbits) { + /* In stereo mode, we do not apply a scaling to the mid + * because we need the normalized mid for folding later */ + cm = celt_decode_band(s, rc, band, X, NULL, N, mbits, blocks, + lowband, duration, next_lowband_out1, + next_level, dualstereo ? 1.0f : (gain * mid), + lowband_scratch, fill); + + rebalance = mbits - (rebalance - s->remaining2); + if (rebalance > 3 << 3 && itheta != 0) + sbits += rebalance - (3 << 3); + + /* For a stereo split, the high bits of fill are always zero, + * so no folding will be done to the side. 
*/ + cm |= celt_decode_band(s, rc, band, Y, NULL, N, sbits, blocks, + next_lowband2, duration, NULL, + next_level, gain * side, NULL, + fill >> blocks) << ((B0 >> 1) & (dualstereo - 1)); + } else { + /* For a stereo split, the high bits of fill are always zero, + * so no folding will be done to the side. */ + cm = celt_decode_band(s, rc, band, Y, NULL, N, sbits, blocks, + next_lowband2, duration, NULL, + next_level, gain * side, NULL, + fill >> blocks) << ((B0 >> 1) & (dualstereo - 1)); + + rebalance = sbits - (rebalance - s->remaining2); + if (rebalance > 3 << 3 && itheta != 16384) + mbits += rebalance - (3 << 3); + + /* In stereo mode, we do not apply a scaling to the mid because + * we need the normalized mid for folding later */ + cm |= celt_decode_band(s, rc, band, X, NULL, N, mbits, blocks, + lowband, duration, next_lowband_out1, + next_level, dualstereo ? 1.0f : (gain * mid), + lowband_scratch, fill); + } + } + } else { + /* This is the basic no-split case */ + unsigned int q = celt_bits2pulses(cache, b); + unsigned int curr_bits = celt_pulses2bits(cache, q); + s->remaining2 -= curr_bits; + + /* Ensures we can never bust the budget */ + while (s->remaining2 < 0 && q > 0) { + s->remaining2 += curr_bits; + curr_bits = celt_pulses2bits(cache, --q); + s->remaining2 -= curr_bits; + } + + if (q != 0) { + /* Finally do the actual quantization */ + cm = celt_alg_unquant(rc, X, N, (q < 8) ? q : (8 + (q & 7)) << ((q >> 3) - 1), + s->spread, blocks, gain); + } else { + /* If there's no pulse, fill the band anyway */ + int j; + unsigned int cm_mask = (1 << blocks) - 1; + fill &= cm_mask; + if (!fill) { + for (j = 0; j < N; j++) + X[j] = 0.0f; + } else { + if (lowband == NULL) { + /* Noise */ + for (j = 0; j < N; j++) + X[j] = (((int32_t)celt_rng(s)) >> 20); + cm = cm_mask; + } else { + /* Folded spectrum */ + for (j = 0; j < N; j++) { + /* About 48 dB below the "normal" folding level */ + X[j] = lowband[j] + (((celt_rng(s)) & 0x8000) ? 
1.0f / 256 : -1.0f / 256); + } + cm = fill; + } + celt_renormalize_vector(X, N, gain); + } + } + } + + /* This code is used by the decoder and by the resynthesis-enabled encoder */ + if (dualstereo) { + int j; + if (N != 2) + celt_stereo_merge(X, Y, mid, N); + if (inv) { + for (j = 0; j < N; j++) + Y[j] *= -1; + } + } else if (level == 0) { + int k; + + /* Undo the sample reorganization going from time order to frequency order */ + if (B0 > 1) + celt_interleave_hadamard(s->scratch, X, N_B>>recombine, + B0<>= 1; + N_B <<= 1; + cm |= cm >> blocks; + celt_haar1(X, N_B, blocks); + } + + for (k = 0; k < recombine; k++) { + cm = celt_bit_deinterleave[cm]; + celt_haar1(X, N0>>k, 1<startband; i < s->endband; i++) { + float *dst = data + (celt_freq_bands[i] << s->duration); + float norm = pow(2, frame->energy[i] + celt_mean_energy[i]); + + for (j = 0; j < celt_freq_range[i] << s->duration; j++) + dst[j] *= norm; + } +} + +static void celt_postfilter_apply_transition(CeltFrame *frame, float *data) +{ + const int T0 = frame->pf_period_old; + const int T1 = frame->pf_period; + + float g00, g01, g02; + float g10, g11, g12; + + float x0, x1, x2, x3, x4; + + int i; + + if (frame->pf_gains[0] == 0.0 && + frame->pf_gains_old[0] == 0.0) + return; + + g00 = frame->pf_gains_old[0]; + g01 = frame->pf_gains_old[1]; + g02 = frame->pf_gains_old[2]; + g10 = frame->pf_gains[0]; + g11 = frame->pf_gains[1]; + g12 = frame->pf_gains[2]; + + x1 = data[-T1 + 1]; + x2 = data[-T1]; + x3 = data[-T1 - 1]; + x4 = data[-T1 - 2]; + + for (i = 0; i < CELT_OVERLAP; i++) { + float w = ff_celt_window2[i]; + x0 = data[i - T1 + 2]; + + data[i] += (1.0 - w) * g00 * data[i - T0] + + (1.0 - w) * g01 * (data[i - T0 - 1] + data[i - T0 + 1]) + + (1.0 - w) * g02 * (data[i - T0 - 2] + data[i - T0 + 2]) + + w * g10 * x2 + + w * g11 * (x1 + x3) + + w * g12 * (x0 + x4); + x4 = x3; + x3 = x2; + x2 = x1; + x1 = x0; + } +} + +static void celt_postfilter_apply(CeltFrame *frame, + float *data, int len) +{ + const int T = 
frame->pf_period; + float g0, g1, g2; + float x0, x1, x2, x3, x4; + int i; + + if (frame->pf_gains[0] == 0.0 || len <= 0) + return; + + g0 = frame->pf_gains[0]; + g1 = frame->pf_gains[1]; + g2 = frame->pf_gains[2]; + + x4 = data[-T - 2]; + x3 = data[-T - 1]; + x2 = data[-T]; + x1 = data[-T + 1]; + + for (i = 0; i < len; i++) { + x0 = data[i - T + 2]; + data[i] += g0 * x2 + + g1 * (x1 + x3) + + g2 * (x0 + x4); + x4 = x3; + x3 = x2; + x2 = x1; + x1 = x0; + } +} + +static void celt_postfilter(CeltContext *s, CeltFrame *frame) +{ + int len = s->blocksize * s->blocks; + + celt_postfilter_apply_transition(frame, frame->buf + 1024); + + frame->pf_period_old = frame->pf_period; + memcpy(frame->pf_gains_old, frame->pf_gains, sizeof(frame->pf_gains)); + + frame->pf_period = frame->pf_period_new; + memcpy(frame->pf_gains, frame->pf_gains_new, sizeof(frame->pf_gains)); + + if (len > CELT_OVERLAP) { + celt_postfilter_apply_transition(frame, frame->buf + 1024 + CELT_OVERLAP); + celt_postfilter_apply(frame, frame->buf + 1024 + 2 * CELT_OVERLAP, + len - 2 * CELT_OVERLAP); + + frame->pf_period_old = frame->pf_period; + memcpy(frame->pf_gains_old, frame->pf_gains, sizeof(frame->pf_gains)); + } + + memmove(frame->buf, frame->buf + len, (1024 + CELT_OVERLAP / 2) * sizeof(float)); +} + +static int parse_postfilter(CeltContext *s, OpusRangeCoder *rc, int consumed) +{ + static const float postfilter_taps[3][3] = { + { 0.3066406250f, 0.2170410156f, 0.1296386719f }, + { 0.4638671875f, 0.2680664062f, 0.0 }, + { 0.7998046875f, 0.1000976562f, 0.0 } + }; + int i; + + memset(s->frame[0].pf_gains_new, 0, sizeof(s->frame[0].pf_gains_new)); + memset(s->frame[1].pf_gains_new, 0, sizeof(s->frame[1].pf_gains_new)); + + if (s->startband == 0 && consumed + 16 <= s->framebits) { + int has_postfilter = opus_rc_p2model(rc, 1); + if (has_postfilter) { + float gain; + int tapset, octave, period; + + octave = opus_rc_unimodel(rc, 6); + period = (16 << octave) + opus_getrawbits(rc, 4 + octave) - 1; + gain = 
0.09375f * (opus_getrawbits(rc, 3) + 1); + tapset = (opus_rc_tell(rc) + 2 <= s->framebits) ? + opus_rc_getsymbol(rc, celt_model_tapset) : 0; + + for (i = 0; i < 2; i++) { + CeltFrame *frame = &s->frame[i]; + + frame->pf_period_new = FFMAX(period, CELT_POSTFILTER_MINPERIOD); + frame->pf_gains_new[0] = gain * postfilter_taps[tapset][0]; + frame->pf_gains_new[1] = gain * postfilter_taps[tapset][1]; + frame->pf_gains_new[2] = gain * postfilter_taps[tapset][2]; + } + } + + consumed = opus_rc_tell(rc); + } + + return consumed; +} + +static void process_anticollapse(CeltContext *s, CeltFrame *frame, float *X) +{ + int i, j, k; + + for (i = s->startband; i < s->endband; i++) { + int renormalize = 0; + float *xptr; + float prev[2]; + float Ediff, r; + float thresh, sqrt_1; + int depth; + + /* depth in 1/8 bits */ + depth = (1 + s->pulses[i]) / (celt_freq_range[i] << s->duration); + thresh = pow(2, -1.0 - 0.125f * depth); + sqrt_1 = 1.0f / sqrtf(celt_freq_range[i] << s->duration); + + xptr = X + (celt_freq_bands[i] << s->duration); + + prev[0] = frame->prev_energy[0][i]; + prev[1] = frame->prev_energy[1][i]; + if (s->coded_channels == 1) { + CeltFrame *frame1 = &s->frame[1]; + + prev[0] = FFMAX(prev[0], frame1->prev_energy[0][i]); + prev[1] = FFMAX(prev[1], frame1->prev_energy[1][i]); + } + Ediff = frame->energy[i] - FFMIN(prev[0], prev[1]); + Ediff = FFMAX(0, Ediff); + + /* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because + short blocks don't have the same energy as long */ + r = pow(2, 1 - Ediff); + if (s->duration == 3) + r *= M_SQRT2; + r = FFMIN(thresh, r) * sqrt_1; + for (k = 0; k < 1 << s->duration; k++) { + /* Detect collapse */ + if (!(frame->collapse_masks[i] & 1 << k)) { + /* Fill with noise */ + for (j = 0; j < celt_freq_range[i]; j++) + xptr[(j << s->duration) + k] = (celt_rng(s) & 0x8000) ? 
r : -r; + renormalize = 1; + } + } + + /* We just added some energy, so we need to renormalize */ + if (renormalize) + celt_renormalize_vector(xptr, celt_freq_range[i] << s->duration, 1.0f); + } +} + +/** + * Decode the coefficients of every band in [s->startband, s->endband) into + * s->coeffs. For each band this computes the bit budget b, selects the lower + * band to fold from (if any) and stores the collapse masks returned by + * celt_decode_band() in s->frame[].collapse_masks. + */ +static void celt_decode_bands(CeltContext *s, OpusRangeCoder *rc) +{ + float lowband_scratch[8 * 22]; + float norm[2 * 8 * 100]; + + int totalbits = (s->framebits << 3) - s->anticollapse_bit; + + int update_lowband = 1; + int lowband_offset = 0; + + int i, j; + + memset(s->coeffs, 0, sizeof(s->coeffs)); + + for (i = s->startband; i < s->endband; i++) { + int band_offset = celt_freq_bands[i] << s->duration; + int band_size = celt_freq_range[i] << s->duration; + float *X = s->coeffs[0] + band_offset; + float *Y = (s->coded_channels == 2) ? s->coeffs[1] + band_offset : NULL; + + int consumed = opus_rc_tell_frac(rc); + float *norm2 = norm + 8 * 100; + int effective_lowband = -1; + unsigned int cm[2]; + int b; + + /* Compute how many bits we want to allocate to this band */ + if (i != s->startband) + s->remaining -= consumed; + s->remaining2 = totalbits - consumed - 1; + if (i <= s->codedbands - 1) { + int curr_balance = s->remaining / FFMIN(3, s->codedbands-i); + b = av_clip(FFMIN(s->remaining2 + 1, s->pulses[i] + curr_balance), 0, 16383); + } else + b = 0; + + if (celt_freq_bands[i] - celt_freq_range[i] >= celt_freq_bands[s->startband] && + (update_lowband || lowband_offset == 0)) + lowband_offset = i; + + /* Get a conservative estimate of the collapse_mask's for the bands we're + going to be folding from. 
*/ + if (lowband_offset != 0 && (s->spread != CELT_SPREAD_AGGRESSIVE || + s->blocks > 1 || s->tf_change[i] < 0)) { + int foldstart, foldend; + + /* This ensures we never repeat spectral content within one band */ + effective_lowband = FFMAX(celt_freq_bands[s->startband], + celt_freq_bands[lowband_offset] - celt_freq_range[i]); + /* find the bands [foldstart, foldend) overlapping the folding source */ + foldstart = lowband_offset; + while (celt_freq_bands[--foldstart] > effective_lowband); + foldend = lowband_offset - 1; + while (celt_freq_bands[++foldend] < effective_lowband + celt_freq_range[i]); + + cm[0] = cm[1] = 0; + for (j = foldstart; j < foldend; j++) { + cm[0] |= s->frame[0].collapse_masks[j]; + cm[1] |= s->frame[s->coded_channels - 1].collapse_masks[j]; + } + } else + /* Otherwise, we'll be using the LCG to fold, so all blocks will (almost + always) be non-zero.*/ + cm[0] = cm[1] = (1 << s->blocks) - 1; + + if (s->dualstereo && i == s->intensitystereo) { + /* Switch off dual stereo to do intensity */ + s->dualstereo = 0; + for (j = celt_freq_bands[s->startband] << s->duration; j < band_offset; j++) + norm[j] = (norm[j] + norm2[j]) / 2; + } + + /* dual stereo: both channels are decoded independently, each with b / 2 */ + if (s->dualstereo) { + cm[0] = celt_decode_band(s, rc, i, X, NULL, band_size, b / 2, s->blocks, + effective_lowband != -1 ? norm + (effective_lowband << s->duration) : NULL, s->duration, + norm + band_offset, 0, 1.0f, lowband_scratch, cm[0]); + + cm[1] = celt_decode_band(s, rc, i, Y, NULL, band_size, b/2, s->blocks, + effective_lowband != -1 ? norm2 + (effective_lowband << s->duration) : NULL, s->duration, + norm2 + band_offset, 0, 1.0f, lowband_scratch, cm[1]); + } else { + cm[0] = celt_decode_band(s, rc, i, X, Y, band_size, b, s->blocks, + effective_lowband != -1 ? 
norm + (effective_lowband << s->duration) : NULL, s->duration, + norm + band_offset, 0, 1.0f, lowband_scratch, cm[0]|cm[1]); + + cm[1] = cm[0]; + } + + s->frame[0].collapse_masks[i] = (uint8_t)cm[0]; + s->frame[s->coded_channels - 1].collapse_masks[i] = (uint8_t)cm[1]; + s->remaining += s->pulses[i] + consumed; + + /* Update the folding position only as long as we have 1 bit/sample depth */ + update_lowband = (b > band_size << 3); + } +} + +/** + * Decode one CELT frame and write frame_size samples to each output channel. + * + * @param s decoder context + * @param rc range coder the frame is read from + * @param output one destination buffer per output channel + * @param coded_channels number of channels coded in the frame, 1 or 2 + * @param frame_size frame length in samples; must be CELT_SHORT_BLOCKSIZE << n + * with n <= CELT_MAX_LOG_BLOCKS + * @param startband first band to decode + * @param endband one past the last band to decode, at most CELT_MAX_BANDS + * @return 0 on success, AVERROR_INVALIDDATA if a parameter is rejected + */ +int ff_celt_decode_frame(CeltContext *s, OpusRangeCoder *rc, + float **output, int coded_channels, int frame_size, + int startband, int endband) +{ + int i, j; + + int consumed; // bits of entropy consumed thus far for this frame + int silence = 0; + int transient = 0; + int anticollapse = 0; + CeltIMDCTContext *imdct; + float imdct_scale = 1.0; + + if (coded_channels != 1 && coded_channels != 2) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid number of coded channels: %d\n", + coded_channels); + return AVERROR_INVALIDDATA; + } + if (startband < 0 || startband > endband || endband > CELT_MAX_BANDS) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid start/end band: %d %d\n", + startband, endband); + return AVERROR_INVALIDDATA; + } + + s->flushed = 0; + s->coded_channels = coded_channels; + s->startband = startband; + s->endband = endband; + s->framebits = rc->rb.bytes * 8; + + s->duration = av_log2(frame_size / CELT_SHORT_BLOCKSIZE); + if (s->duration > CELT_MAX_LOG_BLOCKS || + frame_size != CELT_SHORT_BLOCKSIZE * (1 << s->duration)) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid CELT frame size: %d\n", + frame_size); + return AVERROR_INVALIDDATA; + } + + if (!s->output_channels) + s->output_channels = coded_channels; + + memset(s->frame[0].collapse_masks, 0, sizeof(s->frame[0].collapse_masks)); + memset(s->frame[1].collapse_masks, 0, sizeof(s->frame[1].collapse_masks)); + + consumed = opus_rc_tell(rc); + + /* obtain silence flag */ + if (consumed >= s->framebits) + silence = 1; + else if (consumed == 1) + silence = opus_rc_p2model(rc, 15); + 
+ + if (silence) { + consumed = s->framebits; + rc->total_read_bits += s->framebits - opus_rc_tell(rc); + } + + /* obtain post-filter options */ + consumed = parse_postfilter(s, rc, consumed); + + /* obtain transient flag */ + if (s->duration != 0 && consumed+3 <= s->framebits) + transient = opus_rc_p2model(rc, 3); + + /* transient frames are split into 1 << duration blocks, each using imdct[0] */ + s->blocks = transient ? 1 << s->duration : 1; + s->blocksize = frame_size / s->blocks; + + imdct = s->imdct[transient ? 0 : s->duration]; + + /* mono frame: continue from the per-band maximum of both channels' energies */ + if (coded_channels == 1) { + for (i = 0; i < CELT_MAX_BANDS; i++) + s->frame[0].energy[i] = FFMAX(s->frame[0].energy[i], s->frame[1].energy[i]); + } + + celt_decode_coarse_energy(s, rc); + celt_decode_tf_changes (s, rc, transient); + celt_decode_allocation (s, rc); + celt_decode_fine_energy (s, rc); + celt_decode_bands (s, rc); + + if (s->anticollapse_bit) + anticollapse = opus_getrawbits(rc, 1); + + celt_decode_final_energy(s, rc, s->framebits - opus_rc_tell(rc)); + + /* apply anti-collapse processing and denormalization to + * each coded channel */ + for (i = 0; i < s->coded_channels; i++) { + CeltFrame *frame = &s->frame[i]; + + if (anticollapse) + process_anticollapse(s, frame, s->coeffs[i]); + + celt_denormalize(s, frame, s->coeffs[i]); + } + + /* stereo -> mono downmix */ + if (s->output_channels < s->coded_channels) { + s->dsp.vector_fmac_scalar(s->coeffs[0], s->coeffs[1], 1.0, FFALIGN(frame_size, 16)); + imdct_scale = 0.5; + } else if (s->output_channels > s->coded_channels) + memcpy(s->coeffs[1], s->coeffs[0], frame_size * sizeof(float)); + + /* silent frame: force silence energies and zero coefficients for both channels */ + if (silence) { + for (i = 0; i < 2; i++) { + CeltFrame *frame = &s->frame[i]; + + for (j = 0; j < FF_ARRAY_ELEMS(frame->energy); j++) + frame->energy[j] = CELT_ENERGY_SILENCE; + } + memset(s->coeffs, 0, sizeof(s->coeffs)); + } + + /* transform and output for each output channel */ + for (i = 0; i < s->output_channels; i++) { + CeltFrame *frame = &s->frame[i]; + float m = frame->deemph_coeff; + + /* iMDCT and overlap-add */ + for (j = 0; j < s->blocks; j++) { + 
float *dst = frame->buf + 1024 + j * s->blocksize; + + /* block j reads every s->blocks-th coefficient, starting at index j */ + ff_celt_imdct_half(imdct, dst + CELT_OVERLAP / 2, s->coeffs[i] + j, + s->blocks, imdct_scale); + s->dsp.vector_fmul_window(dst, dst, dst + CELT_OVERLAP / 2, + celt_window, CELT_OVERLAP / 2); + } + + /* postfilter */ + celt_postfilter(s, frame); + + /* deemphasis and output scaling */ + for (j = 0; j < frame_size; j++) { + float tmp = frame->buf[1024 - frame_size + j] + m; + m = tmp * CELT_DEEMPH_COEFF; + output[i][j] = tmp / 32768.; + } + frame->deemph_coeff = m; + } + + /* mono frame: copy the decoded energies to the second channel's state */ + if (coded_channels == 1) + memcpy(s->frame[1].energy, s->frame[0].energy, sizeof(s->frame[0].energy)); + + for (i = 0; i < 2; i++ ) { + CeltFrame *frame = &s->frame[i]; + + if (!transient) { + memcpy(frame->prev_energy[1], frame->prev_energy[0], sizeof(frame->prev_energy[0])); + memcpy(frame->prev_energy[0], frame->energy, sizeof(frame->prev_energy[0])); + } else { + for (j = 0; j < CELT_MAX_BANDS; j++) + frame->prev_energy[0][j] = FFMIN(frame->prev_energy[0][j], frame->energy[j]); + } + + /* reset the state of the bands outside [startband, endband) */ + for (j = 0; j < s->startband; j++) { + frame->prev_energy[0][j] = CELT_ENERGY_SILENCE; + frame->energy[j] = 0.0; + } + for (j = s->endband; j < CELT_MAX_BANDS; j++) { + frame->prev_energy[0][j] = CELT_ENERGY_SILENCE; + frame->energy[j] = 0.0; + } + } + + s->seed = rc->range; + + return 0; +} + +/** + * Reset the decoder state of both channels: the previous-energy history is set + * to CELT_ENERGY_SILENCE, the energy, sample buffer and postfilter gain arrays + * are zeroed, and the deemphasis state and the seed are cleared. + * Does nothing if the context is already marked as flushed. + */ +void ff_celt_flush(CeltContext *s) +{ + int i, j; + + if (s->flushed) + return; + + for (i = 0; i < 2; i++) { + CeltFrame *frame = &s->frame[i]; + + for (j = 0; j < CELT_MAX_BANDS; j++) + frame->prev_energy[0][j] = frame->prev_energy[1][j] = CELT_ENERGY_SILENCE; + + memset(frame->energy, 0, sizeof(frame->energy)); + memset(frame->buf, 0, sizeof(frame->buf)); + + memset(frame->pf_gains, 0, sizeof(frame->pf_gains)); + memset(frame->pf_gains_old, 0, sizeof(frame->pf_gains_old)); + memset(frame->pf_gains_new, 0, sizeof(frame->pf_gains_new)); + + frame->deemph_coeff = 0.0; + } + s->seed = 0; + + s->flushed = 1; +} + +/** + * Release the context pointed to by *ps together with all of its iMDCT + * contexts, using av_freep(ps). A NULL *ps is ignored. + */ +void ff_celt_free(CeltContext **ps) +{ + CeltContext *s = 
*ps; + int i; + + if (!s) + return; + + for (i = 0; i < FF_ARRAY_ELEMS(s->imdct); i++) + ff_celt_imdct_uninit(&s->imdct[i]); + + av_freep(ps); +} + +/** + * Allocate and initialize a CELT decoder context. + * + * @param avctx codec context stored in the new context and used for logging + * @param ps receives the new context on success + * @param output_channels number of output channels, 1 or 2 + * @return 0 on success, AVERROR(EINVAL) for an invalid channel count, + * AVERROR(ENOMEM) if the context cannot be allocated, or the error + * returned by ff_celt_imdct_init() + */ +int ff_celt_init(AVCodecContext *avctx, CeltContext **ps, int output_channels) +{ + CeltContext *s; + int i, ret; + + if (output_channels != 1 && output_channels != 2) { + av_log(avctx, AV_LOG_ERROR, "Invalid number of output channels: %d\n", + output_channels); + return AVERROR(EINVAL); + } + + s = av_mallocz(sizeof(*s)); + if (!s) + return AVERROR(ENOMEM); + + s->avctx = avctx; + s->output_channels = output_channels; + + for (i = 0; i < FF_ARRAY_ELEMS(s->imdct); i++) { + /* imdct[i] is created with size exponent i + 3, i.e. len2 = 15 << (i + 3) */ + ret = ff_celt_imdct_init(&s->imdct[i], i + 3); + if (ret < 0) + goto fail; + } + + avpriv_float_dsp_init(&s->dsp, avctx->flags & CODEC_FLAG_BITEXACT); + + ff_celt_flush(s); + + *ps = s; + + return 0; +fail: + /* s was zero-allocated, so the imdct[] entries not yet created are NULL and are skipped by ff_celt_imdct_uninit() */ + ff_celt_free(&s); + return ret; +} diff --git a/libavcodec/opus_imdct.c b/libavcodec/opus_imdct.c new file mode 100644 index 0000000000..7bbaa35227 --- /dev/null +++ b/libavcodec/opus_imdct.c @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2013-2014 Mozilla Corporation + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Celt non-power of 2 iMDCT + */ + +#include +#include + +#include "libavutil/attributes.h" +#include "libavutil/common.h" + +#include "fft.h" +#include "opus.h" + +// complex c = a * b +#define CMUL3(cre, cim, are, aim, bre, bim) \ +do { \ + cre = are * bre - aim * bim; \ + cim = are * bim + aim * bre; \ +} while (0) + +#define CMUL(c, a, b) CMUL3((c).re, (c).im, (a).re, (a).im, (b).re, (b).im) + +// complex c = a * b +// d = a * conjugate(b) +#define CMUL2(c, d, a, b) \ +do { \ + float are = (a).re; \ + float aim = (a).im; \ + float bre = (b).re; \ + float bim = (b).im; \ + float rr = are * bre; \ + float ri = are * bim; \ + float ir = aim * bre; \ + float ii = aim * bim; \ + (c).re = rr - ii; \ + (c).im = ri + ir; \ + (d).re = rr + ii; \ + (d).im = -ri + ir; \ +} while (0) + +/** + * State of one iMDCT instance, filled in by ff_celt_imdct_init(ps, N): + * fft_n - N - 1, the size argument handed to fft_calc() + * len2 - 15 * 2^N + * len4 - len2 / 2 + * tmp - scratch buffer holding the pre-rotated FFT input + * twiddle_exptab - len4 twiddle factors used by the pre- and post-rotation + * exptab - exptab[i] holds cos/sin of 2 * pi * j / (15 * 2^i) + */ +struct CeltIMDCTContext { + int fft_n; + int len2; + int len4; + + FFTComplex *tmp; + + FFTComplex *twiddle_exptab; + + FFTComplex *exptab[6]; +}; + +/** + * Free an iMDCT context and all of its tables with av_freep(). + * A NULL *ps is ignored. + */ +av_cold void ff_celt_imdct_uninit(CeltIMDCTContext **ps) +{ + CeltIMDCTContext *s = *ps; + int i; + + if (!s) + return; + + for (i = 0; i < FF_ARRAY_ELEMS(s->exptab); i++) + av_freep(&s->exptab[i]); + + av_freep(&s->twiddle_exptab); + + av_freep(&s->tmp); + + av_freep(ps); +} + +/** + * Allocate an iMDCT context with len2 = 15 * 2^N and precompute its tables. + * + * @param ps receives the new context on success + * @param N size exponent + * @return 0 on success, AVERROR(EINVAL) if 15 * 2^N exceeds + * CELT_MAX_FRAME_SIZE, AVERROR(ENOMEM) if an allocation fails + */ +av_cold int ff_celt_imdct_init(CeltIMDCTContext **ps, int N) +{ + CeltIMDCTContext *s; + int len2 = 15 * (1 << N); + int len = 2 * len2; + int i, j; + + if (len2 > CELT_MAX_FRAME_SIZE) + return AVERROR(EINVAL); + + s = av_mallocz(sizeof(*s)); + if (!s) + return AVERROR(ENOMEM); + + s->fft_n = N - 1; + s->len4 = len2 / 2; + s->len2 = len2; + + s->tmp = av_malloc(len * 2 * sizeof(*s->tmp)); + if (!s->tmp) + goto fail; + + s->twiddle_exptab = av_malloc(s->len4 * sizeof(*s->twiddle_exptab)); + if (!s->twiddle_exptab) + goto fail; + + 
for (i = 0; i < s->len4; i++) { + s->twiddle_exptab[i].re = cos(2 * M_PI * (i + 0.125 + s->len4) / len); + s->twiddle_exptab[i].im = sin(2 * M_PI * (i + 0.125 + s->len4) / len); + } + + for (i = 0; i < FF_ARRAY_ELEMS(s->exptab); i++) { + /* number of roots in table i; note that this N shadows the function parameter N */ + int N = 15 * (1 << i); + s->exptab[i] = av_malloc(sizeof(*s->exptab[i]) * FFMAX(N, 19)); + if (!s->exptab[i]) + goto fail; + + for (j = 0; j < N; j++) { + s->exptab[i][j].re = cos(2 * M_PI * j / N); + s->exptab[i][j].im = sin(2 * M_PI * j / N); + } + } + + // wrap around to simplify fft15 + for (j = 15; j < 19; j++) + s->exptab[0][j] = s->exptab[0][j - 15]; + + *ps = s; + + return 0; +fail: + ff_celt_imdct_uninit(&s); + return AVERROR(ENOMEM); +} + +/** + * 5-point complex DFT with the kernel exp(2 * i * pi * j * k / 5): + * reads in[0], in[stride], ..., in[4 * stride] and writes out[0..4]. + */ +static void fft5(FFTComplex *out, const FFTComplex *in, int stride) +{ + // [0] = exp(2 * i * pi / 5), [1] = exp(2 * i * pi * 2 / 5) + static const FFTComplex fact[] = { { 0.30901699437494745, 0.95105651629515353 }, + { -0.80901699437494734, 0.58778525229247325 } }; + + FFTComplex z[4][4]; + + /* z[m][0..3] = in[(m + 1) * stride] times fact[0], fact[1], conj(fact[1]), conj(fact[0]) */ + CMUL2(z[0][0], z[0][3], in[1 * stride], fact[0]); + CMUL2(z[0][1], z[0][2], in[1 * stride], fact[1]); + CMUL2(z[1][0], z[1][3], in[2 * stride], fact[0]); + CMUL2(z[1][1], z[1][2], in[2 * stride], fact[1]); + CMUL2(z[2][0], z[2][3], in[3 * stride], fact[0]); + CMUL2(z[2][1], z[2][2], in[3 * stride], fact[1]); + CMUL2(z[3][0], z[3][3], in[4 * stride], fact[0]); + CMUL2(z[3][1], z[3][2], in[4 * stride], fact[1]); + + out[0].re = in[0].re + in[stride].re + in[2 * stride].re + in[3 * stride].re + in[4 * stride].re; + out[0].im = in[0].im + in[stride].im + in[2 * stride].im + in[3 * stride].im + in[4 * stride].im; + + out[1].re = in[0].re + z[0][0].re + z[1][1].re + z[2][2].re + z[3][3].re; + out[1].im = in[0].im + z[0][0].im + z[1][1].im + z[2][2].im + z[3][3].im; + + out[2].re = in[0].re + z[0][1].re + z[1][3].re + z[2][0].re + z[3][2].re; + out[2].im = in[0].im + z[0][1].im + z[1][3].im + z[2][0].im + z[3][2].im; + + out[3].re = in[0].re + z[0][2].re + z[1][0].re + z[2][3].re + z[3][1].re; + 
out[3].im = in[0].im + z[0][2].im + z[1][0].im + z[2][3].im + z[3][1].im; + + out[4].re = in[0].re + z[0][3].re + z[1][2].re + z[2][1].re + z[3][0].re; + out[4].im = in[0].im + z[0][3].im + z[1][2].im + z[2][1].im + z[3][0].im; +} + +/** + * 15-point DFT: three 5-point DFTs over the input decimated by 3, combined + * with the twiddle factors in s->exptab[0]. + */ +static void fft15(CeltIMDCTContext *s, FFTComplex *out, const FFTComplex *in, int stride) +{ + const FFTComplex *exptab = s->exptab[0]; + FFTComplex tmp[5]; + FFTComplex tmp1[5]; + FFTComplex tmp2[5]; + int k; + + fft5(tmp, in, stride * 3); + fft5(tmp1, in + stride, stride * 3); + fft5(tmp2, in + 2 * stride, stride * 3); + + for (k = 0; k < 5; k++) { + FFTComplex t1, t2; + + CMUL(t1, tmp1[k], exptab[k]); + CMUL(t2, tmp2[k], exptab[2 * k]); + out[k].re = tmp[k].re + t1.re + t2.re; + out[k].im = tmp[k].im + t1.im + t2.im; + + /* 2 * (k + 5) reaches 18: this relies on the wrapped-around entries 15..18 of exptab[0] */ + CMUL(t1, tmp1[k], exptab[k + 5]); + CMUL(t2, tmp2[k], exptab[2 * (k + 5)]); + out[k + 5].re = tmp[k].re + t1.re + t2.re; + out[k + 5].im = tmp[k].im + t1.im + t2.im; + + /* 2 * k + 5 is 2 * (k + 10) reduced modulo 15 */ + CMUL(t1, tmp1[k], exptab[k + 10]); + CMUL(t2, tmp2[k], exptab[2 * k + 5]); + out[k + 10].re = tmp[k].re + t1.re + t2.re; + out[k + 10].im = tmp[k].im + t1.im + t2.im; + } +} + +/* + * FFT of the length 15 * (2^N) + */ +static void fft_calc(CeltIMDCTContext *s, FFTComplex *out, const FFTComplex *in, int N, int stride) +{ + if (N) { + const FFTComplex *exptab = s->exptab[N]; + const int len2 = 15 * (1 << (N - 1)); + int k; + + /* transform the even-indexed inputs into the first half of out and the odd-indexed ones into the second half */ + fft_calc(s, out, in, N - 1, stride * 2); + fft_calc(s, out + len2, in + stride, N - 1, stride * 2); + + /* combine both halves with the twiddle factors of this level */ + for (k = 0; k < len2; k++) { + FFTComplex t; + + CMUL(t, out[len2 + k], exptab[k]); + + out[len2 + k].re = out[k].re - t.re; + out[len2 + k].im = out[k].im - t.im; + + out[k].re += t.re; + out[k].im += t.im; + } + } else + fft15(s, out, in, stride); +} + +/** + * iMDCT of one block: pre-rotate the input with the twiddle table into s->tmp, + * run the FFT into dst, then post-rotate and scale the result in place. + * + * @param s iMDCT context + * @param dst output buffer, written as s->len4 FFTComplex values + * @param src input coefficients + * @param stride distance, in floats, between consecutive input coefficients + * @param scale factor applied to every output value + */ +void ff_celt_imdct_half(CeltIMDCTContext *s, float *dst, const float *src, + int stride, float scale) +{ + FFTComplex *z = (FFTComplex *)dst; + const int len8 = s->len4 / 2; + const float *in1 = src; + const float *in2 = src + (s->len2 - 1) * stride; + int i; + + for (i = 0; 
i < s->len4; i++) { + FFTComplex tmp = { *in2, *in1 }; + CMUL(s->tmp[i], tmp, s->twiddle_exptab[i]); + in1 += 2 * stride; + in2 -= 2 * stride; + } + + fft_calc(s, z, s->tmp, s->fft_n, 1); + + /* post-rotation: walk outwards from the middle; CMUL3 gets its operands with re/im swapped and the imaginary results of each mirrored pair are exchanged */ + for (i = 0; i < len8; i++) { + float r0, i0, r1, i1; + + CMUL3(r0, i1, z[len8 - i - 1].im, z[len8 - i - 1].re, s->twiddle_exptab[len8 - i - 1].im, s->twiddle_exptab[len8 - i - 1].re); + CMUL3(r1, i0, z[len8 + i].im, z[len8 + i].re, s->twiddle_exptab[len8 + i].im, s->twiddle_exptab[len8 + i].re); + z[len8 - i - 1].re = scale * r0; + z[len8 - i - 1].im = scale * i0; + z[len8 + i].re = scale * r1; + z[len8 + i].im = scale * i1; + } +} diff --git a/libavcodec/opus_parser.c b/libavcodec/opus_parser.c new file mode 100644 index 0000000000..8a2bc22043 --- /dev/null +++ b/libavcodec/opus_parser.c @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2013-2014 Mozilla Corporation + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Opus parser + * + * Determines the duration for each packet. 
+ */ + +#include "avcodec.h" +#include "opus.h" + +/** + * Parser private data: the context filled from the extradata, the most + * recently parsed packet header, and a flag recording that the extradata has + * been parsed successfully. + */ +typedef struct OpusParseContext { + OpusContext ctx; + OpusPacket pkt; + int extradata_parsed; +} OpusParseContext; + +/** + * Parser callback: hand the input buffer through unchanged and set + * ctx->duration to frame_count * frame_duration of the parsed packet header. + * + * Extradata, if present, is parsed until it succeeds once. Parse errors are + * logged but not propagated: the buffer is still returned as the output + * packet and ctx->duration is left untouched. + * + * @return buf_size, or 0 (without touching the outputs) for an empty buffer + */ +static int opus_parse(AVCodecParserContext *ctx, AVCodecContext *avctx, + const uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size) +{ + OpusParseContext *s = ctx->priv_data; + int ret; + + if (!buf_size) + return 0; + + if (avctx->extradata && !s->extradata_parsed) { + ret = ff_opus_parse_extradata(avctx, &s->ctx); + if (ret < 0) { + av_log(avctx, AV_LOG_ERROR, "Error parsing Ogg extradata.\n"); + goto fail; + } + /* only s->ctx.nb_streams is read below, so the channel map is released right away */ + av_freep(&s->ctx.channel_maps); + s->extradata_parsed = 1; + } + + ret = ff_opus_parse_packet(&s->pkt, buf, buf_size, s->ctx.nb_streams > 1); + if (ret < 0) { + av_log(avctx, AV_LOG_ERROR, "Error parsing Opus packet header.\n"); + goto fail; + } + + ctx->duration = s->pkt.frame_count * s->pkt.frame_duration; + + /* also reached on success: the packet is always passed through */ +fail: + *poutbuf = buf; + *poutbuf_size = buf_size; + return buf_size; +} + +/** Parser registration for AV_CODEC_ID_OPUS. */ +AVCodecParser ff_opus_parser = { + .codec_ids = { AV_CODEC_ID_OPUS }, + .priv_data_size = sizeof(OpusParseContext), + .parser_parse = opus_parse, +}; diff --git a/libavcodec/opus_silk.c b/libavcodec/opus_silk.c new file mode 100644 index 0000000000..3552484542 --- /dev/null +++ b/libavcodec/opus_silk.c @@ -0,0 +1,1597 @@ +/* + * Copyright (c) 2012 Andrew D'Addesio + * Copyright (c) 2013-2014 Mozilla Corporation + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Opus SILK decoder + */ + +#include + +#include "opus.h" + +/** + * State stored in each SilkContext::frame[] entry. + * NOTE(review): presumably one entry per channel (SilkContext holds two) -- confirm against the decoder code. + */ +typedef struct SilkFrame { + int coded; + int log_gain; + int16_t nlsf[16]; + float lpc[16]; + + float output [2 * SILK_HISTORY]; + float lpc_history[2 * SILK_HISTORY]; + int primarylag; + + int prev_voiced; +} SilkFrame; + +/** SILK decoder context. */ +struct SilkContext { + AVCodecContext *avctx; + int output_channels; + + int midonly; + int subframes; + int sflength; + int flength; + int nlsf_interp_factor; + + enum OpusBandwidth bandwidth; + int wb; + + SilkFrame frame[2]; + float prev_stereo_weights[2]; + float stereo_weights[2]; + + int prev_coded_channels; +}; + +/* + * Model tables (silk_model_*). + * NOTE(review): every table in this group starts with 256 and then lists + * non-decreasing values ending in 256, which looks like a total followed by + * cumulative frequencies -- confirm against the range decoder. + */ +static const uint16_t silk_model_stereo_s1[] = { + 256, 7, 9, 10, 11, 12, 22, 46, 54, 55, 56, 59, 82, 174, 197, 200, + 201, 202, 210, 234, 244, 245, 246, 247, 249, 256 +}; + +static const uint16_t silk_model_stereo_s2[] = {256, 85, 171, 256}; + +static const uint16_t silk_model_stereo_s3[] = {256, 51, 102, 154, 205, 256}; + +static const uint16_t silk_model_mid_only[] = {256, 192, 256}; + +static const uint16_t silk_model_frame_type_inactive[] = {256, 26, 256}; + +static const uint16_t silk_model_frame_type_active[] = {256, 24, 98, 246, 256}; + +static const uint16_t silk_model_gain_highbits[3][9] = { + {256, 32, 144, 212, 241, 253, 254, 255, 256}, + {256, 2, 19, 64, 124, 186, 233, 252, 256}, + {256, 1, 4, 30, 101, 195, 245, 254, 256} +}; + +static const uint16_t silk_model_gain_lowbits[] = {256, 32, 64, 96, 128, 160, 192, 224, 256}; + +static const uint16_t silk_model_gain_delta[] = { + 256, 6, 11, 22, 53, 185, 206, 214, 218, 221, 223, 225, 227, 228, 229, 230, + 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, + 247, 248, 249, 250, 251, 252, 253, 254, 255, 256 +}; +static const uint16_t 
silk_model_lsf_s1[2][2][33] = { + { + { // NB or MB, unvoiced + 256, 44, 78, 108, 127, 148, 160, 171, 174, 177, 179, 195, 197, 199, 200, 205, + 207, 208, 211, 214, 215, 216, 218, 220, 222, 225, 226, 235, 244, 246, 253, 255, 256 + }, { // NB or MB, voiced + 256, 1, 11, 12, 20, 23, 31, 39, 53, 66, 80, 81, 95, 107, 120, 131, + 142, 154, 165, 175, 185, 196, 204, 213, 221, 228, 236, 237, 238, 244, 245, 251, 256 + } + }, { + { // WB, unvoiced + 256, 31, 52, 55, 72, 73, 81, 98, 102, 103, 121, 137, 141, 143, 146, 147, + 157, 158, 161, 177, 188, 204, 206, 208, 211, 213, 224, 225, 229, 238, 246, 253, 256 + }, { // WB, voiced + 256, 1, 5, 21, 26, 44, 55, 60, 74, 89, 90, 93, 105, 118, 132, 146, + 152, 166, 178, 180, 186, 187, 199, 211, 222, 232, 235, 245, 250, 251, 252, 253, 256 + } + } +}; + +static const uint16_t silk_model_lsf_s2[32][10] = { + // NB, MB + { 256, 1, 2, 3, 18, 242, 253, 254, 255, 256 }, + { 256, 1, 2, 4, 38, 221, 253, 254, 255, 256 }, + { 256, 1, 2, 6, 48, 197, 252, 254, 255, 256 }, + { 256, 1, 2, 10, 62, 185, 246, 254, 255, 256 }, + { 256, 1, 4, 20, 73, 174, 248, 254, 255, 256 }, + { 256, 1, 4, 21, 76, 166, 239, 254, 255, 256 }, + { 256, 1, 8, 32, 85, 159, 226, 252, 255, 256 }, + { 256, 1, 2, 20, 83, 161, 219, 249, 255, 256 }, + + // WB + { 256, 1, 2, 3, 12, 244, 253, 254, 255, 256 }, + { 256, 1, 2, 4, 32, 218, 253, 254, 255, 256 }, + { 256, 1, 2, 5, 47, 199, 252, 254, 255, 256 }, + { 256, 1, 2, 12, 61, 187, 252, 254, 255, 256 }, + { 256, 1, 5, 24, 72, 172, 249, 254, 255, 256 }, + { 256, 1, 2, 16, 70, 170, 242, 254, 255, 256 }, + { 256, 1, 2, 17, 78, 165, 226, 251, 255, 256 }, + { 256, 1, 8, 29, 79, 156, 237, 254, 255, 256 } +}; + +static const uint16_t silk_model_lsf_s2_ext[] = { 256, 156, 216, 240, 249, 253, 255, 256 }; + +static const uint16_t silk_model_lsf_interpolation_offset[] = { 256, 13, 35, 64, 75, 256 }; + +static const uint16_t silk_model_pitch_highbits[] = { + 256, 3, 6, 12, 23, 44, 74, 106, 125, 136, 146, 158, 171, 184, 196, 207, + 216, 224, 
231, 237, 241, 243, 245, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256 +}; + +static const uint16_t silk_model_pitch_lowbits_nb[]= { 256, 64, 128, 192, 256 }; + +static const uint16_t silk_model_pitch_lowbits_mb[]= { 256, 43, 85, 128, 171, 213, 256 }; + +static const uint16_t silk_model_pitch_lowbits_wb[]= { 256, 32, 64, 96, 128, 160, 192, 224, 256 }; + +static const uint16_t silk_model_pitch_delta[] = { + 256, 46, 48, 50, 53, 57, 63, 73, 88, 114, 152, 182, 204, 219, 229, 236, + 242, 246, 250, 252, 254, 256 +}; + +static const uint16_t silk_model_pitch_contour_nb10ms[] = { 256, 143, 193, 256 }; + +static const uint16_t silk_model_pitch_contour_nb20ms[] = { + 256, 68, 80, 101, 118, 137, 159, 189, 213, 230, 246, 256 +}; + +static const uint16_t silk_model_pitch_contour_mbwb10ms[] = { + 256, 91, 137, 176, 195, 209, 221, 229, 236, 242, 247, 252, 256 +}; + +static const uint16_t silk_model_pitch_contour_mbwb20ms[] = { + 256, 33, 55, 73, 89, 104, 118, 132, 145, 158, 168, 177, 186, 194, 200, 206, + 212, 217, 221, 225, 229, 232, 235, 238, 240, 242, 244, 246, 248, 250, 252, 253, + 254, 255, 256 +}; + +static const uint16_t silk_model_ltp_filter[] = { 256, 77, 157, 256 }; + +static const uint16_t silk_model_ltp_filter0_sel[] = { + 256, 185, 200, 213, 226, 235, 244, 250, 256 +}; + +static const uint16_t silk_model_ltp_filter1_sel[] = { + 256, 57, 91, 112, 132, 147, 160, 172, 185, 195, 205, 214, 224, 233, 241, 248, 256 +}; + +static const uint16_t silk_model_ltp_filter2_sel[] = { + 256, 15, 31, 45, 57, 69, 81, 92, 103, 114, 124, 133, 142, 151, 160, 168, + 176, 184, 192, 199, 206, 212, 218, 223, 227, 232, 236, 240, 244, 247, 251, 254, 256 +}; + +static const uint16_t silk_model_ltp_scale_index[] = { 256, 128, 192, 256 }; + +static const uint16_t silk_model_lcg_seed[] = { 256, 64, 128, 192, 256 }; + +static const uint16_t silk_model_exc_rate[2][10] = { + { 256, 15, 66, 78, 124, 169, 182, 215, 242, 256 }, // unvoiced + { 256, 33, 63, 99, 116, 150, 199, 217, 238, 256 } // 
voiced +}; + +static const uint16_t silk_model_pulse_count[11][19] = { + { 256, 131, 205, 230, 238, 241, 244, 245, 246, + 247, 248, 249, 250, 251, 252, 253, 254, 255, 256 }, + { 256, 58, 151, 211, 234, 241, 244, 245, 246, + 247, 248, 249, 250, 251, 252, 253, 254, 255, 256 }, + { 256, 43, 94, 140, 173, 197, 213, 224, 232, + 238, 241, 244, 247, 249, 250, 251, 253, 254, 256 }, + { 256, 17, 69, 140, 197, 228, 240, 245, 246, + 247, 248, 249, 250, 251, 252, 253, 254, 255, 256 }, + { 256, 6, 27, 68, 121, 170, 205, 226, 237, + 243, 246, 248, 250, 251, 252, 253, 254, 255, 256 }, + { 256, 7, 21, 43, 71, 100, 128, 153, 173, + 190, 203, 214, 223, 230, 235, 239, 243, 246, 256 }, + { 256, 2, 7, 21, 50, 92, 138, 179, 210, + 229, 240, 246, 249, 251, 252, 253, 254, 255, 256 }, + { 256, 1, 3, 7, 17, 36, 65, 100, 137, + 171, 199, 219, 233, 241, 246, 250, 252, 254, 256 }, + { 256, 1, 3, 5, 10, 19, 33, 53, 77, + 104, 132, 158, 181, 201, 216, 227, 235, 241, 256 }, + { 256, 1, 2, 3, 9, 36, 94, 150, 189, + 214, 228, 238, 244, 247, 250, 252, 253, 254, 256 }, + { 256, 2, 3, 9, 36, 94, 150, 189, 214, + 228, 238, 244, 247, 250, 252, 253, 254, 256, 256 } +}; + +static const uint16_t silk_model_pulse_location[4][168] = { + { + 256, 126, 256, + 256, 56, 198, 256, + 256, 25, 126, 230, 256, + 256, 12, 72, 180, 244, 256, + 256, 7, 42, 126, 213, 250, 256, + 256, 4, 24, 83, 169, 232, 253, 256, + 256, 3, 15, 53, 125, 200, 242, 254, 256, + 256, 2, 10, 35, 89, 162, 221, 248, 255, 256, + 256, 2, 7, 24, 63, 126, 191, 233, 251, 255, 256, + 256, 1, 5, 17, 45, 94, 157, 211, 241, 252, 255, 256, + 256, 1, 5, 13, 33, 70, 125, 182, 223, 245, 253, 255, 256, + 256, 1, 4, 11, 26, 54, 98, 151, 199, 232, 248, 254, 255, 256, + 256, 1, 3, 9, 21, 42, 77, 124, 172, 212, 237, 249, 254, 255, 256, + 256, 1, 2, 6, 16, 33, 60, 97, 144, 187, 220, 241, 250, 254, 255, 256, + 256, 1, 2, 3, 11, 25, 47, 80, 120, 163, 201, 229, 245, 253, 254, 255, 256, + 256, 1, 2, 3, 4, 17, 35, 62, 98, 139, 180, 214, 238, 252, 253, 254, 255, 256 + 
},{ + 256, 127, 256, + 256, 53, 202, 256, + 256, 22, 127, 233, 256, + 256, 11, 72, 183, 246, 256, + 256, 6, 41, 127, 215, 251, 256, + 256, 4, 24, 83, 170, 232, 253, 256, + 256, 3, 16, 56, 127, 200, 241, 254, 256, + 256, 3, 12, 39, 92, 162, 218, 246, 255, 256, + 256, 3, 11, 30, 67, 124, 185, 229, 249, 255, 256, + 256, 3, 10, 25, 53, 97, 151, 200, 233, 250, 255, 256, + 256, 1, 8, 21, 43, 77, 123, 171, 209, 237, 251, 255, 256, + 256, 1, 2, 13, 35, 62, 97, 139, 186, 219, 244, 254, 255, 256, + 256, 1, 2, 8, 22, 48, 85, 128, 171, 208, 234, 248, 254, 255, 256, + 256, 1, 2, 6, 16, 36, 67, 107, 149, 189, 220, 240, 250, 254, 255, 256, + 256, 1, 2, 5, 13, 29, 55, 90, 128, 166, 201, 227, 243, 251, 254, 255, 256, + 256, 1, 2, 4, 10, 22, 43, 73, 109, 147, 183, 213, 234, 246, 252, 254, 255, 256 + },{ + 256, 127, 256, + 256, 49, 206, 256, + 256, 20, 127, 236, 256, + 256, 11, 71, 184, 246, 256, + 256, 7, 43, 127, 214, 250, 256, + 256, 6, 30, 87, 169, 229, 252, 256, + 256, 5, 23, 62, 126, 194, 236, 252, 256, + 256, 6, 20, 49, 96, 157, 209, 239, 253, 256, + 256, 1, 16, 39, 74, 125, 175, 215, 245, 255, 256, + 256, 1, 2, 23, 55, 97, 149, 195, 236, 254, 255, 256, + 256, 1, 7, 23, 50, 86, 128, 170, 206, 233, 249, 255, 256, + 256, 1, 6, 18, 39, 70, 108, 148, 186, 217, 238, 250, 255, 256, + 256, 1, 4, 13, 30, 56, 90, 128, 166, 200, 226, 243, 252, 255, 256, + 256, 1, 4, 11, 25, 47, 76, 110, 146, 180, 209, 231, 245, 252, 255, 256, + 256, 1, 3, 8, 19, 37, 62, 93, 128, 163, 194, 219, 237, 248, 253, 255, 256, + 256, 1, 2, 6, 15, 30, 51, 79, 111, 145, 177, 205, 226, 241, 250, 254, 255, 256 + },{ + 256, 128, 256, + 256, 42, 214, 256, + 256, 21, 128, 235, 256, + 256, 12, 72, 184, 245, 256, + 256, 8, 42, 128, 214, 249, 256, + 256, 8, 31, 86, 176, 231, 251, 256, + 256, 5, 20, 58, 130, 202, 238, 253, 256, + 256, 6, 18, 45, 97, 174, 221, 241, 251, 256, + 256, 6, 25, 53, 88, 128, 168, 203, 231, 250, 256, + 256, 4, 18, 40, 71, 108, 148, 185, 216, 238, 252, 256, + 256, 3, 13, 31, 57, 90, 128, 166, 199, 
225, 243, 253, 256, + 256, 2, 10, 23, 44, 73, 109, 147, 183, 212, 233, 246, 254, 256, + 256, 1, 6, 16, 33, 58, 90, 128, 166, 198, 223, 240, 250, 255, 256, + 256, 1, 5, 12, 25, 46, 75, 110, 146, 181, 210, 231, 244, 251, 255, 256, + 256, 1, 3, 8, 18, 35, 60, 92, 128, 164, 196, 221, 238, 248, 253, 255, 256, + 256, 1, 3, 7, 14, 27, 48, 76, 110, 146, 180, 208, 229, 242, 249, 253, 255, 256 + } +}; + +static const uint16_t silk_model_excitation_lsb[] = {256, 136, 256}; + +static const uint16_t silk_model_excitation_sign[3][2][7][3] = { + { // Inactive + { // Low offset + {256, 2, 256}, + {256, 207, 256}, + {256, 189, 256}, + {256, 179, 256}, + {256, 174, 256}, + {256, 163, 256}, + {256, 157, 256} + }, { // High offset + {256, 58, 256}, + {256, 245, 256}, + {256, 238, 256}, + {256, 232, 256}, + {256, 225, 256}, + {256, 220, 256}, + {256, 211, 256} + } + }, { // Unvoiced + { // Low offset + {256, 1, 256}, + {256, 210, 256}, + {256, 190, 256}, + {256, 178, 256}, + {256, 169, 256}, + {256, 162, 256}, + {256, 152, 256} + }, { // High offset + {256, 48, 256}, + {256, 242, 256}, + {256, 235, 256}, + {256, 224, 256}, + {256, 214, 256}, + {256, 205, 256}, + {256, 190, 256} + } + }, { // Voiced + { // Low offset + {256, 1, 256}, + {256, 162, 256}, + {256, 152, 256}, + {256, 147, 256}, + {256, 144, 256}, + {256, 141, 256}, + {256, 138, 256} + }, { // High offset + {256, 8, 256}, + {256, 203, 256}, + {256, 187, 256}, + {256, 176, 256}, + {256, 168, 256}, + {256, 161, 256}, + {256, 154, 256} + } + } +}; + +static const int16_t silk_stereo_weights[] = { + -13732, -10050, -8266, -7526, -6500, -5000, -2950, -820, + 820, 2950, 5000, 6500, 7526, 8266, 10050, 13732 +}; + +static const uint8_t silk_lsf_s2_model_sel_nbmb[32][10] = { + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 1, 3, 1, 2, 2, 1, 2, 1, 1, 1 }, + { 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, + { 1, 2, 2, 2, 2, 1, 2, 1, 1, 1 }, + { 2, 3, 3, 3, 3, 2, 2, 2, 2, 2 }, + { 0, 5, 3, 3, 2, 2, 2, 2, 1, 1 }, + { 0, 2, 2, 2, 2, 2, 2, 2, 2, 1 }, + { 2, 3, 6, 
4, 4, 4, 5, 4, 5, 5 }, + { 2, 4, 5, 5, 4, 5, 4, 6, 4, 4 }, + { 2, 4, 4, 7, 4, 5, 4, 5, 5, 4 }, + { 4, 3, 3, 3, 2, 3, 2, 2, 2, 2 }, + { 1, 5, 5, 6, 4, 5, 4, 5, 5, 5 }, + { 2, 7, 4, 6, 5, 5, 5, 5, 5, 5 }, + { 2, 7, 5, 5, 5, 5, 5, 6, 5, 4 }, + { 3, 3, 5, 4, 4, 5, 4, 5, 4, 4 }, + { 2, 3, 3, 5, 5, 4, 4, 4, 4, 4 }, + { 2, 4, 4, 6, 4, 5, 4, 5, 5, 5 }, + { 2, 5, 4, 6, 5, 5, 5, 4, 5, 4 }, + { 2, 7, 4, 5, 4, 5, 4, 5, 5, 5 }, + { 2, 5, 4, 6, 7, 6, 5, 6, 5, 4 }, + { 3, 6, 7, 4, 6, 5, 5, 6, 4, 5 }, + { 2, 7, 6, 4, 4, 4, 5, 4, 5, 5 }, + { 4, 5, 5, 4, 6, 6, 5, 6, 5, 4 }, + { 2, 5, 5, 6, 5, 6, 4, 6, 4, 4 }, + { 4, 5, 5, 5, 3, 7, 4, 5, 5, 4 }, + { 2, 3, 4, 5, 5, 6, 4, 5, 5, 4 }, + { 2, 3, 2, 3, 3, 4, 2, 3, 3, 3 }, + { 1, 1, 2, 2, 2, 2, 2, 3, 2, 2 }, + { 4, 5, 5, 6, 6, 6, 5, 6, 4, 5 }, + { 3, 5, 5, 4, 4, 4, 4, 3, 3, 2 }, + { 2, 5, 3, 7, 5, 5, 4, 4, 5, 4 }, + { 4, 4, 5, 4, 5, 6, 5, 6, 5, 4 } +}; + +static const uint8_t silk_lsf_s2_model_sel_wb[32][16] = { + { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 }, + { 10, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10, 9, 9, 9, 8, 11 }, + { 10, 13, 13, 11, 15, 12, 12, 13, 10, 13, 12, 13, 13, 12, 11, 11 }, + { 8, 10, 9, 10, 10, 9, 9, 9, 9, 9, 8, 8, 8, 8, 8, 9 }, + { 8, 14, 13, 12, 14, 12, 15, 13, 12, 12, 12, 13, 13, 12, 12, 11 }, + { 8, 11, 13, 13, 12, 11, 11, 13, 11, 11, 11, 11, 11, 11, 10, 12 }, + { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 }, + { 8, 10, 14, 11, 15, 10, 13, 11, 12, 13, 13, 12, 11, 11, 10, 11 }, + { 8, 14, 10, 14, 14, 12, 13, 12, 14, 13, 12, 12, 13, 11, 11, 11 }, + { 10, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 }, + { 8, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9 }, + { 10, 10, 11, 12, 13, 11, 11, 11, 11, 11, 11, 11, 10, 10, 9, 11 }, + { 10, 10, 11, 11, 12, 11, 11, 11, 11, 11, 11, 11, 11, 10, 9, 11 }, + { 11, 12, 12, 12, 14, 12, 12, 13, 11, 13, 12, 12, 13, 12, 11, 12 }, + { 8, 14, 12, 13, 12, 15, 13, 10, 14, 13, 15, 12, 12, 11, 13, 11 }, + { 8, 9, 8, 9, 9, 9, 9, 9, 9, 9, 8, 8, 8, 8, 9, 8 }, + { 9, 14, 13, 15, 13, 12, 13, 
11, 12, 13, 12, 12, 12, 11, 11, 12 }, + { 9, 11, 11, 12, 12, 11, 11, 13, 10, 11, 11, 13, 13, 13, 11, 12 }, + { 10, 11, 11, 10, 10, 10, 11, 10, 9, 10, 9, 10, 9, 9, 9, 12 }, + { 8, 10, 11, 13, 11, 11, 10, 10, 10, 9, 9, 8, 8, 8, 8, 8 }, + { 11, 12, 11, 13, 11, 11, 10, 10, 9, 9, 9, 9, 9, 10, 10, 12 }, + { 10, 14, 11, 15, 15, 12, 13, 12, 13, 11, 13, 11, 11, 10, 11, 11 }, + { 10, 11, 13, 14, 14, 11, 13, 11, 12, 12, 11, 11, 11, 11, 10, 12 }, + { 9, 11, 11, 12, 12, 12, 12, 11, 13, 13, 13, 11, 9, 9, 9, 9 }, + { 10, 13, 11, 14, 14, 12, 15, 12, 12, 13, 11, 12, 12, 11, 11, 11 }, + { 8, 14, 9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 }, + { 8, 14, 14, 11, 13, 10, 13, 13, 11, 12, 12, 15, 15, 12, 12, 12 }, + { 11, 11, 15, 11, 13, 12, 11, 11, 11, 10, 10, 11, 11, 11, 10, 11 }, + { 8, 8, 9, 8, 8, 8, 10, 9, 10, 9, 9, 10, 10, 10, 9, 9 }, + { 8, 11, 10, 13, 11, 11, 10, 11, 10, 9, 8, 8, 9, 8, 8, 9 }, + { 11, 13, 13, 12, 15, 13, 11, 11, 10, 11, 10, 10, 9, 8, 9, 8 }, + { 10, 11, 13, 11, 12, 11, 11, 11, 10, 9, 10, 14, 12, 8, 8, 8 } +}; + +static const uint8_t silk_lsf_pred_weights_nbmb[2][9] = { + {179, 138, 140, 148, 151, 149, 153, 151, 163}, + {116, 67, 82, 59, 92, 72, 100, 89, 92} +}; + +static const uint8_t silk_lsf_pred_weights_wb[2][15] = { + {175, 148, 160, 176, 178, 173, 174, 164, 177, 174, 196, 182, 198, 192, 182}, + { 68, 62, 66, 60, 72, 117, 85, 90, 118, 136, 151, 142, 160, 142, 155} +}; + +static const uint8_t silk_lsf_weight_sel_nbmb[32][9] = { + { 0, 1, 0, 0, 0, 0, 0, 0, 0 }, + { 1, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 1, 1, 1, 0, 0, 0, 0, 1, 0 }, + { 0, 1, 0, 0, 0, 0, 0, 0, 0 }, + { 0, 1, 0, 0, 0, 0, 0, 0, 0 }, + { 1, 0, 1, 1, 0, 0, 0, 1, 0 }, + { 0, 1, 1, 0, 0, 1, 1, 0, 0 }, + { 0, 0, 1, 1, 0, 1, 0, 1, 1 }, + { 0, 0, 1, 1, 0, 0, 1, 1, 1 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 0, 1, 0, 1, 1, 1, 1, 1, 0 }, + { 0, 1, 0, 1, 1, 1, 1, 1, 0 }, + { 0, 1, 1, 1, 1, 1, 1, 1, 0 }, + { 1, 0, 1, 1, 0, 1, 1, 1, 1 }, + { 0, 1, 1, 1, 1, 1, 0, 1, 0 }, + { 0, 0, 1, 1, 
0, 1, 0, 1, 0 }, + { 0, 0, 1, 1, 1, 0, 1, 1, 1 }, + { 0, 1, 1, 0, 0, 1, 1, 1, 0 }, + { 0, 0, 0, 1, 1, 1, 0, 1, 0 }, + { 0, 1, 1, 0, 0, 1, 0, 1, 0 }, + { 0, 1, 1, 0, 0, 0, 1, 1, 0 }, + { 0, 0, 0, 0, 0, 1, 1, 1, 1 }, + { 0, 0, 1, 1, 0, 0, 0, 1, 1 }, + { 0, 0, 0, 1, 0, 1, 1, 1, 1 }, + { 0, 1, 1, 1, 1, 1, 1, 1, 0 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 0, 0, 1, 0, 1, 1, 0, 1, 0 }, + { 1, 0, 0, 1, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 1, 1, 0, 1, 0, 1 }, + { 1, 0, 1, 1, 0, 1, 1, 1, 1 } +}; + +static const uint8_t silk_lsf_weight_sel_wb[32][15] = { + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, + { 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0 }, + { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0 }, + { 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1 }, + { 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, + { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0 }, + { 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0 }, + { 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0 }, + { 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 }, + { 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0 }, + { 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0 }, + { 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0 }, + { 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0 }, + { 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0 }, + { 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, + { 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1 }, + { 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1 
}, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, + { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 }, + { 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0 } +}; + +static const uint8_t silk_lsf_codebook_nbmb[32][10] = { + { 12, 35, 60, 83, 108, 132, 157, 180, 206, 228 }, + { 15, 32, 55, 77, 101, 125, 151, 175, 201, 225 }, + { 19, 42, 66, 89, 114, 137, 162, 184, 209, 230 }, + { 12, 25, 50, 72, 97, 120, 147, 172, 200, 223 }, + { 26, 44, 69, 90, 114, 135, 159, 180, 205, 225 }, + { 13, 22, 53, 80, 106, 130, 156, 180, 205, 228 }, + { 15, 25, 44, 64, 90, 115, 142, 168, 196, 222 }, + { 19, 24, 62, 82, 100, 120, 145, 168, 190, 214 }, + { 22, 31, 50, 79, 103, 120, 151, 170, 203, 227 }, + { 21, 29, 45, 65, 106, 124, 150, 171, 196, 224 }, + { 30, 49, 75, 97, 121, 142, 165, 186, 209, 229 }, + { 19, 25, 52, 70, 93, 116, 143, 166, 192, 219 }, + { 26, 34, 62, 75, 97, 118, 145, 167, 194, 217 }, + { 25, 33, 56, 70, 91, 113, 143, 165, 196, 223 }, + { 21, 34, 51, 72, 97, 117, 145, 171, 196, 222 }, + { 20, 29, 50, 67, 90, 117, 144, 168, 197, 221 }, + { 22, 31, 48, 66, 95, 117, 146, 168, 196, 222 }, + { 24, 33, 51, 77, 116, 134, 158, 180, 200, 224 }, + { 21, 28, 70, 87, 106, 124, 149, 170, 194, 217 }, + { 26, 33, 53, 64, 83, 117, 152, 173, 204, 225 }, + { 27, 34, 65, 95, 108, 129, 155, 174, 210, 225 }, + { 20, 26, 72, 99, 113, 131, 154, 176, 200, 219 }, + { 34, 43, 61, 78, 93, 114, 155, 177, 205, 229 }, + { 23, 29, 54, 97, 124, 138, 163, 179, 209, 229 }, + { 30, 38, 56, 89, 118, 129, 158, 178, 200, 231 }, + { 21, 29, 49, 63, 85, 111, 142, 163, 193, 222 }, + { 27, 48, 77, 103, 133, 158, 179, 196, 215, 232 }, + { 29, 47, 74, 99, 124, 151, 176, 198, 220, 237 }, + { 33, 42, 61, 76, 93, 121, 155, 174, 207, 225 }, + { 29, 53, 87, 112, 136, 154, 170, 188, 208, 227 }, + { 24, 30, 52, 84, 131, 150, 166, 186, 203, 229 }, + { 37, 48, 64, 84, 104, 118, 156, 177, 201, 230 } +}; + +static const uint8_t silk_lsf_codebook_wb[32][16] = { + { 7, 23, 38, 
54, 69, 85, 100, 116, 131, 147, 162, 178, 193, 208, 223, 239 }, + { 13, 25, 41, 55, 69, 83, 98, 112, 127, 142, 157, 171, 187, 203, 220, 236 }, + { 15, 21, 34, 51, 61, 78, 92, 106, 126, 136, 152, 167, 185, 205, 225, 240 }, + { 10, 21, 36, 50, 63, 79, 95, 110, 126, 141, 157, 173, 189, 205, 221, 237 }, + { 17, 20, 37, 51, 59, 78, 89, 107, 123, 134, 150, 164, 184, 205, 224, 240 }, + { 10, 15, 32, 51, 67, 81, 96, 112, 129, 142, 158, 173, 189, 204, 220, 236 }, + { 8, 21, 37, 51, 65, 79, 98, 113, 126, 138, 155, 168, 179, 192, 209, 218 }, + { 12, 15, 34, 55, 63, 78, 87, 108, 118, 131, 148, 167, 185, 203, 219, 236 }, + { 16, 19, 32, 36, 56, 79, 91, 108, 118, 136, 154, 171, 186, 204, 220, 237 }, + { 11, 28, 43, 58, 74, 89, 105, 120, 135, 150, 165, 180, 196, 211, 226, 241 }, + { 6, 16, 33, 46, 60, 75, 92, 107, 123, 137, 156, 169, 185, 199, 214, 225 }, + { 11, 19, 30, 44, 57, 74, 89, 105, 121, 135, 152, 169, 186, 202, 218, 234 }, + { 12, 19, 29, 46, 57, 71, 88, 100, 120, 132, 148, 165, 182, 199, 216, 233 }, + { 17, 23, 35, 46, 56, 77, 92, 106, 123, 134, 152, 167, 185, 204, 222, 237 }, + { 14, 17, 45, 53, 63, 75, 89, 107, 115, 132, 151, 171, 188, 206, 221, 240 }, + { 9, 16, 29, 40, 56, 71, 88, 103, 119, 137, 154, 171, 189, 205, 222, 237 }, + { 16, 19, 36, 48, 57, 76, 87, 105, 118, 132, 150, 167, 185, 202, 218, 236 }, + { 12, 17, 29, 54, 71, 81, 94, 104, 126, 136, 149, 164, 182, 201, 221, 237 }, + { 15, 28, 47, 62, 79, 97, 115, 129, 142, 155, 168, 180, 194, 208, 223, 238 }, + { 8, 14, 30, 45, 62, 78, 94, 111, 127, 143, 159, 175, 192, 207, 223, 239 }, + { 17, 30, 49, 62, 79, 92, 107, 119, 132, 145, 160, 174, 190, 204, 220, 235 }, + { 14, 19, 36, 45, 61, 76, 91, 108, 121, 138, 154, 172, 189, 205, 222, 238 }, + { 12, 18, 31, 45, 60, 76, 91, 107, 123, 138, 154, 171, 187, 204, 221, 236 }, + { 13, 17, 31, 43, 53, 70, 83, 103, 114, 131, 149, 167, 185, 203, 220, 237 }, + { 17, 22, 35, 42, 58, 78, 93, 110, 125, 139, 155, 170, 188, 206, 224, 240 }, + { 8, 15, 34, 50, 67, 83, 99, 115, 131, 
146, 162, 178, 193, 209, 224, 239 }, + { 13, 16, 41, 66, 73, 86, 95, 111, 128, 137, 150, 163, 183, 206, 225, 241 }, + { 17, 25, 37, 52, 63, 75, 92, 102, 119, 132, 144, 160, 175, 191, 212, 231 }, + { 19, 31, 49, 65, 83, 100, 117, 133, 147, 161, 174, 187, 200, 213, 227, 242 }, + { 18, 31, 52, 68, 88, 103, 117, 126, 138, 149, 163, 177, 192, 207, 223, 239 }, + { 16, 29, 47, 61, 76, 90, 106, 119, 133, 147, 161, 176, 193, 209, 224, 240 }, + { 15, 21, 35, 50, 61, 73, 86, 97, 110, 119, 129, 141, 175, 198, 218, 237 } +}; + +static const uint16_t silk_lsf_min_spacing_nbmb[] = { + 250, 3, 6, 3, 3, 3, 4, 3, 3, 3, 461 +}; + +static const uint16_t silk_lsf_min_spacing_wb[] = { + 100, 3, 40, 3, 3, 3, 5, 14, 14, 10, 11, 3, 8, 9, 7, 3, 347 +}; + +static const uint8_t silk_lsf_ordering_nbmb[] = { + 0, 9, 6, 3, 4, 5, 8, 1, 2, 7 +}; + +static const uint8_t silk_lsf_ordering_wb[] = { + 0, 15, 8, 7, 4, 11, 12, 3, 2, 13, 10, 5, 6, 9, 14, 1 +}; + +static const int16_t silk_cosine[] = { /* (0.12) */ + 4096, 4095, 4091, 4085, + 4076, 4065, 4052, 4036, + 4017, 3997, 3973, 3948, + 3920, 3889, 3857, 3822, + 3784, 3745, 3703, 3659, + 3613, 3564, 3513, 3461, + 3406, 3349, 3290, 3229, + 3166, 3102, 3035, 2967, + 2896, 2824, 2751, 2676, + 2599, 2520, 2440, 2359, + 2276, 2191, 2106, 2019, + 1931, 1842, 1751, 1660, + 1568, 1474, 1380, 1285, + 1189, 1093, 995, 897, + 799, 700, 601, 501, + 401, 301, 201, 101, + 0, -101, -201, -301, + -401, -501, -601, -700, + -799, -897, -995, -1093, + -1189, -1285, -1380, -1474, + -1568, -1660, -1751, -1842, + -1931, -2019, -2106, -2191, + -2276, -2359, -2440, -2520, + -2599, -2676, -2751, -2824, + -2896, -2967, -3035, -3102, + -3166, -3229, -3290, -3349, + -3406, -3461, -3513, -3564, + -3613, -3659, -3703, -3745, + -3784, -3822, -3857, -3889, + -3920, -3948, -3973, -3997, + -4017, -4036, -4052, -4065, + -4076, -4085, -4091, -4095, + -4096 +}; + +static const uint16_t silk_pitch_scale[] = { 4, 6, 8}; + +static const uint16_t silk_pitch_min_lag[] = { 16, 24, 32}; + 
+static const uint16_t silk_pitch_max_lag[] = {144, 216, 288}; + +static const int8_t silk_pitch_offset_nb10ms[3][2] = { + { 0, 0}, + { 1, 0}, + { 0, 1} +}; + +static const int8_t silk_pitch_offset_nb20ms[11][4] = { + { 0, 0, 0, 0}, + { 2, 1, 0, -1}, + {-1, 0, 1, 2}, + {-1, 0, 0, 1}, + {-1, 0, 0, 0}, + { 0, 0, 0, 1}, + { 0, 0, 1, 1}, + { 1, 1, 0, 0}, + { 1, 0, 0, 0}, + { 0, 0, 0, -1}, + { 1, 0, 0, -1} +}; + +static const int8_t silk_pitch_offset_mbwb10ms[12][2] = { + { 0, 0}, + { 0, 1}, + { 1, 0}, + {-1, 1}, + { 1, -1}, + {-1, 2}, + { 2, -1}, + {-2, 2}, + { 2, -2}, + {-2, 3}, + { 3, -2}, + {-3, 3} +}; + +static const int8_t silk_pitch_offset_mbwb20ms[34][4] = { + { 0, 0, 0, 0}, + { 0, 0, 1, 1}, + { 1, 1, 0, 0}, + {-1, 0, 0, 0}, + { 0, 0, 0, 1}, + { 1, 0, 0, 0}, + {-1, 0, 0, 1}, + { 0, 0, 0, -1}, + {-1, 0, 1, 2}, + { 1, 0, 0, -1}, + {-2, -1, 1, 2}, + { 2, 1, 0, -1}, + {-2, 0, 0, 2}, + {-2, 0, 1, 3}, + { 2, 1, -1, -2}, + {-3, -1, 1, 3}, + { 2, 0, 0, -2}, + { 3, 1, 0, -2}, + {-3, -1, 2, 4}, + {-4, -1, 1, 4}, + { 3, 1, -1, -3}, + {-4, -1, 2, 5}, + { 4, 2, -1, -3}, + { 4, 1, -1, -4}, + {-5, -1, 2, 6}, + { 5, 2, -1, -4}, + {-6, -2, 2, 6}, + {-5, -2, 2, 5}, + { 6, 2, -1, -5}, + {-7, -2, 3, 8}, + { 6, 2, -2, -6}, + { 5, 2, -2, -5}, + { 8, 3, -2, -7}, + {-9, -3, 3, 9} +}; + +static const int8_t silk_ltp_filter0_taps[8][5] = { + { 4, 6, 24, 7, 5}, + { 0, 0, 2, 0, 0}, + { 12, 28, 41, 13, -4}, + { -9, 15, 42, 25, 14}, + { 1, -2, 62, 41, -9}, + {-10, 37, 65, -4, 3}, + { -6, 4, 66, 7, -8}, + { 16, 14, 38, -3, 33} +}; + +static const int8_t silk_ltp_filter1_taps[16][5] = { + { 13, 22, 39, 23, 12}, + { -1, 36, 64, 27, -6}, + { -7, 10, 55, 43, 17}, + { 1, 1, 8, 1, 1}, + { 6, -11, 74, 53, -9}, + {-12, 55, 76, -12, 8}, + { -3, 3, 93, 27, -4}, + { 26, 39, 59, 3, -8}, + { 2, 0, 77, 11, 9}, + { -8, 22, 44, -6, 7}, + { 40, 9, 26, 3, 9}, + { -7, 20, 101, -7, 4}, + { 3, -8, 42, 26, 0}, + {-15, 33, 68, 2, 23}, + { -2, 55, 46, -2, 15}, + { 3, -1, 21, 16, 41} +}; + +static const int8_t 
silk_ltp_filter2_taps[32][5] = { + { -6, 27, 61, 39, 5}, + {-11, 42, 88, 4, 1}, + { -2, 60, 65, 6, -4}, + { -1, -5, 73, 56, 1}, + { -9, 19, 94, 29, -9}, + { 0, 12, 99, 6, 4}, + { 8, -19, 102, 46, -13}, + { 3, 2, 13, 3, 2}, + { 9, -21, 84, 72, -18}, + {-11, 46, 104, -22, 8}, + { 18, 38, 48, 23, 0}, + {-16, 70, 83, -21, 11}, + { 5, -11, 117, 22, -8}, + { -6, 23, 117, -12, 3}, + { 3, -8, 95, 28, 4}, + {-10, 15, 77, 60, -15}, + { -1, 4, 124, 2, -4}, + { 3, 38, 84, 24, -25}, + { 2, 13, 42, 13, 31}, + { 21, -4, 56, 46, -1}, + { -1, 35, 79, -13, 19}, + { -7, 65, 88, -9, -14}, + { 20, 4, 81, 49, -29}, + { 20, 0, 75, 3, -17}, + { 5, -9, 44, 92, -8}, + { 1, -3, 22, 69, 31}, + { -6, 95, 41, -12, 5}, + { 39, 67, 16, -4, 1}, + { 0, -6, 120, 55, -36}, + {-13, 44, 122, 4, -24}, + { 81, 5, 11, 3, 7}, + { 2, 0, 9, 10, 88} +}; + +static const uint16_t silk_ltp_scale_factor[] = {15565, 12288, 8192}; + +static const uint8_t silk_shell_blocks[3][2] = { + { 5, 10}, // NB + { 8, 15}, // MB + {10, 20} // WB +}; + +static const uint8_t silk_quant_offset[2][2] = { /* (0.23) */ + {25, 60}, // Inactive or Unvoiced + { 8, 25} // Voiced +}; + +static const int silk_stereo_interp_len[3] = { + 64, 96, 128 +}; + +static inline void silk_stabilize_lsf(int16_t nlsf[16], int order, const uint16_t min_delta[17]) +{ + int pass, i; + for (pass = 0; pass < 20; pass++) { + int k, min_diff = 0; + for (i = 0; i < order+1; i++) { + int low = i != 0 ? nlsf[i-1] : 0; + int high = i != order ? 
nlsf[i] : 32768; + int diff = (high - low) - (min_delta[i]); + + if (diff < min_diff) { + min_diff = diff; + k = i; + + if (pass == 20) + break; + } + } + if (min_diff == 0) /* no issues; stabilized */ + return; + + /* wiggle one or two LSFs */ + if (k == 0) { + /* repel away from lower bound */ + nlsf[0] = min_delta[0]; + } else if (k == order) { + /* repel away from higher bound */ + nlsf[order-1] = 32768 - min_delta[order]; + } else { + /* repel away from current position */ + int min_center = 0, max_center = 32768, center_val; + + /* lower extent */ + for (i = 0; i < k; i++) + min_center += min_delta[i]; + min_center += min_delta[k] >> 1; + + /* upper extent */ + for (i = order; i > k; i--) + max_center -= min_delta[k]; + max_center -= min_delta[k] >> 1; + + /* move apart */ + center_val = nlsf[k - 1] + nlsf[k]; + center_val = (center_val >> 1) + (center_val & 1); // rounded divide by 2 + center_val = FFMIN(max_center, FFMAX(min_center, center_val)); + + nlsf[k - 1] = center_val - (min_delta[k] >> 1); + nlsf[k] = nlsf[k - 1] + min_delta[k]; + } + } + + /* resort to the fall-back method, the standard method for LSF stabilization */ + + /* sort; as the LSFs should be nearly sorted, use insertion sort */ + for (i = 1; i < order; i++) { + int j, value = nlsf[i]; + for (j = i - 1; j >= 0 && nlsf[j] > value; j--) + nlsf[j + 1] = nlsf[j]; + nlsf[j + 1] = value; + } + + /* push forwards to increase distance */ + if (nlsf[0] < min_delta[0]) + nlsf[0] = min_delta[0]; + for (i = 1; i < order; i++) + if (nlsf[i] < nlsf[i - 1] + min_delta[i]) + nlsf[i] = nlsf[i - 1] + min_delta[i]; + + /* push backwards to increase distance */ + if (nlsf[order-1] > 32768 - min_delta[order]) + nlsf[order-1] = 32768 - min_delta[order]; + for (i = order-2; i >= 0; i--) + if (nlsf[i] > nlsf[i + 1] - min_delta[i+1]) + nlsf[i] = nlsf[i + 1] - min_delta[i+1]; + + return; +} + +static inline int silk_is_lpc_stable(const int16_t lpc[16], int order) +{ + int k, j, DC_resp = 0; + int32_t lpc32[2][16]; 
// Q24 + int totalinvgain = 1 << 30; // 1.0 in Q30 + int32_t *row = lpc32[0], *prevrow; + + /* initialize the first row for the Levinson recursion */ + for (k = 0; k < order; k++) { + DC_resp += lpc[k]; + row[k] = lpc[k] * 4096; + } + + if (DC_resp >= 4096) + return 0; + + /* check if prediction gain pushes any coefficients too far */ + for (k = order - 1; 1; k--) { + int rc; // Q31; reflection coefficient + int gaindiv; // Q30; inverse of the gain (the divisor) + int gain; // gain for this reflection coefficient + int fbits; // fractional bits used for the gain + int error; // Q29; estimate of the error of our partial estimate of 1/gaindiv + + if (FFABS(row[k]) > 16773022) + return 0; + + rc = -(row[k] * 128); + gaindiv = (1 << 30) - MULH(rc, rc); + + totalinvgain = MULH(totalinvgain, gaindiv) << 2; + if (k == 0) + return (totalinvgain >= 107374); + + /* approximate 1.0/gaindiv */ + fbits = opus_ilog(gaindiv); + gain = ((1 << 29) - 1) / (gaindiv >> (fbits + 1 - 16)); // Q + error = (1 << 29) - MULL(gaindiv << (15 + 16 - fbits), gain, 16); + gain = ((gain << 16) + (error * gain >> 13)); + + /* switch to the next row of the LPC coefficients */ + prevrow = row; + row = lpc32[k & 1]; + + for (j = 0; j < k; j++) { + int x = prevrow[j] - ROUND_MULL(prevrow[k - j - 1], rc, 31); + row[j] = ROUND_MULL(x, gain, fbits); + } + } +} + +static void silk_lsp2poly(const int32_t lsp[16], int32_t pol[16], int half_order) +{ + int i, j; + + pol[0] = 65536; // 1.0 in Q16 + pol[1] = -lsp[0]; + + for (i = 1; i < half_order; i++) { + pol[i + 1] = pol[i - 1] * 2 - ROUND_MULL(lsp[2 * i], pol[i], 16); + for (j = i; j > 1; j--) + pol[j] += pol[j - 2] - ROUND_MULL(lsp[2 * i], pol[j - 1], 16); + + pol[1] -= lsp[2 * i]; + } +} + +static void silk_lsf2lpc(const int16_t nlsf[16], float lpcf[16], int order) +{ + int i, k; + int32_t lsp[16]; // Q17; 2*cos(LSF) + int32_t p[9], q[9]; // Q16 + int32_t lpc32[16]; // Q17 + int16_t lpc[16]; // Q12 + + /* convert the LSFs to LSPs, i.e. 
2*cos(LSF) */ + for (k = 0; k < order; k++) { + int index = nlsf[k] >> 8; + int offset = nlsf[k] & 255; + int k2 = (order == 10) ? silk_lsf_ordering_nbmb[k] : silk_lsf_ordering_wb[k]; + + /* interpolate and round */ + lsp[k2] = silk_cosine[index] * 256; + lsp[k2] += (silk_cosine[index + 1] - silk_cosine[index]) * offset; + lsp[k2] = (lsp[k2] + 4) >> 3; + } + + silk_lsp2poly(lsp , p, order >> 1); + silk_lsp2poly(lsp + 1, q, order >> 1); + + /* reconstruct A(z) */ + for (k = 0; k < order>>1; k++) { + lpc32[k] = -p[k + 1] - p[k] - q[k + 1] + q[k]; + lpc32[order-k-1] = -p[k + 1] - p[k] + q[k + 1] - q[k]; + } + + /* limit the range of the LPC coefficients to each fit within an int16_t */ + for (i = 0; i < 10; i++) { + int j; + unsigned int maxabs = 0; + for (j = 0, k = 0; j < order; j++) { + unsigned int x = FFABS(lpc32[k]); + if (x > maxabs) { + maxabs = x; // Q17 + k = j; + } + } + + maxabs = (maxabs + 16) >> 5; // convert to Q12 + + if (maxabs > 32767) { + /* perform bandwidth expansion */ + unsigned int chirp, chirp_base; // Q16 + maxabs = FFMIN(maxabs, 163838); // anything above this overflows chirp's numerator + chirp_base = chirp = 65470 - ((maxabs - 32767) << 14) / ((maxabs * (k+1)) >> 2); + + for (k = 0; k < order; k++) { + lpc32[k] = ROUND_MULL(lpc32[k], chirp, 16); + chirp = (chirp_base * chirp + 32768) >> 16; + } + } else break; + } + + if (i == 10) { + /* time's up: just clamp */ + for (k = 0; k < order; k++) { + int x = (lpc32[k] + 16) >> 5; + lpc[k] = av_clip_int16(x); + lpc32[k] = lpc[k] << 5; // shortcut mandated by the spec; drops lower 5 bits + } + } else { + for (k = 0; k < order; k++) + lpc[k] = (lpc32[k] + 16) >> 5; + } + + /* if the prediction gain causes the LPC filter to become unstable, + apply further bandwidth expansion on the Q17 coefficients */ + for (i = 1; i <= 16 && !silk_is_lpc_stable(lpc, order); i++) { + unsigned int chirp, chirp_base; + chirp_base = chirp = 65536 - (1 << i); + + for (k = 0; k < order; k++) { + lpc32[k] = 
ROUND_MULL(lpc32[k], chirp, 16); + lpc[k] = (lpc32[k] + 16) >> 5; + chirp = (chirp_base * chirp + 32768) >> 16; + } + } + + for (i = 0; i < order; i++) + lpcf[i] = lpc[i] / 4096.0f; +} + +static inline void silk_decode_lpc(SilkContext *s, SilkFrame *frame, + OpusRangeCoder *rc, + float lpc_leadin[16], float lpc[16], + int *lpc_order, int *has_lpc_leadin, int voiced) +{ + int i; + int order; // order of the LP polynomial; 10 for NB/MB and 16 for WB + int8_t lsf_i1, lsf_i2[16]; // stage-1 and stage-2 codebook indices + int16_t lsf_res[16]; // residual as a Q10 value + int16_t nlsf[16]; // Q15 + + *lpc_order = order = s->wb ? 16 : 10; + + /* obtain LSF stage-1 and stage-2 indices */ + lsf_i1 = opus_rc_getsymbol(rc, silk_model_lsf_s1[s->wb][voiced]); + for (i = 0; i < order; i++) { + int index = s->wb ? silk_lsf_s2_model_sel_wb [lsf_i1][i] : + silk_lsf_s2_model_sel_nbmb[lsf_i1][i]; + lsf_i2[i] = opus_rc_getsymbol(rc, silk_model_lsf_s2[index]) - 4; + if (lsf_i2[i] == -4) + lsf_i2[i] -= opus_rc_getsymbol(rc, silk_model_lsf_s2_ext); + else if (lsf_i2[i] == 4) + lsf_i2[i] += opus_rc_getsymbol(rc, silk_model_lsf_s2_ext); + } + + /* reverse the backwards-prediction step */ + for (i = order - 1; i >= 0; i--) { + int qstep = s->wb ? 9830 : 11796; + + lsf_res[i] = lsf_i2[i] * 1024; + if (lsf_i2[i] < 0) lsf_res[i] += 102; + else if (lsf_i2[i] > 0) lsf_res[i] -= 102; + lsf_res[i] = (lsf_res[i] * qstep) >> 16; + + if (i + 1 < order) { + int weight = s->wb ? silk_lsf_pred_weights_wb [silk_lsf_weight_sel_wb [lsf_i1][i]][i] : + silk_lsf_pred_weights_nbmb[silk_lsf_weight_sel_nbmb[lsf_i1][i]][i]; + lsf_res[i] += (lsf_res[i+1] * weight) >> 8; + } + } + + /* reconstruct the NLSF coefficients from the supplied indices */ + for (i = 0; i < order; i++) { + const uint8_t * codebook = s->wb ? 
silk_lsf_codebook_wb [lsf_i1] : + silk_lsf_codebook_nbmb[lsf_i1]; + int cur, prev, next, weight_sq, weight, ipart, fpart, y, value; + + /* find the weight of the residual */ + /* TODO: precompute */ + cur = codebook[i]; + prev = i ? codebook[i - 1] : 0; + next = i + 1 < order ? codebook[i + 1] : 256; + weight_sq = (1024 / (cur - prev) + 1024 / (next - cur)) << 16; + + /* approximate square-root with mandated fixed-point arithmetic */ + ipart = opus_ilog(weight_sq); + fpart = (weight_sq >> (ipart-8)) & 127; + y = ((ipart & 1) ? 32768 : 46214) >> ((32 - ipart)>>1); + weight = y + ((213 * fpart * y) >> 16); + + value = cur * 128 + (lsf_res[i] * 16384) / weight; + nlsf[i] = av_clip(value, 0, 32767); + } + + /* stabilize the NLSF coefficients */ + silk_stabilize_lsf(nlsf, order, s->wb ? silk_lsf_min_spacing_wb : + silk_lsf_min_spacing_nbmb); + + /* produce an interpolation for the first 2 subframes, */ + /* and then convert both sets of NLSFs to LPC coefficients */ + *has_lpc_leadin = 0; + if (s->subframes == 4) { + int offset = opus_rc_getsymbol(rc, silk_model_lsf_interpolation_offset); + if (offset != 4 && frame->coded) { + *has_lpc_leadin = 1; + if (offset != 0) { + int16_t nlsf_leadin[16]; + for (i = 0; i < order; i++) + nlsf_leadin[i] = frame->nlsf[i] + + ((nlsf[i] - frame->nlsf[i]) * offset >> 2); + silk_lsf2lpc(nlsf_leadin, lpc_leadin, order); + } else /* avoid re-computation for a (roughly) 1-in-4 occurrence */ + memcpy(lpc_leadin, frame->lpc, 16 * sizeof(float)); + } else + offset = 4; + s->nlsf_interp_factor = offset; + + silk_lsf2lpc(nlsf, lpc, order); + } else { + s->nlsf_interp_factor = 4; + silk_lsf2lpc(nlsf, lpc, order); + } + + memcpy(frame->nlsf, nlsf, order * sizeof(nlsf[0])); + memcpy(frame->lpc, lpc, order * sizeof(lpc[0])); +} + +static inline void silk_count_children(OpusRangeCoder *rc, int model, int32_t total, + int32_t child[2]) +{ + if (total != 0) { + child[0] = opus_rc_getsymbol(rc, + silk_model_pulse_location[model] + (((total - 1 + 5) * 
(total - 1)) >> 1)); + child[1] = total - child[0]; + } else { + child[0] = 0; + child[1] = 0; + } +} + +static inline void silk_decode_excitation(SilkContext *s, OpusRangeCoder *rc, + float* excitationf, + int qoffset_high, int active, int voiced) +{ + int i; + uint32_t seed; + int shellblocks; + int ratelevel; + uint8_t pulsecount[20]; // total pulses in each shell block + uint8_t lsbcount[20] = {0}; // raw lsbits defined for each pulse in each shell block + int32_t excitation[320]; // Q23 + + /* excitation parameters */ + seed = opus_rc_getsymbol(rc, silk_model_lcg_seed); + shellblocks = silk_shell_blocks[s->bandwidth][s->subframes >> 2]; + ratelevel = opus_rc_getsymbol(rc, silk_model_exc_rate[voiced]); + + for (i = 0; i < shellblocks; i++) { + pulsecount[i] = opus_rc_getsymbol(rc, silk_model_pulse_count[ratelevel]); + if (pulsecount[i] == 17) { + while (pulsecount[i] == 17 && ++lsbcount[i] != 10) + pulsecount[i] = opus_rc_getsymbol(rc, silk_model_pulse_count[9]); + if (lsbcount[i] == 10) + pulsecount[i] = opus_rc_getsymbol(rc, silk_model_pulse_count[10]); + } + } + + /* decode pulse locations using PVQ */ + for (i = 0; i < shellblocks; i++) { + if (pulsecount[i] != 0) { + int a, b, c, d; + int32_t * location = excitation + 16*i; + int32_t branch[4][2]; + branch[0][0] = pulsecount[i]; + + /* unrolled tail recursion */ + for (a = 0; a < 1; a++) { + silk_count_children(rc, 0, branch[0][a], branch[1]); + for (b = 0; b < 2; b++) { + silk_count_children(rc, 1, branch[1][b], branch[2]); + for (c = 0; c < 2; c++) { + silk_count_children(rc, 2, branch[2][c], branch[3]); + for (d = 0; d < 2; d++) { + silk_count_children(rc, 3, branch[3][d], location); + location += 2; + } + } + } + } + } else + memset(excitation + 16*i, 0, 16*sizeof(int32_t)); + } + + /* decode least significant bits */ + for (i = 0; i < shellblocks << 4; i++) { + int bit; + for (bit = 0; bit < lsbcount[i >> 4]; bit++) + excitation[i] = (excitation[i] << 1) | + opus_rc_getsymbol(rc, 
silk_model_excitation_lsb); + } + + /* decode signs */ + for (i = 0; i < shellblocks << 4; i++) { + if (excitation[i] != 0) { + int sign = opus_rc_getsymbol(rc, silk_model_excitation_sign[active + + voiced][qoffset_high][FFMIN(pulsecount[i >> 4], 6)]); + if (sign == 0) + excitation[i] *= -1; + } + } + + /* assemble the excitation */ + for (i = 0; i < shellblocks << 4; i++) { + int value = excitation[i]; + excitation[i] = value * 256 | silk_quant_offset[voiced][qoffset_high]; + if (value < 0) excitation[i] += 20; + else if (value > 0) excitation[i] -= 20; + + /* invert samples pseudorandomly */ + seed = 196314165 * seed + 907633515; + if (seed & 0x80000000) + excitation[i] *= -1; + seed += value; + + excitationf[i] = excitation[i] / 8388608.0f; + } +} + +/** Maximum residual history according to 4.2.7.6.1 */ +#define SILK_MAX_LAG (288 + LTP_ORDER / 2) + +/** Order of the LTP filter */ +#define LTP_ORDER 5 + +static void silk_decode_frame(SilkContext *s, OpusRangeCoder *rc, + int frame_num, int channel, int coded_channels, int active, int active1) +{ + /* per frame */ + int voiced; // combines with active to indicate inactive, active, or active+voiced + int qoffset_high; + int order; // order of the LPC coefficients + float lpc_leadin[16], lpc_body[16], residual[SILK_MAX_LAG + SILK_HISTORY]; + int has_lpc_leadin; + float ltpscale; + + /* per subframe */ + struct { + float gain; + int pitchlag; + float ltptaps[5]; + } sf[4]; + + SilkFrame * const frame = s->frame + channel; + + int i; + + /* obtain stereo weights */ + if (coded_channels == 2 && channel == 0) { + int n, wi[2], ws[2], w[2]; + n = opus_rc_getsymbol(rc, silk_model_stereo_s1); + wi[0] = opus_rc_getsymbol(rc, silk_model_stereo_s2) + 3 * (n / 5); + ws[0] = opus_rc_getsymbol(rc, silk_model_stereo_s3); + wi[1] = opus_rc_getsymbol(rc, silk_model_stereo_s2) + 3 * (n % 5); + ws[1] = opus_rc_getsymbol(rc, silk_model_stereo_s3); + + for (i = 0; i < 2; i++) + w[i] = silk_stereo_weights[wi[i]] + + 
(((silk_stereo_weights[wi[i] + 1] - silk_stereo_weights[wi[i]]) * 6554) >> 16) + * (ws[i]*2 + 1); + + s->stereo_weights[0] = (w[0] - w[1]) / 8192.0; + s->stereo_weights[1] = w[1] / 8192.0; + + /* and read the mid-only flag */ + s->midonly = active1 ? 0 : opus_rc_getsymbol(rc, silk_model_mid_only); + } + + /* obtain frame type */ + if (!active) { + qoffset_high = opus_rc_getsymbol(rc, silk_model_frame_type_inactive); + voiced = 0; + } else { + int type = opus_rc_getsymbol(rc, silk_model_frame_type_active); + qoffset_high = type & 1; + voiced = type >> 1; + } + + /* obtain subframe quantization gains */ + for (i = 0; i < s->subframes; i++) { + int log_gain; //Q7 + int ipart, fpart, lingain; + + if (i == 0 && (frame_num == 0 || !frame->coded)) { + /* gain is coded absolute */ + int x = opus_rc_getsymbol(rc, silk_model_gain_highbits[active + voiced]); + log_gain = (x<<3) | opus_rc_getsymbol(rc, silk_model_gain_lowbits); + + if (frame->coded) + log_gain = FFMAX(log_gain, frame->log_gain - 16); + } else { + /* gain is coded relative */ + int delta_gain = opus_rc_getsymbol(rc, silk_model_gain_delta); + log_gain = av_clip(FFMAX((delta_gain<<1) - 16, + frame->log_gain + delta_gain - 4), 0, 63); + } + + frame->log_gain = log_gain; + + /* approximate 2**(x/128) with a Q7 (i.e. 
non-integer) input */ + log_gain = (log_gain * 0x1D1C71 >> 16) + 2090; + ipart = log_gain >> 7; + fpart = log_gain & 127; + lingain = (1 << ipart) + ((-174 * fpart * (128-fpart) >>16) + fpart) * ((1<> 7); + sf[i].gain = lingain / 65536.0f; + } + + /* obtain LPC filter coefficients */ + silk_decode_lpc(s, frame, rc, lpc_leadin, lpc_body, &order, &has_lpc_leadin, voiced); + + /* obtain pitch lags, if this is a voiced frame */ + if (voiced) { + int lag_absolute = (!frame_num || !frame->prev_voiced); + int primarylag; // primary pitch lag for the entire SILK frame + int ltpfilter; + const int8_t * offsets; + + if (!lag_absolute) { + int delta = opus_rc_getsymbol(rc, silk_model_pitch_delta); + if (delta) + primarylag = frame->primarylag + delta - 9; + else + lag_absolute = 1; + } + + if (lag_absolute) { + /* primary lag is coded absolute */ + int highbits, lowbits; + const uint16_t *model[] = { + silk_model_pitch_lowbits_nb, silk_model_pitch_lowbits_mb, + silk_model_pitch_lowbits_wb + }; + highbits = opus_rc_getsymbol(rc, silk_model_pitch_highbits); + lowbits = opus_rc_getsymbol(rc, model[s->bandwidth]); + + primarylag = silk_pitch_min_lag[s->bandwidth] + + highbits*silk_pitch_scale[s->bandwidth] + lowbits; + } + frame->primarylag = primarylag; + + if (s->subframes == 2) + offsets = (s->bandwidth == OPUS_BANDWIDTH_NARROWBAND) + ? silk_pitch_offset_nb10ms[opus_rc_getsymbol(rc, + silk_model_pitch_contour_nb10ms)] + : silk_pitch_offset_mbwb10ms[opus_rc_getsymbol(rc, + silk_model_pitch_contour_mbwb10ms)]; + else + offsets = (s->bandwidth == OPUS_BANDWIDTH_NARROWBAND) + ? 
silk_pitch_offset_nb20ms[opus_rc_getsymbol(rc, + silk_model_pitch_contour_nb20ms)] + : silk_pitch_offset_mbwb20ms[opus_rc_getsymbol(rc, + silk_model_pitch_contour_mbwb20ms)]; + + for (i = 0; i < s->subframes; i++) + sf[i].pitchlag = av_clip(primarylag + offsets[i], + silk_pitch_min_lag[s->bandwidth], + silk_pitch_max_lag[s->bandwidth]); + + /* obtain LTP filter coefficients */ + ltpfilter = opus_rc_getsymbol(rc, silk_model_ltp_filter); + for (i = 0; i < s->subframes; i++) { + int index, j; + const uint16_t *filter_sel[] = { + silk_model_ltp_filter0_sel, silk_model_ltp_filter1_sel, + silk_model_ltp_filter2_sel + }; + const int8_t (*filter_taps[])[5] = { + silk_ltp_filter0_taps, silk_ltp_filter1_taps, silk_ltp_filter2_taps + }; + index = opus_rc_getsymbol(rc, filter_sel[ltpfilter]); + for (j = 0; j < 5; j++) + sf[i].ltptaps[j] = filter_taps[ltpfilter][index][j] / 128.0f; + } + } + + /* obtain LTP scale factor */ + if (voiced && frame_num == 0) + ltpscale = silk_ltp_scale_factor[opus_rc_getsymbol(rc, + silk_model_ltp_scale_index)] / 16384.0f; + else ltpscale = 15565.0f/16384.0f; + + /* generate the excitation signal for the entire frame */ + silk_decode_excitation(s, rc, residual + SILK_MAX_LAG, qoffset_high, + active, voiced); + + /* skip synthesising the side channel if we want mono-only */ + if (s->output_channels == channel) + return; + + /* generate the output signal */ + for (i = 0; i < s->subframes; i++) { + const float * lpc_coeff = (i < 2 && has_lpc_leadin) ? 
lpc_leadin : lpc_body; + float *dst = frame->output + SILK_HISTORY + i * s->sflength; + float *resptr = residual + SILK_MAX_LAG + i * s->sflength; + float *lpc = frame->lpc_history + SILK_HISTORY + i * s->sflength; + float sum; + int j, k; + + if (voiced) { + int out_end; + float scale; + + if (i < 2 || s->nlsf_interp_factor == 4) { + out_end = -i * s->sflength; + scale = ltpscale; + } else { + out_end = -(i - 2) * s->sflength; + scale = 1.0f; + } + + /* when the LPC coefficients change, a re-whitening filter is used */ + /* to produce a residual that accounts for the change */ + for (j = - sf[i].pitchlag - LTP_ORDER/2; j < out_end; j++) { + sum = dst[j]; + for (k = 0; k < order; k++) + sum -= lpc_coeff[k] * dst[j - k - 1]; + resptr[j] = av_clipf(sum, -1.0f, 1.0f) * scale / sf[i].gain; + } + + if (out_end) { + float rescale = sf[i-1].gain / sf[i].gain; + for (j = out_end; j < 0; j++) + resptr[j] *= rescale; + } + + /* LTP synthesis */ + for (j = 0; j < s->sflength; j++) { + sum = resptr[j]; + for (k = 0; k < LTP_ORDER; k++) + sum += sf[i].ltptaps[k] * resptr[j - sf[i].pitchlag + LTP_ORDER/2 - k]; + resptr[j] = sum; + } + } + + /* LPC synthesis */ + for (j = 0; j < s->sflength; j++) { + sum = resptr[j] * sf[i].gain; + for (k = 1; k <= order; k++) + sum += lpc_coeff[k - 1] * lpc[j - k]; + + lpc[j] = sum; + dst[j] = av_clipf(sum, -1.0f, 1.0f); + } + } + + frame->prev_voiced = voiced; + memmove(frame->lpc_history, frame->lpc_history + s->flength, SILK_HISTORY * sizeof(float)); + memmove(frame->output, frame->output + s->flength, SILK_HISTORY * sizeof(float)); + + frame->coded = 1; +} + +static void silk_unmix_ms(SilkContext *s, float *l, float *r) +{ + float *mid = s->frame[0].output + SILK_HISTORY - s->flength; + float *side = s->frame[1].output + SILK_HISTORY - s->flength; + float w0_prev = s->prev_stereo_weights[0]; + float w1_prev = s->prev_stereo_weights[1]; + float w0 = s->stereo_weights[0]; + float w1 = s->stereo_weights[1]; + int n1 = 
silk_stereo_interp_len[s->bandwidth]; + int i; + + for (i = 0; i < n1; i++) { + float interp0 = w0_prev + i * (w0 - w0_prev) / n1; + float interp1 = w1_prev + i * (w1 - w1_prev) / n1; + float p0 = 0.25 * (mid[i - 2] + 2 * mid[i - 1] + mid[i]); + + l[i] = av_clipf((1 + interp1) * mid[i - 1] + side[i - 1] + interp0 * p0, -1.0, 1.0); + r[i] = av_clipf((1 - interp1) * mid[i - 1] - side[i - 1] - interp0 * p0, -1.0, 1.0); + } + + for (; i < s->flength; i++) { + float p0 = 0.25 * (mid[i - 2] + 2 * mid[i - 1] + mid[i]); + + l[i] = av_clipf((1 + w1) * mid[i - 1] + side[i - 1] + w0 * p0, -1.0, 1.0); + r[i] = av_clipf((1 - w1) * mid[i - 1] - side[i - 1] - w0 * p0, -1.0, 1.0); + } + + memcpy(s->prev_stereo_weights, s->stereo_weights, sizeof(s->stereo_weights)); +} + +static void silk_flush_frame(SilkFrame *frame) +{ + if (!frame->coded) + return; + + memset(frame->output, 0, sizeof(frame->output)); + memset(frame->lpc_history, 0, sizeof(frame->lpc_history)); + + memset(frame->lpc, 0, sizeof(frame->lpc)); + memset(frame->nlsf, 0, sizeof(frame->nlsf)); + + frame->log_gain = 0; + + frame->primarylag = 0; + frame->prev_voiced = 0; + frame->coded = 0; +} + +int ff_silk_decode_superframe(SilkContext *s, OpusRangeCoder *rc, + float *output[2], + enum OpusBandwidth bandwidth, + int coded_channels, + int duration_ms) +{ + int active[2][6], redundancy[2]; + int nb_frames, i, j; + + if (bandwidth > OPUS_BANDWIDTH_WIDEBAND || + coded_channels > 2 || duration_ms > 60) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid parameters passed " + "to the SILK decoder.\n"); + return AVERROR(EINVAL); + } + + nb_frames = 1 + (duration_ms > 20) + (duration_ms > 40); + s->subframes = duration_ms / nb_frames / 5; // 5ms subframes + s->sflength = 20 * (bandwidth + 2); + s->flength = s->sflength * s->subframes; + s->bandwidth = bandwidth; + s->wb = bandwidth == OPUS_BANDWIDTH_WIDEBAND; + + /* make sure to flush the side channel when switching from mono to stereo */ + if (coded_channels > s->prev_coded_channels) 
+        silk_flush_frame(&s->frame[1]);
+    s->prev_coded_channels = coded_channels;
+
+    /* read the LP-layer header bits */
+    for (i = 0; i < coded_channels; i++) {
+        for (j = 0; j < nb_frames; j++)
+            active[i][j] = opus_rc_p2model(rc, 1); /* one flag per (channel, frame) */
+        /* a set flag announces LBRR data, which this decoder rejects */
+        redundancy[i] = opus_rc_p2model(rc, 1);
+        if (redundancy[i]) {
+            av_log(s->avctx, AV_LOG_ERROR, "LBRR frames present; this is unsupported\n");
+            return AVERROR_PATCHWELCOME;
+        }
+    }
+
+    for (i = 0; i < nb_frames; i++) {
+        /* NOTE(review): active[1][i] is always passed, although it is only
+         * read from the bitstream when coded_channels == 2 */
+        for (j = 0; j < coded_channels && !s->midonly; j++)
+            silk_decode_frame(s, rc, i, j, coded_channels, active[j][i], active[1][i]);
+
+        /* reset the side channel if it is not coded */
+        if (s->midonly && s->frame[1].coded)
+            silk_flush_frame(&s->frame[1]);
+        /* mono in or mono out: channel 0 is copied to every output channel */
+        if (coded_channels == 1 || s->output_channels == 1) {
+            for (j = 0; j < s->output_channels; j++) {
+                memcpy(output[j] + i * s->flength,
+                       s->frame[0].output + SILK_HISTORY - s->flength - 2,
+                       s->flength * sizeof(float));
+            }
+        } else {
+            silk_unmix_ms(s, output[0] + i * s->flength, output[1] + i * s->flength);
+        }
+
+        s->midonly = 0; /* cleared after every frame */
+    }
+
+    return nb_frames * s->flength; /* samples written per channel */
+}
+/**
+ * Release a SILK decoder context by handing *ps to av_freep().
+ */
+void ff_silk_free(SilkContext **ps)
+{
+    av_freep(ps);
+}
+/**
+ * Reset the decoder state: flush both channel frames and zero the
+ * previous stereo prediction weights.
+ */
+void ff_silk_flush(SilkContext *s)
+{
+    silk_flush_frame(&s->frame[0]);
+    silk_flush_frame(&s->frame[1]);
+
+    memset(s->prev_stereo_weights, 0, sizeof(s->prev_stereo_weights));
+}
+/**
+ * Allocate and initialize a SILK decoder context.
+ *
+ * @param avctx           codec context, stored for logging
+ * @param ps              receives the newly allocated context on success
+ * @param output_channels number of output channels, must be 1 or 2
+ * @return 0 on success, AVERROR(EINVAL) for an unsupported channel count,
+ *         AVERROR(ENOMEM) if the allocation fails
+ */
+int ff_silk_init(AVCodecContext *avctx, SilkContext **ps, int output_channels)
+{
+    SilkContext *s;
+
+    if (output_channels != 1 && output_channels != 2) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid number of output channels: %d\n",
+               output_channels);
+        return AVERROR(EINVAL);
+    }
+
+    s = av_mallocz(sizeof(*s)); /* zero-filled allocation */
+    if (!s)
+        return AVERROR(ENOMEM);
+
+    s->avctx           = avctx;
+    s->output_channels = output_channels;
+
+    ff_silk_flush(s);
+
+    *ps = s;
+
+    return 0;
+}
diff --git a/libavcodec/opusdec.c b/libavcodec/opusdec.c
new file mode 100644
index 0000000000..bf3a54b16b
--- /dev/null
+++ b/libavcodec/opusdec.c
@@ -0,0 +1,674 @@
+/*
+ * Opus decoder
+ *
Copyright (c) 2012 Andrew D'Addesio + * Copyright (c) 2013-2014 Mozilla Corporation + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Opus decoder + * @author Andrew D'Addesio, Anton Khirnov + * + * Codec homepage: http://opus-codec.org/ + * Specification: http://tools.ietf.org/html/rfc6716 + * Ogg Opus specification: https://tools.ietf.org/html/draft-ietf-codec-oggopus-03 + * + * Ogg-contained .opus files can be produced with opus-tools: + * http://git.xiph.org/?p=opus-tools.git + */ + +#include + +#include "libavutil/attributes.h" +#include "libavutil/audio_fifo.h" +#include "libavutil/channel_layout.h" +#include "libavutil/opt.h" + +#include "libavresample/avresample.h" + +#include "avcodec.h" +#include "celp_filters.h" +#include "fft.h" +#include "get_bits.h" +#include "internal.h" +#include "mathops.h" +#include "opus.h" + +static const uint16_t silk_frame_duration_ms[16] = { + 10, 20, 40, 60, + 10, 20, 40, 60, + 10, 20, 40, 60, + 10, 20, + 10, 20, +}; + +/* number of samples of silence to feed to the resampler + * at the beginning */ +static const int silk_resample_delay[] = { + 4, 8, 11, 11, 11 +}; + +static const uint8_t celt_band_end[] = { 13, 17, 17, 19, 21 }; + +static int get_silk_samplerate(int config) +{ + if (config < 
4)
+        return 8000;
+    else if (config < 8)
+        return 12000;
+    return 16000; /* every remaining config */
+}
+
+/**
+ * Range decoder
+ *
+ * Set up the range decoder over data[0..size): attach the bit reader,
+ * prime range/value from the first 7 bits and normalize.
+ *
+ * @return 0 on success, the init_get_bits8() error code otherwise
+ */
+static int opus_rc_init(OpusRangeCoder *rc, const uint8_t *data, int size)
+{
+    int ret = init_get_bits8(&rc->gb, data, size);
+    if (ret < 0)
+        return ret;
+
+    rc->range = 128;
+    rc->value = 127 - get_bits(&rc->gb, 7);
+    rc->total_read_bits = 9;
+    opus_rc_normalize(rc);
+
+    return 0;
+}
+/**
+ * Set up the raw-bits reader of a range coder: remember the right end of
+ * the buffer and its size in bytes, and empty the bit cache.
+ */
+static void opus_raw_init(OpusRangeCoder *rc, const uint8_t *rightend,
+                          unsigned int bytes)
+{
+    rc->rb.position = rightend;
+    rc->rb.bytes    = bytes;
+    rc->rb.cachelen = 0;
+    rc->rb.cacheval = 0;
+}
+/**
+ * Crossfade two signals: out[i] = in2[i] * window[i] + in1[i] * (1 - window[i])
+ * for len samples. out may alias one of the inputs (callers pass out == in1).
+ */
+static void opus_fade(float *out,
+                      const float *in1, const float *in2,
+                      const float *window, int len)
+{
+    int i;
+    for (i = 0; i < len; i++)
+        out[i] = in2[i] * window[i] + in1[i] * (1.0 - window[i]);
+}
+/**
+ * Drain nb_samples buffered samples from the resampler into s->out, mix in
+ * any pending CELT delay samples, finish a pending redundancy crossfade and
+ * advance the output pointers.
+ *
+ * @return 0 on success, a negative error code from the resampler, or
+ *         AVERROR_BUG when the resampler or the CELT delay FIFO does not
+ *         hold exactly nb_samples samples
+ */
+static int opus_flush_resample(OpusStreamContext *s, int nb_samples)
+{
+    int celt_size = av_audio_fifo_size(s->celt_delay);
+    int ret, i;
+    /* NULL input: only output what the resampler still buffers */
+    ret = avresample_convert(s->avr, (uint8_t**)s->out, s->out_size, nb_samples,
+                             NULL, 0, 0);
+    if (ret < 0)
+        return ret;
+    else if (ret != nb_samples) {
+        av_log(s->avctx, AV_LOG_ERROR, "Wrong number of flushed samples: %d\n",
+               ret);
+        return AVERROR_BUG;
+    }
+
+    if (celt_size) {
+        if (celt_size != nb_samples) {
+            av_log(s->avctx, AV_LOG_ERROR, "Wrong number of CELT delay samples.\n");
+            return AVERROR_BUG;
+        }
+        av_audio_fifo_read(s->celt_delay, (void**)s->celt_output, nb_samples);
+        for (i = 0; i < s->output_channels; i++) {
+            s->fdsp->vector_fmac_scalar(s->out[i],
+                                        s->celt_output[i], 1.0,
+                                        nb_samples);
+        }
+    }
+    /* finish a redundancy crossfade left over from the previous frame */
+    if (s->redundancy_idx) {
+        for (i = 0; i < s->output_channels; i++)
+            opus_fade(s->out[i], s->out[i],
+                      s->redundancy_output[i] + 120 + s->redundancy_idx,
+                      ff_celt_window2 + s->redundancy_idx, 120 - s->redundancy_idx);
+        s->redundancy_idx = 0;
+    }
+
+    s->out[0]   += nb_samples;
+    s->out[1]   += nb_samples;
+    s->out_size -= nb_samples * sizeof(float);
+
+    return 0;
+}
+/**
+ * Open the resampler at the current SILK sample rate and prime it with
+ * silk_resample_delay[bandwidth] samples of silence.
+ *
+ * @return 0 on success, a negative error code from libavresample otherwise
+ */
+static int opus_init_resample(OpusStreamContext
*s)
+{
+    float delay[16]      = { 0.0 }; /* silence; both channels share it */
+    uint8_t *delayptr[2] = { (uint8_t*)delay, (uint8_t*)delay };
+    int ret;
+
+    av_opt_set_int(s->avr, "in_sample_rate", s->silk_samplerate, 0);
+    ret = avresample_open(s->avr);
+    if (ret < 0) {
+        av_log(s->avctx, AV_LOG_ERROR, "Error opening the resampler.\n");
+        return ret;
+    }
+    /* input only (no output buffer): the silence stays inside the resampler */
+    ret = avresample_convert(s->avr, NULL, 0, 0, delayptr, sizeof(delay),
+                             silk_resample_delay[s->packet.bandwidth]);
+    if (ret < 0) {
+        av_log(s->avctx, AV_LOG_ERROR,
+               "Error feeding initial silence to the resampler.\n");
+        return ret;
+    }
+
+    return 0;
+}
+/**
+ * Decode the redundancy frame stored in data[0..size) as a 240-sample CELT
+ * frame into s->redundancy_output, using the dedicated s->redundancy_rc.
+ *
+ * @return 0 on success, a negative error code (also logged) on failure
+ */
+static int opus_decode_redundancy(OpusStreamContext *s, const uint8_t *data, int size)
+{
+    int ret;
+    enum OpusBandwidth bw = s->packet.bandwidth;
+    /* NOTE(review): bw is adjusted here but never used afterwards; the call
+     * below indexes celt_band_end with s->packet.bandwidth instead */
+    if (s->packet.mode == OPUS_MODE_SILK &&
+        bw == OPUS_BANDWIDTH_MEDIUMBAND)
+        bw = OPUS_BANDWIDTH_WIDEBAND;
+
+    ret = opus_rc_init(&s->redundancy_rc, data, size);
+    if (ret < 0)
+        goto fail;
+    opus_raw_init(&s->redundancy_rc, data + size, size);
+
+    ret = ff_celt_decode_frame(s->celt, &s->redundancy_rc,
+                               s->redundancy_output,
+                               s->packet.stereo + 1, 240,
+                               0, celt_band_end[s->packet.bandwidth]);
+    if (ret < 0)
+        goto fail;
+
+    return 0;
+fail:
+    av_log(s->avctx, AV_LOG_ERROR, "Error decoding the redundancy frame.\n");
+    return ret;
+}
+/**
+ * Decode a single Opus frame (SILK, CELT or hybrid, chosen by
+ * s->packet.mode) from data[0..size) into s->out.
+ *
+ * @return the number of samples written per channel, or a negative error code
+ */
+static int opus_decode_frame(OpusStreamContext *s, const uint8_t *data, int size)
+{
+    int samples    = s->packet.frame_duration;
+    int redundancy = 0;
+    int redundancy_size, redundancy_pos;
+    int ret, i, consumed;
+    int delayed_samples = s->delayed_samples; /* value before this frame updates it */
+
+    ret = opus_rc_init(&s->rc, data, size);
+    if (ret < 0)
+        return ret;
+
+    /* decode the silk frame */
+    if (s->packet.mode == OPUS_MODE_SILK || s->packet.mode == OPUS_MODE_HYBRID) {
+        if (!avresample_is_open(s->avr)) {
+            ret = opus_init_resample(s);
+            if (ret < 0)
+                return ret;
+        }
+        /* the bandwidth passed to SILK is capped at wideband */
+        samples = ff_silk_decode_superframe(s->silk, &s->rc, s->silk_output,
+                                            FFMIN(s->packet.bandwidth, OPUS_BANDWIDTH_WIDEBAND),
+                                            s->packet.stereo + 1,
+
silk_frame_duration_ms[s->packet.config]); + if (samples < 0) { + av_log(s->avctx, AV_LOG_ERROR, "Error decoding a SILK frame.\n"); + return samples; + } + + samples = avresample_convert(s->avr, (uint8_t**)s->out, s->out_size, + s->packet.frame_duration, + (uint8_t**)s->silk_output, + sizeof(s->silk_buf[0]), + samples); + if (samples < 0) { + av_log(s->avctx, AV_LOG_ERROR, "Error resampling SILK data.\n"); + return samples; + } + s->delayed_samples += s->packet.frame_duration - samples; + } else + ff_silk_flush(s->silk); + + // decode redundancy information + consumed = opus_rc_tell(&s->rc); + if (s->packet.mode == OPUS_MODE_HYBRID && consumed + 37 <= size * 8) + redundancy = opus_rc_p2model(&s->rc, 12); + else if (s->packet.mode == OPUS_MODE_SILK && consumed + 17 <= size * 8) + redundancy = 1; + + if (redundancy) { + redundancy_pos = opus_rc_p2model(&s->rc, 1); + + if (s->packet.mode == OPUS_MODE_HYBRID) + redundancy_size = opus_rc_unimodel(&s->rc, 256) + 2; + else + redundancy_size = size - (consumed + 7) / 8; + size -= redundancy_size; + if (size < 0) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid redundancy frame size.\n"); + return AVERROR_INVALIDDATA; + } + + if (redundancy_pos) { + ret = opus_decode_redundancy(s, data + size, redundancy_size); + if (ret < 0) + return ret; + ff_celt_flush(s->celt); + } + } + + /* decode the CELT frame */ + if (s->packet.mode == OPUS_MODE_CELT || s->packet.mode == OPUS_MODE_HYBRID) { + float *out_tmp[2] = { s->out[0], s->out[1] }; + float **dst = (s->packet.mode == OPUS_MODE_CELT) ? 
+                      out_tmp : s->celt_output; /* CELT-only decodes straight into the output */
+        int celt_output_samples = samples;
+        int delay_samples = av_audio_fifo_size(s->celt_delay);
+
+        if (delay_samples) {
+            if (s->packet.mode == OPUS_MODE_HYBRID) {
+                av_audio_fifo_read(s->celt_delay, (void**)s->celt_output, delay_samples);
+                /* add the CELT samples held back by the previous frame */
+                for (i = 0; i < s->output_channels; i++) {
+                    s->fdsp->vector_fmac_scalar(out_tmp[i], s->celt_output[i], 1.0,
+                                                delay_samples);
+                    out_tmp[i] += delay_samples;
+                }
+                celt_output_samples -= delay_samples;
+            } else {
+                av_log(s->avctx, AV_LOG_WARNING,
+                       "Spurious CELT delay samples present.\n");
+                av_audio_fifo_drain(s->celt_delay, delay_samples);
+                if (s->avctx->err_recognition & AV_EF_EXPLODE)
+                    return AVERROR_BUG;
+            }
+        }
+        /* raw bits are read from the end of the (possibly shortened) frame */
+        opus_raw_init(&s->rc, data + size, size);
+
+        ret = ff_celt_decode_frame(s->celt, &s->rc, dst,
+                                   s->packet.stereo + 1,
+                                   s->packet.frame_duration,
+                                   (s->packet.mode == OPUS_MODE_HYBRID) ? 17 : 0,
+                                   celt_band_end[s->packet.bandwidth]);
+        if (ret < 0)
+            return ret;
+
+        if (s->packet.mode == OPUS_MODE_HYBRID) {
+            int celt_delay = s->packet.frame_duration - celt_output_samples;
+            void *delaybuf[2] = { s->celt_output[0] + celt_output_samples,
+                                  s->celt_output[1] + celt_output_samples };
+            /* add the part of the CELT output that lines up with this frame */
+            for (i = 0; i < s->output_channels; i++) {
+                s->fdsp->vector_fmac_scalar(out_tmp[i],
+                                            s->celt_output[i], 1.0,
+                                            celt_output_samples);
+            }
+            /* keep the remaining CELT samples for the next frame or flush */
+            ret = av_audio_fifo_write(s->celt_delay, delaybuf, celt_delay);
+            if (ret < 0)
+                return ret;
+        }
+    } else
+        ff_celt_flush(s->celt);
+    /* finish a redundancy crossfade left over from the previous frame */
+    if (s->redundancy_idx) {
+        for (i = 0; i < s->output_channels; i++)
+            opus_fade(s->out[i], s->out[i],
+                      s->redundancy_output[i] + 120 + s->redundancy_idx,
+                      ff_celt_window2 + s->redundancy_idx, 120 - s->redundancy_idx);
+        s->redundancy_idx = 0;
+    }
+    if (redundancy) {
+        if (!redundancy_pos) {
+            ff_celt_flush(s->celt);
+            ret = opus_decode_redundancy(s, data + size, redundancy_size);
+            if (ret < 0)
+                return ret;
+            /* fade the end of this frame into the redundancy frame */
+            for (i = 0; i < s->output_channels; i++) {
+                opus_fade(s->out[i] + samples - 120 + delayed_samples,
+                          s->out[i]
+ samples - 120 + delayed_samples,
+                          s->redundancy_output[i] + 120,
+                          ff_celt_window2, 120 - delayed_samples);
+                if (delayed_samples) /* the rest of the fade is applied by a later call */
+                    s->redundancy_idx = 120 - delayed_samples;
+            }
+        } else {
+            for (i = 0; i < s->output_channels; i++) {
+                memcpy(s->out[i] + delayed_samples, s->redundancy_output[i], 120 * sizeof(float));
+                opus_fade(s->out[i] + 120 + delayed_samples,
+                          s->redundancy_output[i] + 120,
+                          s->out[i] + 120 + delayed_samples,
+                          ff_celt_window2, 120);
+            }
+        }
+    }
+
+    return samples;
+}
+/**
+ * Decode all frames of one sub-packet (one stream) into s->out, flushing
+ * the resampler first when necessary. A NULL buf only drains delayed samples.
+ *
+ * buf_size and nb_samples are not used by this function.
+ *
+ * @return the number of samples written per channel, or a negative error code
+ */
+static int opus_decode_subpacket(OpusStreamContext *s,
+                                 const uint8_t *buf, int buf_size,
+                                 int nb_samples)
+{
+    int output_samples = 0;
+    int flush_needed   = 0;
+    int i, j, ret;
+
+    /* check if we need to flush the resampler */
+    if (avresample_is_open(s->avr)) {
+        if (buf) {
+            int64_t cur_samplerate;
+            av_opt_get_int(s->avr, "in_sample_rate", 0, &cur_samplerate);
+            flush_needed = (s->packet.mode == OPUS_MODE_CELT) || (cur_samplerate != s->silk_samplerate);
+        } else {
+            flush_needed = !!s->delayed_samples;
+        }
+    }
+
+    if (!buf && !flush_needed)
+        return 0;
+
+    /* use dummy output buffers if the channel is not mapped to anything */
+    if (!s->out[0] ||
+        (s->output_channels == 2 && !s->out[1])) {
+        av_fast_malloc(&s->out_dummy, &s->out_dummy_allocated_size, s->out_size);
+        if (!s->out_dummy)
+            return AVERROR(ENOMEM);
+        if (!s->out[0])
+            s->out[0] = s->out_dummy;
+        if (!s->out[1])
+            s->out[1] = s->out_dummy;
+    }
+
+    /* flush the resampler if necessary */
+    if (flush_needed) {
+        ret = opus_flush_resample(s, s->delayed_samples);
+        if (ret < 0) {
+            av_log(s->avctx, AV_LOG_ERROR, "Error flushing the resampler.\n");
+            return ret;
+        }
+        avresample_close(s->avr); /* reopened by the next SILK/hybrid frame */
+        output_samples += s->delayed_samples;
+        s->delayed_samples = 0;
+
+        if (!buf)
+            goto finish;
+    }
+
+    /* decode all the frames in the packet */
+    for (i = 0; i < s->packet.frame_count; i++) {
+        int size = s->packet.frame_size[i];
+        int samples = opus_decode_frame(s, buf + s->packet.frame_offset[i], size);
+
+        if
(samples < 0) {
+            av_log(s->avctx, AV_LOG_ERROR, "Error decoding an Opus frame.\n");
+            if (s->avctx->err_recognition & AV_EF_EXPLODE)
+                return samples;
+            /* conceal the broken frame with silence of the nominal duration */
+            for (j = 0; j < s->output_channels; j++)
+                memset(s->out[j], 0, s->packet.frame_duration * sizeof(float));
+            samples = s->packet.frame_duration;
+        }
+        output_samples += samples;
+
+        for (j = 0; j < s->output_channels; j++)
+            s->out[j] += samples;
+        s->out_size -= samples * sizeof(float);
+    }
+
+finish:
+    s->out[0] = s->out[1] = NULL; /* the caller sets these again for every packet */
+    s->out_size = 0;
+
+    return output_samples;
+}
+/**
+ * AVCodec.decode callback: decode one Opus packet, which holds one
+ * sub-packet per stream, into a planar float frame. A packet without data
+ * drains samples still buffered in the resampler.
+ *
+ * @return avpkt->size on success (0 when there is nothing to output),
+ *         a negative error code otherwise
+ */
+static int opus_decode_packet(AVCodecContext *avctx, void *data,
+                              int *got_frame_ptr, AVPacket *avpkt)
+{
+    OpusContext *c      = avctx->priv_data;
+    AVFrame *frame      = data;
+    const uint8_t *buf  = avpkt->data;
+    int buf_size        = avpkt->size;
+    int coded_samples   = 0;
+    int decoded_samples = 0;
+    int i, ret;
+
+    /* decode the header of the first sub-packet to find out the sample count */
+    if (buf) {
+        OpusPacket *pkt = &c->streams[0].packet;
+        ret = ff_opus_parse_packet(pkt, buf, buf_size, c->nb_streams > 1);
+        if (ret < 0) {
+            av_log(avctx, AV_LOG_ERROR, "Error parsing the packet header.\n");
+            return ret;
+        }
+        coded_samples += pkt->frame_count * pkt->frame_duration;
+        c->streams[0].silk_samplerate = get_silk_samplerate(pkt->config);
+    }
+    /* upper bound used to size the buffer; the real count is set at the end */
+    frame->nb_samples = coded_samples + c->streams[0].delayed_samples;
+
+    /* no input or buffered data => nothing to do */
+    if (!frame->nb_samples) {
+        *got_frame_ptr = 0;
+        return 0;
+    }
+
+    /* setup the data buffers */
+    ret = ff_get_buffer(avctx, frame, 0);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return ret;
+    }
+    frame->nb_samples = 0;
+    /* point every mapped stream channel at its plane of the output frame */
+    for (i = 0; i < avctx->channels; i++) {
+        ChannelMap *map = &c->channel_maps[i];
+        if (!map->copy)
+            c->streams[map->stream_idx].out[map->channel_idx] = (float*)frame->extended_data[i];
+    }
+
+    for (i = 0; i < c->nb_streams; i++)
+        c->streams[i].out_size = frame->linesize[0];
+
+    /* decode each sub-packet */
+    for (i = 0; i <
c->nb_streams; i++) {
+        OpusStreamContext *s = &c->streams[i];
+        /* the header of stream 0 was already parsed above */
+        if (i && buf) {
+            ret = ff_opus_parse_packet(&s->packet, buf, buf_size, i != c->nb_streams - 1);
+            if (ret < 0) {
+                av_log(avctx, AV_LOG_ERROR, "Error parsing the packet header.\n");
+                return ret;
+            }
+            s->silk_samplerate = get_silk_samplerate(s->packet.config);
+        }
+
+        ret = opus_decode_subpacket(&c->streams[i], buf,
+                                    s->packet.data_size, coded_samples);
+        if (ret < 0)
+            return ret;
+        if (decoded_samples && ret != decoded_samples) {
+            av_log(avctx, AV_LOG_ERROR, "Different numbers of decoded samples "
+                   "in a multi-channel stream\n");
+            return AVERROR_INVALIDDATA;
+        }
+        decoded_samples = ret;
+        buf      += s->packet.packet_size; /* advance to the next sub-packet */
+        buf_size -= s->packet.packet_size;
+    }
+
+    for (i = 0; i < avctx->channels; i++) {
+        ChannelMap *map = &c->channel_maps[i];
+
+        /* handle copied channels */
+        if (map->copy) {
+            memcpy(frame->extended_data[i],
+                   frame->extended_data[map->copy_idx],
+                   frame->linesize[0]);
+        } else if (map->silence) {
+            memset(frame->extended_data[i], 0, frame->linesize[0]);
+        }
+        /* apply the output gain when c->gain_i is non-zero */
+        if (c->gain_i) {
+            c->fdsp.vector_fmul_scalar((float*)frame->extended_data[i],
+                                       (float*)frame->extended_data[i],
+                                       c->gain, FFALIGN(decoded_samples, 8));
+        }
+    }
+
+    frame->nb_samples = decoded_samples;
+    *got_frame_ptr    = !!decoded_samples;
+
+    return avpkt->size;
+}
+/**
+ * AVCodec.flush callback: for every stream, forget the parsed packet and
+ * the delayed samples, empty the CELT delay FIFO, close the resampler and
+ * reset the SILK and CELT decoders.
+ */
+static av_cold void opus_decode_flush(AVCodecContext *ctx)
+{
+    OpusContext *c = ctx->priv_data;
+    int i;
+
+    for (i = 0; i < c->nb_streams; i++) {
+        OpusStreamContext *s = &c->streams[i];
+
+        memset(&s->packet, 0, sizeof(s->packet));
+        s->delayed_samples = 0;
+
+        if (s->celt_delay)
+            av_audio_fifo_drain(s->celt_delay, av_audio_fifo_size(s->celt_delay));
+        avresample_close(s->avr);
+
+        ff_silk_flush(s->silk);
+        ff_celt_flush(s->celt);
+    }
+}
+/**
+ * AVCodec.close callback: free the per-stream decoders, buffers, FIFOs and
+ * resamplers, then the stream array and the channel maps. Also used as the
+ * error path of opus_decode_init().
+ *
+ * @return 0
+ */
+static av_cold int opus_decode_close(AVCodecContext *avctx)
+{
+    OpusContext *c = avctx->priv_data;
+    int i;
+
+    for (i = 0; i < c->nb_streams; i++) {
+        OpusStreamContext *s = &c->streams[i];
+
+
ff_silk_free(&s->silk);
+        ff_celt_free(&s->celt);
+
+        av_freep(&s->out_dummy);
+        s->out_dummy_allocated_size = 0;
+
+        av_audio_fifo_free(s->celt_delay);
+        avresample_free(&s->avr);
+    }
+
+    av_freep(&c->streams);
+    c->nb_streams = 0;
+
+    av_freep(&c->channel_maps);
+
+    return 0;
+}
+
+/**
+ * AVCodec.init callback: set the output format (planar float, 48 kHz),
+ * parse the extradata to learn the stream/channel configuration and
+ * create one SILK decoder, CELT decoder, resampler and CELT delay FIFO
+ * per stream.
+ *
+ * @return 0 on success, a negative error code otherwise; on failure after
+ *         the stream array was requested, everything allocated so far is
+ *         released through opus_decode_close()
+ */
+static av_cold int opus_decode_init(AVCodecContext *avctx)
+{
+    OpusContext *c = avctx->priv_data;
+    int ret, i, j;
+
+    avctx->sample_fmt  = AV_SAMPLE_FMT_FLTP;
+    avctx->sample_rate = 48000;
+
+    avpriv_float_dsp_init(&c->fdsp, 0);
+
+    /* find out the channel configuration */
+    ret = ff_opus_parse_extradata(avctx, c);
+    if (ret < 0)
+        return ret;
+
+    /* allocate and init each independent decoder */
+    c->streams = av_mallocz_array(c->nb_streams, sizeof(*c->streams));
+    if (!c->streams) {
+        c->nb_streams = 0;
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    for (i = 0; i < c->nb_streams; i++) {
+        OpusStreamContext *s = &c->streams[i];
+        uint64_t layout;
+
+        /* the stereo streams come first */
+        s->output_channels = (i < c->nb_stereo_streams) ? 2 : 1;
+
+        s->avctx = avctx;
+
+        for (j = 0; j < s->output_channels; j++) {
+            s->silk_output[j]       = s->silk_buf[j];
+            s->celt_output[j]       = s->celt_buf[j];
+            s->redundancy_output[j] = s->redundancy_buf[j];
+        }
+
+        s->fdsp = &c->fdsp;
+
+        s->avr = avresample_alloc_context();
+        if (!s->avr) {
+            /* ret still holds the non-negative result of an earlier call
+             * here; without an explicit error code init would report
+             * success after tearing everything down */
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        layout = (s->output_channels == 1) ?
AV_CH_LAYOUT_MONO : AV_CH_LAYOUT_STEREO;
+        av_opt_set_int(s->avr, "in_sample_fmt",      avctx->sample_fmt,  0);
+        av_opt_set_int(s->avr, "out_sample_fmt",     avctx->sample_fmt,  0);
+        av_opt_set_int(s->avr, "in_channel_layout",  layout,             0);
+        av_opt_set_int(s->avr, "out_channel_layout", layout,             0);
+        av_opt_set_int(s->avr, "out_sample_rate",    avctx->sample_rate, 0);
+        /* the input sample rate is set later, when the resampler is opened */
+        ret = ff_silk_init(avctx, &s->silk, s->output_channels);
+        if (ret < 0)
+            goto fail;
+
+        ret = ff_celt_init(avctx, &s->celt, s->output_channels);
+        if (ret < 0)
+            goto fail;
+
+        s->celt_delay = av_audio_fifo_alloc(avctx->sample_fmt,
+                                            s->output_channels, 1024);
+        if (!s->celt_delay) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+    }
+
+    return 0;
+fail:
+    opus_decode_close(avctx); /* releases everything allocated so far */
+    return ret;
+}
+/* Codec registration entry for the native Opus decoder. */
+AVCodec ff_opus_decoder = {
+    .name            = "opus",
+    .long_name       = NULL_IF_CONFIG_SMALL("Opus"),
+    .type            = AVMEDIA_TYPE_AUDIO,
+    .id              = AV_CODEC_ID_OPUS,
+    .priv_data_size  = sizeof(OpusContext),
+    .init            = opus_decode_init,
+    .close           = opus_decode_close,
+    .decode          = opus_decode_packet,
+    .flush           = opus_decode_flush,
+    .capabilities    = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
+};
diff --git a/libavcodec/version.h b/libavcodec/version.h
index fd7aa5d94c..22343d5396 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -29,8 +29,8 @@
 #include "libavutil/version.h"
 
 #define LIBAVCODEC_VERSION_MAJOR 55
-#define LIBAVCODEC_VERSION_MINOR 50
-#define LIBAVCODEC_VERSION_MICRO  3
+#define LIBAVCODEC_VERSION_MINOR 51
+#define LIBAVCODEC_VERSION_MICRO  0
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
                                                LIBAVCODEC_VERSION_MINOR, \
diff --git a/tests/Makefile b/tests/Makefile
index 004b44cc33..414c8f7dcd 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -93,6 +93,7 @@ include $(SRC_PATH)/tests/fate/microsoft.mak
 include $(SRC_PATH)/tests/fate/monkeysaudio.mak
 include $(SRC_PATH)/tests/fate/mp3.mak
 include $(SRC_PATH)/tests/fate/mpc.mak
+include $(SRC_PATH)/tests/fate/opus.mak
 include $(SRC_PATH)/tests/fate/pcm.mak
include $(SRC_PATH)/tests/fate/probe.mak include $(SRC_PATH)/tests/fate/prores.mak diff --git a/tests/fate/opus.mak b/tests/fate/opus.mak new file mode 100644 index 0000000000..6c8bc9bd2c --- /dev/null +++ b/tests/fate/opus.mak @@ -0,0 +1,39 @@ +# The samples were produced by simply rewrapping the official test vectors from +# their custom format into Matroska. +# The reference files were created with our decoder and tested against the +# libopus output with the official opus_compare tool. We cannot use libopus +# output as reference directly, because the use of different resamplers would +# require too high fuzz values, which can hide bugs. +# Before adding new tests here, always make sure they pass opus_compare. + +OPUS_CELT_SAMPLES = $(addprefix testvector, 01 07 11) tron.6ch.tinypkts +OPUS_HYBRID_SAMPLES = $(addprefix testvector, 05 06) +OPUS_SILK_SAMPLES = $(addprefix testvector, 02 03 04) +OPUS_SAMPLES = $(addprefix testvector, 08 09 10 12) + +define FATE_OPUS_TEST +FATE_OPUS += fate-opus-$(1) +FATE_OPUS$(2) += fate-opus-$(1) +fate-opus-$(1): CMD = avconv -i $(TARGET_SAMPLES)/opus/$(1).mka -f f32le - +fate-opus-$(1): REF = $(TARGET_SAMPLES)/opus/$(1).f32 +endef + +$(foreach N,$(OPUS_CELT_SAMPLES), $(eval $(call FATE_OPUS_TEST,$(N),_CELT))) +$(foreach N,$(OPUS_HYBRID_SAMPLES),$(eval $(call FATE_OPUS_TEST,$(N),_HYBRID))) +$(foreach N,$(OPUS_SILK_SAMPLES), $(eval $(call FATE_OPUS_TEST,$(N),_SILK))) +$(foreach N,$(OPUS_SAMPLES), $(eval $(call FATE_OPUS_TEST,$(N),))) + +FATE_OPUS := $(sort $(FATE_OPUS)) + +$(FATE_OPUS): CMP = stddev +$(FATE_OPUS): CMP_UNIT = f32 +$(FATE_OPUS): FUZZ = 3 + +$(FATE_OPUS_CELT): CMP = oneoff +$(FATE_OPUS_CELT): FUZZ = 5 + +FATE_SAMPLES_AVCONV-$(call DEMDEC, MATROSKA, OPUS) += $(FATE_OPUS) +fate-opus-celt: $(FATE_OPUS_CELT) +fate-opus-hybrid: $(FATE_OPUS_HYBRID) +fate-opus-silk: $(FATE_OPUS_SILK) +fate-opus: $(FATE_OPUS)