Add initial support for RTP hinting in the mov muxer

Originally committed as revision 23164 to svn://svn.ffmpeg.org/ffmpeg/trunk
2025-02-20 04:00:45 +00:00 · 2010-05-18 19:47:24 +00:00 · 2010-05-18 19:47:24 +00:00 · e977af6f2e
commit e977af6f2e
parent 27a826c941
4 changed files with 414 additions and 3 deletions
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@ -114,7 +114,7 @@ OBJS-$(CONFIG_MM_DEMUXER)                += mm.o
 OBJS-$(CONFIG_MMF_DEMUXER)               += mmf.o raw.o
 OBJS-$(CONFIG_MMF_MUXER)                 += mmf.o riff.o
 OBJS-$(CONFIG_MOV_DEMUXER)               += mov.o riff.o isom.o
-OBJS-$(CONFIG_MOV_MUXER)                 += movenc.o riff.o isom.o avc.o
+OBJS-$(CONFIG_MOV_MUXER)                 += movenc.o riff.o isom.o avc.o movenchint.o
 OBJS-$(CONFIG_MP2_MUXER)                 += mp3.o id3v1.o
 OBJS-$(CONFIG_MP3_DEMUXER)               += mp3.o id3v1.o id3v2.o
 OBJS-$(CONFIG_MP3_MUXER)                 += mp3.o id3v1.o id3v2.o
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@ -29,6 +29,8 @@
 #include "avc.h"
 #include "libavcodec/get_bits.h"
 #include "libavcodec/put_bits.h"
+#include "internal.h"
+#include "libavutil/avstring.h"

 #undef NDEBUG
 #include <assert.h>
@ -806,6 +808,26 @@ static int mov_write_video_tag(ByteIOContext *pb, MOVTrack *track)
    return updateSize(pb, pos);
 }

+/**
+ * Write the "rtp " sample description entry of a hint track.
+ *
+ * The entry holds the hint track version fields, the track's
+ * max_packet_size and a nested "tims" box with the track timescale.
+ *
+ * @param pb    output context the entry is written to
+ * @param track hint track being described
+ * @return whatever updateSize() returns for this entry
+ */
+static int mov_write_rtp_tag(ByteIOContext *pb, MOVTrack *track)
+{
+    const int64_t entry_start = url_ftell(pb);
+
+    /* Entry header; the size placeholder is patched by updateSize() */
+    put_be32(pb, 0);
+    put_tag(pb, "rtp ");
+    put_be16(pb, 0); /* Reserved (6 zero bytes in total) */
+    put_be16(pb, 0);
+    put_be16(pb, 0);
+    put_be16(pb, 1); /* Data-reference index */
+
+    /* Hint track description */
+    put_be16(pb, 1); /* Hint track version */
+    put_be16(pb, 1); /* Highest compatible version */
+    put_be32(pb, track->max_packet_size); /* Max packet size */
+
+    /* Nested "tims" box: 4 bytes size + 4 bytes tag + 4 bytes timescale */
+    put_be32(pb, 12);
+    put_tag(pb, "tims");
+    put_be32(pb, track->timescale);
+
+    return updateSize(pb, entry_start);
+}
+
 static int mov_write_stsd_tag(ByteIOContext *pb, MOVTrack *track)
 {
    int64_t pos = url_ftell(pb);
@ -819,6 +841,8 @@ static int mov_write_stsd_tag(ByteIOContext *pb, MOVTrack *track)
        mov_write_audio_tag(pb, track);
    else if (track->enc->codec_type == AVMEDIA_TYPE_SUBTITLE)
        mov_write_subtitle_tag(pb, track);
+    else if (track->enc->codec_tag == MKTAG('r','t','p',' '))
+        mov_write_rtp_tag(pb, track);
    return updateSize(pb, pos);
 }

@ -918,7 +942,8 @@ static int mov_write_stbl_tag(ByteIOContext *pb, MOVTrack *track)
    put_tag(pb, "stbl");
    mov_write_stsd_tag(pb, track);
    mov_write_stts_tag(pb, track);
-    if (track->enc->codec_type == AVMEDIA_TYPE_VIDEO &&
+    if ((track->enc->codec_type == AVMEDIA_TYPE_VIDEO ||
+         track->enc->codec_tag == MKTAG('r','t','p',' ')) &&
        track->hasKeyframes && track->hasKeyframes < track->entry)
        mov_write_stss_tag(pb, track, MOV_SYNC_SAMPLE);
    if (track->mode == MODE_MOV && track->flags & MOV_TRACK_STPS)
@ -1005,6 +1030,9 @@ static int mov_write_hdlr_tag(ByteIOContext *pb, MOVTrack *track)
            if (track->tag == MKTAG('t','x','3','g')) hdlr_type = "sbtl";
            else                                      hdlr_type = "text";
            descr = "SubtitleHandler";
+        } else if (track->enc->codec_tag == MKTAG('r','t','p',' ')) {
+            hdlr_type = "hint";
+            descr = "HintHandler";
        }
    }

@ -1024,6 +1052,21 @@ static int mov_write_hdlr_tag(ByteIOContext *pb, MOVTrack *track)
    return updateSize(pb, pos);
 }

+/**
+ * Write the "hmhd" atom of a hint track with every field set to zero.
+ *
+ * @param pb output context the atom is written to
+ * @return the number of bytes written (always 28)
+ */
+static int mov_write_hmhd_tag(ByteIOContext *pb)
+{
+    /* 8 bytes atom header, 4 bytes version/flags, 16 bytes of fields */
+    const int atom_size = 28;
+
+    put_be32(pb, atom_size);
+    put_tag(pb, "hmhd");
+    put_be32(pb, 0); /* version, flags */
+
+    /* This atom must be present, but leaving the values at zero
+     * seems harmless. */
+    put_be16(pb, 0); /* maxPDUsize */
+    put_be16(pb, 0); /* avgPDUsize */
+    put_be32(pb, 0); /* maxbitrate */
+    put_be32(pb, 0); /* avgbitrate */
+    put_be32(pb, 0); /* reserved */
+
+    return atom_size;
+}
+
 static int mov_write_minf_tag(ByteIOContext *pb, MOVTrack *track)
 {
    int64_t pos = url_ftell(pb);
@ -1036,6 +1079,8 @@ static int mov_write_minf_tag(ByteIOContext *pb, MOVTrack *track)
    else if (track->enc->codec_type == AVMEDIA_TYPE_SUBTITLE) {
        if (track->tag == MKTAG('t','e','x','t')) mov_write_gmhd_tag(pb);
        else                                      mov_write_nmhd_tag(pb);
+    } else if (track->tag == MKTAG('r','t','p',' ')) {
+        mov_write_hmhd_tag(pb);
    }
    if (track->mode == MODE_MOV) /* FIXME: Why do it for MODE_MOV only ? */
        mov_write_hdlr_tag(pb, NULL);
@ -1191,6 +1236,25 @@ static int mov_write_uuid_tag_psp(ByteIOContext *pb, MOVTrack *mov)
    return 0x34;
 }

+/**
+ * Write a "udta" atom containing "hnti"/"sdp " with the SDP media
+ * description of a hinted stream.
+ *
+ * The SDP text is produced by ff_sdp_write_media() and followed by an
+ * "a=control:streamid=<index>" line. It is built in a fixed 1000 byte
+ * buffer.
+ *
+ * @param pb    output context the atom is written to
+ * @param ctx   codec context passed to ff_sdp_write_media()
+ * @param index value used for the streamid in the control attribute
+ * @return the total size of the written "udta" atom in bytes
+ */
+static int mov_write_udta_sdp(ByteIOContext *pb, AVCodecContext *ctx, int index)
+{
+    char sdp[1000] = "";
+    int sdp_len, sdp_box, hnti_box, udta_box;
+
+    ff_sdp_write_media(sdp, sizeof(sdp), ctx, NULL, 0, 0);
+    av_strlcatf(sdp, sizeof(sdp), "a=control:streamid=%d\r\n", index);
+    sdp_len = strlen(sdp);
+
+    /* Each enclosing box adds an 8 byte header (size + tag) */
+    sdp_box  = sdp_len  + 8;
+    hnti_box = sdp_box  + 8;
+    udta_box = hnti_box + 8;
+
+    put_be32(pb, udta_box);
+    put_tag (pb, "udta");
+    put_be32(pb, hnti_box);
+    put_tag (pb, "hnti");
+    put_be32(pb, sdp_box);
+    put_tag (pb, "sdp ");
+    put_buffer(pb, sdp, sdp_len);
+
+    return udta_box;
+}
+
 static int mov_write_trak_tag(ByteIOContext *pb, MOVTrack *track, AVStream *st)
 {
    int64_t pos = url_ftell(pb);
@ -1204,6 +1268,8 @@ static int mov_write_trak_tag(ByteIOContext *pb, MOVTrack *track, AVStream *st)
    mov_write_mdia_tag(pb, track);
    if (track->mode == MODE_PSP)
        mov_write_uuid_tag_psp(pb,track);  // PSP Movies require this uuid box
+    if (track->tag == MKTAG('r','t','p',' '))
+        mov_write_udta_sdp(pb, track->rtp_ctx->streams[0]->codec, track->trackID);
    return updateSize(pb, pos);
 }

@ -1618,6 +1684,13 @@ static int mov_write_moov_tag(ByteIOContext *pb, MOVMuxContext *mov,
            mov->tracks[i].tref_tag = MKTAG('c','h','a','p');
            mov->tracks[i].tref_id = mov->tracks[mov->chapter_track].trackID;
        }
+    for (i = 0; i < mov->nb_streams; i++) {
+        if (mov->tracks[i].tag == MKTAG('r','t','p',' ')) {
+            mov->tracks[i].tref_tag = MKTAG('h','i','n','t');
+            mov->tracks[i].tref_id =
+                mov->tracks[mov->tracks[i].src_track].trackID;
+        }
+    }

    mov_write_mvhd_tag(pb, mov);
    //mov_write_iods_tag(pb, mov);
@ -1878,6 +1951,9 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
    mov->mdat_size += size;

    put_flush_packet(pb);
+
+    if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams)
+        ff_mov_add_hinted_packet(s, pkt, trk->hint_track, trk->entry);
    return 0;
 }

@ -1920,7 +1996,7 @@ static int mov_write_header(AVFormatContext *s)
 {
    ByteIOContext *pb = s->pb;
    MOVMuxContext *mov = s->priv_data;
-    int i;
+    int i, hint_track = 0;

    if (url_is_streamed(s->pb)) {
        av_log(s, AV_LOG_ERROR, "muxer does not support non seekable output\n");
@ -1951,6 +2027,18 @@ static int mov_write_header(AVFormatContext *s)
    if (mov->mode & (MODE_MOV|MODE_IPOD) && s->nb_chapters)
        mov->chapter_track = mov->nb_streams++;

+    if (s->flags & AVFMT_FLAG_RTP_HINT) {
+        /* Add hint tracks for each audio and video stream */
+        hint_track = mov->nb_streams;
+        for (i = 0; i < s->nb_streams; i++) {
+            AVStream *st = s->streams[i];
+            if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO ||
+                st->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
+                mov->nb_streams++;
+            }
+        }
+    }
+
    mov->tracks = av_mallocz(mov->nb_streams*sizeof(*mov->tracks));
    if (!mov->tracks)
        return AVERROR(ENOMEM);
@ -1971,6 +2059,9 @@ static int mov_write_header(AVFormatContext *s)
                   "codec not currently supported in container\n", i);
            goto error;
        }
+        /* If hinting of this track is enabled by a later hint track,
+         * this is updated. */
+        track->hint_track = -1;
        if(st->codec->codec_type == AVMEDIA_TYPE_VIDEO){
            if (track->tag == MKTAG('m','x','3','p') || track->tag == MKTAG('m','x','3','n') ||
                track->tag == MKTAG('m','x','4','p') || track->tag == MKTAG('m','x','4','n') ||
@ -2025,6 +2116,18 @@ static int mov_write_header(AVFormatContext *s)
    if (mov->chapter_track)
        mov_create_chapter_track(s, mov->chapter_track);

+    if (s->flags & AVFMT_FLAG_RTP_HINT) {
+        /* Initialize the hint tracks for each audio and video stream */
+        for (i = 0; i < s->nb_streams; i++) {
+            AVStream *st = s->streams[i];
+            if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO ||
+                st->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
+                ff_mov_init_hinting(s, hint_track, i);
+                hint_track++;
+            }
+        }
+    }
+
    put_flush_packet(pb);

    return 0;
@ -2061,6 +2164,8 @@ static int mov_write_trailer(AVFormatContext *s)
        av_freep(&mov->tracks[mov->chapter_track].enc);

    for (i=0; i<mov->nb_streams; i++) {
+        if (mov->tracks[i].tag == MKTAG('r','t','p',' '))
+            ff_mov_close_hinting(&mov->tracks[i]);
        av_freep(&mov->tracks[i].cluster);

        if(mov->tracks[i].vosLen) av_free(mov->tracks[i].vosData);
--- a/libavformat/movenc.h
+++ b/libavformat/movenc.h
@ -29,6 +29,8 @@
 #define MOV_INDEX_CLUSTER_SIZE 16384
 #define MOV_TIMESCALE 1000

+#define RTP_MAX_PACKET_SIZE 1450
+
 #define MODE_MP4  0x01
 #define MODE_MOV  0x02
 #define MODE_3GP  0x04
@ -73,6 +75,13 @@ typedef struct MOVIndex {
    int         height; ///< active picture (w/o VBI) height for D-10/IMX
    uint32_t    tref_tag;
    int         tref_id; ///< trackID of the referenced track
+
+    int         hint_track;   ///< the track that hints this track, -1 if no hint track is set
+    int         src_track;    ///< the track that this hint track describes
+    AVFormatContext *rtp_ctx; ///< the format context for the hinting rtp muxer
+    uint32_t    prev_rtp_ts;
+    int64_t     cur_rtp_ts_unwrapped;
+    uint32_t    max_packet_size;
 } MOVTrack;

 typedef struct MOVMuxContext {
@ -87,4 +96,9 @@ typedef struct MOVMuxContext {

 int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt);

+int ff_mov_init_hinting(AVFormatContext *s, int index, int src_index);
+int ff_mov_add_hinted_packet(AVFormatContext *s, AVPacket *pkt,
+                             int track_index, int sample);
+void ff_mov_close_hinting(MOVTrack *track);
+
 #endif /* AVFORMAT_MOVENC_H */
--- a/libavformat/movenchint.c
+++ b/libavformat/movenchint.c
@ -0,0 +1,292 @@
+/*
+ * MOV, 3GP, MP4 muxer RTP hinting
+ * Copyright (c) 2010 Martin Storsjo
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "movenc.h"
+#include "libavutil/intreadwrite.h"
+
+/**
+ * Set up track number index as an RTP hint track for the stream
+ * src_index.
+ *
+ * A private "rtp" muxer context with a single stream is created for the
+ * hint track. That stream borrows the codec context of the source
+ * stream, and the muxer writes into a dynamic packet buffer limited to
+ * RTP_MAX_PACKET_SIZE. On success the source track's hint_track is set
+ * to index, so that packets written to it are also hinted.
+ *
+ * On failure everything allocated here is released, a warning is
+ * logged and the track gets a default timescale of 90000. The source
+ * track is then left unhinted.
+ *
+ * @param s         the mov muxer context
+ * @param index     index (in mov->tracks) of the hint track to set up
+ * @param src_index index of the stream/track to be hinted
+ * @return 0 on success, otherwise an error code: AVERROR(ENOENT) if no
+ *         "rtp" output format is available, AVERROR(ENOMEM) on
+ *         allocation failure, or the value returned by
+ *         url_open_dyn_packet_buf() / av_write_header()
+ */
+int ff_mov_init_hinting(AVFormatContext *s, int index, int src_index)
+{
+    MOVMuxContext *mov  = s->priv_data;
+    MOVTrack *track     = &mov->tracks[index];
+    MOVTrack *src_track = &mov->tracks[src_index];
+    AVStream *src_st    = s->streams[src_index];
+    int ret = AVERROR(ENOMEM);
+    AVOutputFormat *rtp_format = av_guess_format("rtp", NULL, NULL);
+
+    track->tag = MKTAG('r','t','p',' ');
+    track->src_track = src_index;
+    /* A hint track is never hinted itself. The track array is
+     * zero-initialized, so without this the field would read as
+     * "hinted by track 0" and every hint sample written would trigger
+     * a pointless hinting attempt in ff_mov_write_packet(). */
+    track->hint_track = -1;
+
+    if (!rtp_format) {
+        ret = AVERROR(ENOENT);
+        goto fail;
+    }
+
+    track->enc = avcodec_alloc_context();
+    if (!track->enc)
+        goto fail;
+    track->enc->codec_type = AVMEDIA_TYPE_DATA;
+    track->enc->codec_tag  = track->tag;
+
+    track->rtp_ctx = avformat_alloc_context();
+    if (!track->rtp_ctx)
+        goto fail;
+    track->rtp_ctx->oformat = rtp_format;
+    if (!av_new_stream(track->rtp_ctx, 0))
+        goto fail;
+
+    /* Copy stream parameters */
+    track->rtp_ctx->streams[0]->sample_aspect_ratio =
+                        src_st->sample_aspect_ratio;
+
+    /* Remove the allocated codec context, link to the original one
+     * instead, to give the rtp muxer access to codec parameters.
+     * From here on streams[0]->codec is borrowed and must not be
+     * freed together with the RTP stream. */
+    av_free(track->rtp_ctx->streams[0]->codec);
+    track->rtp_ctx->streams[0]->codec = src_st->codec;
+
+    if ((ret = url_open_dyn_packet_buf(&track->rtp_ctx->pb,
+                                       RTP_MAX_PACKET_SIZE)) < 0)
+        goto fail;
+    ret = av_write_header(track->rtp_ctx);
+    if (ret)
+        goto fail;
+
+    /* Copy the RTP AVStream timebase back to the hint AVStream */
+    track->timescale = track->rtp_ctx->streams[0]->time_base.den;
+
+    /* Mark the hinted track that packets written to it should be
+     * sent to this track for hinting. */
+    src_track->hint_track = index;
+    return 0;
+fail:
+    av_log(s, AV_LOG_WARNING,
+           "Unable to initialize hinting of stream %d\n", src_index);
+    /* Tear down in reverse order of construction: output buffer,
+     * stream, then the muxer context itself. */
+    if (track->rtp_ctx && track->rtp_ctx->pb) {
+        uint8_t *buf;
+        url_close_dyn_buf(track->rtp_ctx->pb, &buf);
+        av_free(buf);
+    }
+    if (track->rtp_ctx && track->rtp_ctx->streams[0]) {
+        av_metadata_free(&track->rtp_ctx->streams[0]->metadata);
+        av_free(track->rtp_ctx->streams[0]);
+    }
+    if (track->rtp_ctx) {
+        av_metadata_free(&track->rtp_ctx->metadata);
+        av_free(track->rtp_ctx->priv_data);
+        av_freep(&track->rtp_ctx);
+    }
+    av_freep(&track->enc);
+    /* Set a default timescale, to avoid crashes in dump_format */
+    track->timescale = 90000;
+    return ret;
+}
+
+/**
+ * Emit size bytes of data as a sequence of immediate constructors.
+ *
+ * Each constructor is 16 bytes: a type byte (1), a length byte and up
+ * to 14 bytes of data, zero-padded to 14 bytes.
+ *
+ * @param data    bytes to embed
+ * @param size    number of bytes in data
+ * @param out     buffer the constructors are written to
+ * @param entries counter incremented once per constructor written
+ */
+static void output_immediate(const uint8_t *data, int size,
+                             ByteIOContext *out, int *entries)
+{
+    const uint8_t *src = data;
+    int remaining;
+
+    for (remaining = size; remaining > 0; ) {
+        int chunk = remaining < 14 ? remaining : 14;
+        int pad   = 14 - chunk;
+
+        put_byte(out, 1);     /* immediate constructor */
+        put_byte(out, chunk); /* amount of valid data */
+        put_buffer(out, src, chunk);
+
+        /* Fill the unused part of the 14 byte data field with zeros */
+        while (pad-- > 0)
+            put_byte(out, 0);
+
+        src       += chunk;
+        remaining -= chunk;
+        (*entries)++;
+    }
+}
+
+/**
+ * Write the constructors describing one RTP packet payload.
+ *
+ * Currently the whole payload is stored inline through immediate
+ * constructors.
+ *
+ * @param data    payload bytes
+ * @param size    number of payload bytes
+ * @param out     buffer the constructors are written to
+ * @param entries counter incremented once per constructor written
+ */
+static void describe_payload(const uint8_t *data, int size,
+                             ByteIOContext *out, int *entries)
+{
+    /* Only one kind of constructor is produced so far */
+    output_immediate(data, size, out, entries);
+}
+
+/**
+ * Replace a 16-bit big-endian placeholder written earlier at pos with
+ * value, then return to the position the stream was at on entry.
+ */
+static void patch_be16(ByteIOContext *out, int64_t pos, int value)
+{
+    int64_t resume = url_ftell(out);
+
+    url_fseek(out, pos, SEEK_SET);
+    put_be16(out, value);
+    url_fseek(out, resume, SEEK_SET);
+}
+
+/**
+ * Write an RTP hint (that may contain one or more RTP packets)
+ * for the packets in data. data contains one or more packets with a
+ * BE32 size header.
+ *
+ * Packets whose second byte is in the range 200..204 are treated as
+ * RTCP and skipped. Parsing stops at the first packet whose length is
+ * at most 12 bytes or exceeds the remaining data.
+ *
+ * @param out buffer where the hints are written
+ * @param data buffer containing RTP packets
+ * @param size the size of the data buffer
+ * @param trk the MOVTrack for the hint track
+ * @param pts pointer where the timestamp for the written RTP hint is stored
+ *            (only written if it holds AV_NOPTS_VALUE on entry)
+ * @return the number of RTP packets in the written hint
+ */
+static int write_hint_packets(ByteIOContext *out, const uint8_t *data,
+                              int size, MOVTrack *trk, int64_t *pts)
+{
+    int64_t count_pos = url_ftell(out);
+    int count = 0;
+
+    /* RTPsample header */
+    put_be16(out, 0); /* packet count, patched once all packets are seen */
+    put_be16(out, 0); /* reserved */
+
+    while (size > 4) {
+        uint32_t packet_len = AV_RB32(data);
+        uint32_t ts;
+        int64_t entries_pos;
+        int entries = 0;
+
+        /* Step over the BE32 length prefix */
+        data += 4;
+        size -= 4;
+        if (packet_len > size || packet_len <= 12)
+            break;
+
+        if (data[1] >= 200 && data[1] <= 204) {
+            /* RTCP packet, just skip */
+            data += packet_len;
+            size -= packet_len;
+            continue;
+        }
+
+        /* Track the largest RTP packet seen on this hint track */
+        if (packet_len > trk->max_packet_size)
+            trk->max_packet_size = packet_len;
+
+        ts = AV_RB32(&data[4]);
+        if (trk->prev_rtp_ts == 0)
+            trk->prev_rtp_ts = ts;
+        /* Unwrap the 32-bit RTP timestamp that wraps around often
+         * into a not (as often) wrapping 64-bit timestamp. */
+        trk->cur_rtp_ts_unwrapped += (int32_t) (ts - trk->prev_rtp_ts);
+        trk->prev_rtp_ts = ts;
+        if (*pts == AV_NOPTS_VALUE)
+            *pts = trk->cur_rtp_ts_unwrapped;
+
+        count++;
+        /* RTPpacket header */
+        put_be32(out, 0);                   /* relative_time */
+        put_buffer(out, data, 2);           /* RTP header */
+        put_be16(out, AV_RB16(&data[2]));   /* RTPsequenceseed */
+        put_be16(out, 0);                   /* reserved + flags */
+        entries_pos = url_ftell(out);
+        put_be16(out, 0);                   /* entry count, patched below */
+
+        /* Write one or more constructors describing the payload data,
+         * i.e. everything after the 12 byte RTP header */
+        describe_payload(data + 12, packet_len - 12, out, &entries);
+        data += packet_len;
+        size -= packet_len;
+
+        patch_be16(out, entries_pos, entries);
+    }
+
+    patch_be16(out, count_pos, count);
+    return count;
+}
+
+/**
+ * Produce and store the RTP hint sample for one media packet.
+ *
+ * The packet is passed through the hint track's private RTP muxer. The
+ * RTP packets the muxer emits are converted into a hint sample by
+ * write_hint_packets(), and that sample is written into the hint track
+ * with ff_mov_write_packet(). Nothing is written if the muxer produced
+ * no RTP packets.
+ *
+ * @param s           the mov muxer context
+ * @param pkt         the media packet that was just written
+ * @param track_index index (in mov->tracks) of the hint track
+ * @param sample      sample number of pkt in its track (unused here)
+ * @return 0 on success, AVERROR(ENOENT) if the track has no RTP muxer,
+ *         AVERROR(ENOMEM) if the RTP muxer has no output buffer, or the
+ *         error returned when opening a buffer or writing the hint
+ *         sample fails
+ */
+int ff_mov_add_hinted_packet(AVFormatContext *s, AVPacket *pkt,
+                             int track_index, int sample)
+{
+    MOVMuxContext *mov = s->priv_data;
+    MOVTrack *trk = &mov->tracks[track_index];
+    AVFormatContext *rtp_ctx = trk->rtp_ctx;
+    uint8_t *buf = NULL;
+    int size;
+    ByteIOContext *hintbuf = NULL;
+    AVPacket hint_pkt;
+    AVPacket local_pkt;
+    int ret = 0, count;
+
+    if (!rtp_ctx)
+        return AVERROR(ENOENT);
+    if (!rtp_ctx->pb)
+        return AVERROR(ENOMEM);
+
+    /* Feed the packet to the RTP muxer, with timestamps rescaled from
+     * the source stream time base to the RTP stream time base. The
+     * muxer's return value is not checked; whatever it produced is
+     * drained below. */
+    local_pkt = *pkt;
+    local_pkt.stream_index = 0;
+    local_pkt.pts = av_rescale_q(pkt->pts,
+        s->streams[pkt->stream_index]->time_base,
+        rtp_ctx->streams[0]->time_base);
+    local_pkt.dts = av_rescale_q(pkt->dts,
+        s->streams[pkt->stream_index]->time_base,
+        rtp_ctx->streams[0]->time_base);
+    av_write_frame(rtp_ctx, &local_pkt);
+
+    /* Fetch the output from the RTP muxer, open a new output buffer
+     * for next time. The stale pointer is cleared first, so that a
+     * failed reopen leaves pb NULL (which the guard above and
+     * ff_mov_close_hinting() test for) instead of pointing at the
+     * buffer that was just closed. */
+    size = url_close_dyn_buf(rtp_ctx->pb, &buf);
+    rtp_ctx->pb = NULL;
+    if ((ret = url_open_dyn_packet_buf(&rtp_ctx->pb,
+                                       RTP_MAX_PACKET_SIZE)) < 0)
+        goto done;
+
+    if (size <= 0)
+        goto done;
+
+    /* Open a buffer for writing the hint */
+    if ((ret = url_open_dyn_buf(&hintbuf)) < 0)
+        goto done;
+    av_init_packet(&hint_pkt);
+    count = write_hint_packets(hintbuf, buf, size, trk, &hint_pkt.dts);
+    av_freep(&buf);
+
+    /* Write the hint data into the hint track */
+    hint_pkt.size = size = url_close_dyn_buf(hintbuf, &buf);
+    hint_pkt.data = buf;
+    hint_pkt.pts  = hint_pkt.dts;
+    hint_pkt.stream_index = track_index;
+    if (pkt->flags & AV_PKT_FLAG_KEY)
+        hint_pkt.flags |= AV_PKT_FLAG_KEY;
+    /* A hint without any RTP packet is dropped; otherwise report a
+     * failure to store the sample to the caller. */
+    if (count > 0)
+        ret = ff_mov_write_packet(s, &hint_pkt);
+done:
+    av_free(buf);
+    return ret;
+}
+
+/**
+ * Release everything ff_mov_init_hinting() set up for a hint track.
+ *
+ * Frees the track's codec context and, if an RTP muxer context exists,
+ * finishes it and frees its output buffer, stream and the context
+ * itself. The codec context of the RTP stream is borrowed from the
+ * source stream and is not freed here. track->rtp_ctx is NULL on
+ * return.
+ *
+ * @param track the hint track to clean up
+ */
+void ff_mov_close_hinting(MOVTrack *track) {
+    AVFormatContext* rtp_ctx = track->rtp_ctx;
+    uint8_t *ptr;
+
+    av_freep(&track->enc);
+    if (!rtp_ctx)
+        return;
+    if (rtp_ctx->pb) {
+        av_write_trailer(rtp_ctx);
+        url_close_dyn_buf(rtp_ctx->pb, &ptr);
+        av_free(ptr);
+    } else {
+        /* Without an output buffer the trailer is not written, so the
+         * muxer's private data is released here, as the failure path
+         * of ff_mov_init_hinting() does.
+         * NOTE(review): assumes av_write_trailer() releases priv_data
+         * itself in the branch above - confirm. */
+        av_free(rtp_ctx->priv_data);
+    }
+    av_metadata_free(&rtp_ctx->streams[0]->metadata);
+    av_metadata_free(&rtp_ctx->metadata);
+    av_free(rtp_ctx->streams[0]);
+    /* Free through the track so that no dangling pointer is left */
+    av_freep(&track->rtp_ctx);
+}
+