third_party_ffmpeg/libavcodec/assdec.c

/*
 * SSA/ASS decoder
 * Copyright (c) 2010  Aurelien Jacobs <aurel@gnuage.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <string.h>

#include "avcodec.h"
#include "ass.h"
#include "codec_internal.h"
#include "config_components.h"
#include "libavutil/internal.h"
#include "libavutil/mem.h"

/**
 * Decoder init callback: duplicate the codec extradata into
 * avctx->subtitle_header as a NUL-terminated buffer.
 *
 * subtitle_header_size is set to the extradata size and does not count the
 * terminating NUL byte.
 *
 * @param avctx codec context whose extradata is copied
 * @return 0 on success, AVERROR(ENOMEM) if the header cannot be allocated
 */
static av_cold int ass_decode_init(AVCodecContext *avctx)
{
    const int header_size = avctx->extradata_size;

    /* One extra byte is reserved for the terminating NUL. */
    avctx->subtitle_header = av_malloc(header_size + 1);
    if (avctx->subtitle_header == NULL)
        return AVERROR(ENOMEM);

    /* Skip the copy entirely when there is no extradata to copy. */
    if (header_size != 0)
        memcpy(avctx->subtitle_header, avctx->extradata, header_size);

    avctx->subtitle_header[header_size] = 0;
    avctx->subtitle_header_size         = header_size;

    return 0;
}

/**
 * Decode one packet into a single SUBTITLE_ASS rectangle.
 *
 * The packet payload is copied as a NUL-terminated string into
 * sub->rects[0]->ass; the text itself is not parsed here.
 *
 * @param avctx        codec context (not used by this function)
 * @param sub          subtitle to fill; holds exactly one rect on success.
 *                     On allocation failure everything allocated here is
 *                     released again and sub->rects is reset to NULL.
 * @param got_sub_ptr  set to 1 when a subtitle was produced, left untouched
 *                     otherwise
 * @param avpkt        input packet
 * @return avpkt->size on success or when avpkt->size <= 0,
 *         AVERROR(ENOMEM) if an allocation fails
 */
static int ass_decode_frame(AVCodecContext *avctx, AVSubtitle *sub,
                            int *got_sub_ptr, const AVPacket *avpkt)
{
    if (avpkt->size <= 0)
        return avpkt->size;

    sub->rects = av_malloc(sizeof(*sub->rects));
    if (!sub->rects)
        return AVERROR(ENOMEM);

    sub->rects[0] = av_mallocz(sizeof(*sub->rects[0]));
    if (!sub->rects[0])
        goto fail;

    sub->rects[0]->type = SUBTITLE_ASS;
    /* Bound the copy by the packet size instead of scanning for a NUL
     * byte without any limit; the copy still stops at the first NUL. */
    sub->rects[0]->ass  = av_strndup((const char *)avpkt->data, avpkt->size);
    if (!sub->rects[0]->ass)
        goto fail;

    /* Publish the rect only once it is complete, so num_rects is never
     * non-zero while *got_sub_ptr is unset. */
    sub->num_rects = 1;
    *got_sub_ptr   = 1;
    return avpkt->size;

fail:
    /* Unwind locally rather than relying on the caller to free a
     * half-built subtitle. av_freep() NULLs the pointers, so a later
     * cleanup by the caller remains harmless. */
    av_freep(&sub->rects[0]);
    av_freep(&sub->rects);
    return AVERROR(ENOMEM);
}

#if CONFIG_SSA_DECODER
/*
 * Codec descriptor for the decoder registered under the name "ssa".
 * Only built when CONFIG_SSA_DECODER is enabled. It is declared with the
 * AV_CODEC_ID_ASS codec id and uses ass_decode_init() / ass_decode_frame()
 * as its callbacks.
 */
const FFCodec ff_ssa_decoder = {
    /* public identification */
    .p.type        = AVMEDIA_TYPE_SUBTITLE,
    .p.id          = AV_CODEC_ID_ASS,
    .p.name        = "ssa",
    .p.long_name   = NULL_IF_CONFIG_SMALL("ASS (Advanced SubStation Alpha) subtitle"),
    /* internal capabilities */
    .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE,
    /* callbacks */
    .init          = ass_decode_init,
    FF_CODEC_DECODE_SUB_CB(ass_decode_frame),
};
#endif

#if CONFIG_ASS_DECODER
/*
 * Codec descriptor for the decoder registered under the name "ass".
 * Only built when CONFIG_ASS_DECODER is enabled. It is declared with the
 * AV_CODEC_ID_ASS codec id and uses ass_decode_init() / ass_decode_frame()
 * as its callbacks.
 */
const FFCodec ff_ass_decoder = {
    /* public identification */
    .p.type        = AVMEDIA_TYPE_SUBTITLE,
    .p.id          = AV_CODEC_ID_ASS,
    .p.name        = "ass",
    .p.long_name   = NULL_IF_CONFIG_SMALL("ASS (Advanced SubStation Alpha) subtitle"),
    /* internal capabilities */
    .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE,
    /* callbacks */
    .init          = ass_decode_init,
    FF_CODEC_DECODE_SUB_CB(ass_decode_frame),
};
#endif
add missing files in previous commit (ASS encoder and decoder) Originally committed as revision 25747 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-11-13 14:18:59 +00:00			`/*`
			`* SSA/ASS decoder`
			`* Copyright (c) 2010 Aurelien Jacobs <aurel@gnuage.org>`
			`*`
			`* This file is part of FFmpeg.`
			`*`
			`* FFmpeg is free software; you can redistribute it and/or`
			`* modify it under the terms of the GNU Lesser General Public`
			`* License as published by the Free Software Foundation; either`
			`* version 2.1 of the License, or (at your option) any later version.`
			`*`
			`* FFmpeg is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* Lesser General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Lesser General Public`
			`* License along with FFmpeg; if not, write to the Free Software`
			`* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA`
			`*/`

Don't include common.h from avutil.h Signed-off-by: Martin Storsjö <martin@martin.st> 2012-08-06 13:49:32 +00:00			`#include <string.h>`

add missing files in previous commit (ASS encoder and decoder) Originally committed as revision 25747 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-11-13 14:18:59 +00:00			`#include "avcodec.h"`
			`#include "ass.h"`
upgrade ffmpeg from 4.4.1 to 5.1.4 Signed-off-by: cyberbox <468042667@qq.com> Change-Id: I63cc2a8c9ff6197c67d6b6b47c124882ad942a22 2024-04-25 09:18:18 +00:00			`#include "codec_internal.h"`
			`#include "config_components.h"`
Don't include common.h from avutil.h Signed-off-by: Martin Storsjö <martin@martin.st> 2012-08-06 13:49:32 +00:00			`#include "libavutil/internal.h"`
			`#include "libavutil/mem.h"`
add missing files in previous commit (ASS encoder and decoder) Originally committed as revision 25747 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-11-13 14:18:59 +00:00
			`static av_cold int ass_decode_init(AVCodecContext *avctx)`
			`{`
lavc: add ff_bprint_to_extradata() helper and use it. This commit also makes sure the extradata and subtitle_header are NUL terminated, without taking the trailing '\0' into account in the size. At the same time, it should fix the 'warning: dereferencing type-punned pointer will break strict-aliasing rules' warning for compilers that don't consider uint8_t and char compatible. 2012-12-29 21:09:59 +00:00			`avctx->subtitle_header = av_malloc(avctx->extradata_size + 1);`
assdec: fix wrong alloc pointer check. 2012-01-29 13:47:04 +00:00			`if (!avctx->subtitle_header)`
add missing files in previous commit (ASS encoder and decoder) Originally committed as revision 25747 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-11-13 14:18:59 +00:00			`return AVERROR(ENOMEM);`
avcodec/assdec: undefined use of memcpy() Fixes: null pointer passed as argument 2, which is declared to never be null Fixes: 16008/clusterfuzz-testcase-minimized-ffmpeg_AV_CODEC_ID_SSA_fuzzer-5650582821404672 (this is a separate issue found in this testcase) Found-by: continuous fuzzing process https://github.com/google/oss-fuzz/tree/master/projects/ffmpeg Reviewed-by: Paul B Mahol <onemda@gmail.com> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc> 2019-07-24 20:55:15 +00:00			`if (avctx->extradata_size)`
			`memcpy(avctx->subtitle_header, avctx->extradata, avctx->extradata_size);`
lavc: add ff_bprint_to_extradata() helper and use it. This commit also makes sure the extradata and subtitle_header are NUL terminated, without taking the trailing '\0' into account in the size. At the same time, it should fix the 'warning: dereferencing type-punned pointer will break strict-aliasing rules' warning for compilers that don't consider uint8_t and char compatible. 2012-12-29 21:09:59 +00:00			`avctx->subtitle_header[avctx->extradata_size] = 0;`
add missing files in previous commit (ASS encoder and decoder) Originally committed as revision 25747 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-11-13 14:18:59 +00:00			`avctx->subtitle_header_size = avctx->extradata_size;`
			`return 0;`
			`}`

upgrade ffmpeg from 4.4.1 to 5.1.4 Signed-off-by: cyberbox <468042667@qq.com> Change-Id: I63cc2a8c9ff6197c67d6b6b47c124882ad942a22 2024-04-25 09:18:18 +00:00			`static int ass_decode_frame(AVCodecContext *avctx, AVSubtitle *sub,`
			`int *got_sub_ptr, const AVPacket *avpkt)`
compute rects duration in ASS decoder Signed-off-by: Aurelien Jacobs <aurel@gnuage.org> 2010-12-16 08:16:33 +00:00			`{`
subtitles: introduce ASS codec id and use it. Currently, we have a AV_CODEC_ID_SSA, which matches the way the ASS/SSA markup is muxed in a standalone .ass/.ssa file. This means the AVPacket data starts with a "Dialogue:" string, followed by a timing information (start and end of the event as string) and a trailing CRLF after each line. One packet can contain several lines. We'll refer to this layout as "SSA" or "SSA lines". In matroska, this markup is not stored as such: it has no "Dialogue:" prefix, it contains a ReadOrder field, the timing information is not in the payload, and it doesn't contain the trailing CRLF. See [1] for more info. We'll refer to this layout as "ASS". Since we have only one common codec for both formats, the matroska demuxer is constructing an AVPacket following the "SSA lines" format. This causes several problems, so it was decided to change this into clean ASS packets. Some insight about what is changed or unchanged in this commit: CODECS ------ - the decoding process still writes "SSA lines" markup inside the ass fields of the subtitles rectangles (sub->rects[n]->ass), which is still the current common way of representing decoded subtitles markup. It is meant to change later. - new ASS codec id: AV_CODEC_ID_ASS (which is different from the legacy AV_CODEC_ID_SSA) - lavc/assdec: the "ass" decoder is renamed into "ssa" (instead of "ass") for consistency with the codec id and allows to add a real ass decoder. This ass decoder receives clean ASS lines (so it starts with a ReadOrder, is followed by the Layer, etc). We make sure this is decoded properly in a new ass-line rectangle of the decoded subtitles (the ssa decoder OTOH is doing a simple straightforward copy). Using the packet timing instead of data string makes sure the ass-line now contains the appropriate timing. 
- lavc/assenc: just like the ass decoder, the "ssa" encoder is renamed into "ssa" (instead of "ass") for consistency with the codec id, and allows to add a real "ass" encoder. One important thing about this encoder is that it only supports one ass rectangle: we could have put several dialogue events in the AVPacket (separated by a \0 for instance) but this would have cause trouble for the muxer which needs not only the start time, but also the duration: typically, you have merged events with the same start time (stored in the AVPacket->pts) but a different duration. At the moment, only the matroska do the merge with the SSA-line codec. We will need to make sure all the decoders in the future can't add more than one rectangle (and only one Dialogue line in it obviously). FORMATS ------- - lavf/assenc: the .ass/.ssa muxer can take both SSA and ASS packets. In the case of ASS packets as input, it adds the timing based on the AVPacket pts and duration, and mux it with "Dialogue:", trailing CRLF, etc. - lavf/assdec: unchanged; it currently still only outputs SSA-lines packets. - lavf/mkv: the demuxer can now output ASS packets without the need of any "SSA-lines" reconstruction hack. It will become the default at next libavformat bump, and the SSA support will be dropped from the demuxer. The muxer can take ASS packets since it's muxed normally, and still supports the old SSA packets. All the SSA support and hacks in Matroska code will be dropped at next lavf bump. [1]: http://www.matroska.org/technical/specs/subtitles/ssa.html 2013-01-03 02:06:43 +00:00			`if (avpkt->size <= 0)`
			`return avpkt->size;`

lavc: allow subtitle text format to be ASS without timing 2016-01-06 12:43:23 +00:00			`sub->rects = av_malloc(sizeof(*sub->rects));`
			`if (!sub->rects)`
			`return AVERROR(ENOMEM);`
			`sub->rects[0] = av_mallocz(sizeof(*sub->rects[0]));`
			`if (!sub->rects[0])`
			`return AVERROR(ENOMEM);`
			`sub->num_rects = 1;`
			`sub->rects[0]->type = SUBTITLE_ASS;`
			`sub->rects[0]->ass = av_strdup(avpkt->data);`
			`if (!sub->rects[0]->ass)`
			`return AVERROR(ENOMEM);`
			`*got_sub_ptr = 1;`
subtitles: introduce ASS codec id and use it. Currently, we have a AV_CODEC_ID_SSA, which matches the way the ASS/SSA markup is muxed in a standalone .ass/.ssa file. This means the AVPacket data starts with a "Dialogue:" string, followed by a timing information (start and end of the event as string) and a trailing CRLF after each line. One packet can contain several lines. We'll refer to this layout as "SSA" or "SSA lines". In matroska, this markup is not stored as such: it has no "Dialogue:" prefix, it contains a ReadOrder field, the timing information is not in the payload, and it doesn't contain the trailing CRLF. See [1] for more info. We'll refer to this layout as "ASS". Since we have only one common codec for both formats, the matroska demuxer is constructing an AVPacket following the "SSA lines" format. This causes several problems, so it was decided to change this into clean ASS packets. Some insight about what is changed or unchanged in this commit: CODECS ------ - the decoding process still writes "SSA lines" markup inside the ass fields of the subtitles rectangles (sub->rects[n]->ass), which is still the current common way of representing decoded subtitles markup. It is meant to change later. - new ASS codec id: AV_CODEC_ID_ASS (which is different from the legacy AV_CODEC_ID_SSA) - lavc/assdec: the "ass" decoder is renamed into "ssa" (instead of "ass") for consistency with the codec id and allows to add a real ass decoder. This ass decoder receives clean ASS lines (so it starts with a ReadOrder, is followed by the Layer, etc). We make sure this is decoded properly in a new ass-line rectangle of the decoded subtitles (the ssa decoder OTOH is doing a simple straightforward copy). Using the packet timing instead of data string makes sure the ass-line now contains the appropriate timing. 
- lavc/assenc: just like the ass decoder, the "ssa" encoder is renamed into "ssa" (instead of "ass") for consistency with the codec id, and allows to add a real "ass" encoder. One important thing about this encoder is that it only supports one ass rectangle: we could have put several dialogue events in the AVPacket (separated by a \0 for instance) but this would have cause trouble for the muxer which needs not only the start time, but also the duration: typically, you have merged events with the same start time (stored in the AVPacket->pts) but a different duration. At the moment, only the matroska do the merge with the SSA-line codec. We will need to make sure all the decoders in the future can't add more than one rectangle (and only one Dialogue line in it obviously). FORMATS ------- - lavf/assenc: the .ass/.ssa muxer can take both SSA and ASS packets. In the case of ASS packets as input, it adds the timing based on the AVPacket pts and duration, and mux it with "Dialogue:", trailing CRLF, etc. - lavf/assdec: unchanged; it currently still only outputs SSA-lines packets. - lavf/mkv: the demuxer can now output ASS packets without the need of any "SSA-lines" reconstruction hack. It will become the default at next libavformat bump, and the SSA support will be dropped from the demuxer. The muxer can take ASS packets since it's muxed normally, and still supports the old SSA packets. All the SSA support and hacks in Matroska code will be dropped at next lavf bump. [1]: http://www.matroska.org/technical/specs/subtitles/ssa.html 2013-01-03 02:06:43 +00:00			`return avpkt->size;`
compute rects duration in ASS decoder Signed-off-by: Aurelien Jacobs <aurel@gnuage.org> 2010-12-16 08:16:33 +00:00			`}`

Kill timed SSA 2014-09-14 18:55:36 +00:00			`#if CONFIG_SSA_DECODER`
upgrade ffmpeg from 4.4.1 to 5.1.4 Signed-off-by: cyberbox <468042667@qq.com> Change-Id: I63cc2a8c9ff6197c67d6b6b47c124882ad942a22 2024-04-25 09:18:18 +00:00			`const FFCodec ff_ssa_decoder = {`
			`.p.name = "ssa",`
			`.p.long_name = NULL_IF_CONFIG_SMALL("ASS (Advanced SubStation Alpha) subtitle"),`
			`.p.type = AVMEDIA_TYPE_SUBTITLE,`
			`.p.id = AV_CODEC_ID_ASS,`
Kill timed SSA 2014-09-14 18:55:36 +00:00			`.init = ass_decode_init,`
upgrade ffmpeg from 4.4.1 to 5.1.4 Signed-off-by: cyberbox <468042667@qq.com> Change-Id: I63cc2a8c9ff6197c67d6b6b47c124882ad942a22 2024-04-25 09:18:18 +00:00			`FF_CODEC_DECODE_SUB_CB(ass_decode_frame),`
			`.caps_internal = FF_CODEC_CAP_INIT_THREADSAFE,`
Kill timed SSA 2014-09-14 18:55:36 +00:00			`};`
			`#endif`

			`#if CONFIG_ASS_DECODER`
upgrade ffmpeg from 4.4.1 to 5.1.4 Signed-off-by: cyberbox <468042667@qq.com> Change-Id: I63cc2a8c9ff6197c67d6b6b47c124882ad942a22 2024-04-25 09:18:18 +00:00			`const FFCodec ff_ass_decoder = {`
			`.p.name = "ass",`
			`.p.long_name = NULL_IF_CONFIG_SMALL("ASS (Advanced SubStation Alpha) subtitle"),`
			`.p.type = AVMEDIA_TYPE_SUBTITLE,`
			`.p.id = AV_CODEC_ID_ASS,`
add missing files in previous commit (ASS encoder and decoder) Originally committed as revision 25747 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-11-13 14:18:59 +00:00			`.init = ass_decode_init,`
upgrade ffmpeg from 4.4.1 to 5.1.4 Signed-off-by: cyberbox <468042667@qq.com> Change-Id: I63cc2a8c9ff6197c67d6b6b47c124882ad942a22 2024-04-25 09:18:18 +00:00			`FF_CODEC_DECODE_SUB_CB(ass_decode_frame),`
			`.caps_internal = FF_CODEC_CAP_INIT_THREADSAFE,`
add missing files in previous commit (ASS encoder and decoder) Originally committed as revision 25747 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-11-13 14:18:59 +00:00			`};`
subtitles: introduce ASS codec id and use it. Currently, we have a AV_CODEC_ID_SSA, which matches the way the ASS/SSA markup is muxed in a standalone .ass/.ssa file. This means the AVPacket data starts with a "Dialogue:" string, followed by a timing information (start and end of the event as string) and a trailing CRLF after each line. One packet can contain several lines. We'll refer to this layout as "SSA" or "SSA lines". In matroska, this markup is not stored as such: it has no "Dialogue:" prefix, it contains a ReadOrder field, the timing information is not in the payload, and it doesn't contain the trailing CRLF. See [1] for more info. We'll refer to this layout as "ASS". Since we have only one common codec for both formats, the matroska demuxer is constructing an AVPacket following the "SSA lines" format. This causes several problems, so it was decided to change this into clean ASS packets. Some insight about what is changed or unchanged in this commit: CODECS ------ - the decoding process still writes "SSA lines" markup inside the ass fields of the subtitles rectangles (sub->rects[n]->ass), which is still the current common way of representing decoded subtitles markup. It is meant to change later. - new ASS codec id: AV_CODEC_ID_ASS (which is different from the legacy AV_CODEC_ID_SSA) - lavc/assdec: the "ass" decoder is renamed into "ssa" (instead of "ass") for consistency with the codec id and allows to add a real ass decoder. This ass decoder receives clean ASS lines (so it starts with a ReadOrder, is followed by the Layer, etc). We make sure this is decoded properly in a new ass-line rectangle of the decoded subtitles (the ssa decoder OTOH is doing a simple straightforward copy). Using the packet timing instead of data string makes sure the ass-line now contains the appropriate timing. 
- lavc/assenc: just like the ass decoder, the "ssa" encoder is renamed into "ssa" (instead of "ass") for consistency with the codec id, and allows to add a real "ass" encoder. One important thing about this encoder is that it only supports one ass rectangle: we could have put several dialogue events in the AVPacket (separated by a \0 for instance) but this would have cause trouble for the muxer which needs not only the start time, but also the duration: typically, you have merged events with the same start time (stored in the AVPacket->pts) but a different duration. At the moment, only the matroska do the merge with the SSA-line codec. We will need to make sure all the decoders in the future can't add more than one rectangle (and only one Dialogue line in it obviously). FORMATS ------- - lavf/assenc: the .ass/.ssa muxer can take both SSA and ASS packets. In the case of ASS packets as input, it adds the timing based on the AVPacket pts and duration, and mux it with "Dialogue:", trailing CRLF, etc. - lavf/assdec: unchanged; it currently still only outputs SSA-lines packets. - lavf/mkv: the demuxer can now output ASS packets without the need of any "SSA-lines" reconstruction hack. It will become the default at next libavformat bump, and the SSA support will be dropped from the demuxer. The muxer can take ASS packets since it's muxed normally, and still supports the old SSA packets. All the SSA support and hacks in Matroska code will be dropped at next lavf bump. [1]: http://www.matroska.org/technical/specs/subtitles/ssa.html 2013-01-03 02:06:43 +00:00			`#endif`