diff --git a/dom/media/DecoderTraits.cpp b/dom/media/DecoderTraits.cpp index 231de5d4de43..f607204a3b2d 100644 --- a/dom/media/DecoderTraits.cpp +++ b/dom/media/DecoderTraits.cpp @@ -18,6 +18,7 @@ #endif #ifdef MOZ_WEBM #include "WebMDecoder.h" +#include "WebMReader.h" #include "WebMDemuxer.h" #endif #ifdef MOZ_RAW @@ -676,8 +677,9 @@ MediaDecoderReader* DecoderTraits::CreateReader(const nsACString& aType, Abstrac #endif #ifdef MOZ_WEBM if (IsWebMSupportedType(aType)) { - decoderReader = - new MediaFormatReader(aDecoder, new WebMDemuxer(aDecoder->GetResource())); + decoderReader = Preferences::GetBool("media.format-reader.webm", true) ? + static_cast(new MediaFormatReader(aDecoder, new WebMDemuxer(aDecoder->GetResource()))) : + new WebMReader(aDecoder); } else #endif #ifdef MOZ_DIRECTSHOW diff --git a/dom/media/webm/AudioDecoder.cpp b/dom/media/webm/AudioDecoder.cpp new file mode 100644 index 000000000000..cd2bd0342318 --- /dev/null +++ b/dom/media/webm/AudioDecoder.cpp @@ -0,0 +1,472 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "WebMReader.h" + +#ifdef MOZ_TREMOR +#include "tremor/ivorbiscodec.h" +#else +#include "vorbis/codec.h" +#endif + +#include "OpusParser.h" + +#include "VorbisUtils.h" +#include "OggReader.h" + +#undef LOG + +#ifdef PR_LOGGING +#include "prprf.h" +#define LOG(type, msg) MOZ_LOG(gMediaDecoderLog, type, msg) +#else +#define LOG(type, msg) +#endif + +namespace mozilla { + +extern LazyLogModule gMediaDecoderLog; + +ogg_packet InitOggPacket(const unsigned char* aData, size_t aLength, + bool aBOS, bool aEOS, + int64_t aGranulepos, int64_t aPacketNo) +{ + ogg_packet packet; + packet.packet = const_cast(aData); + packet.bytes = aLength; + packet.b_o_s = aBOS; + packet.e_o_s = aEOS; + packet.granulepos = aGranulepos; + packet.packetno = aPacketNo; + return packet; +} + +class VorbisDecoder : public WebMAudioDecoder +{ +public: + nsresult Init() override; + void Shutdown() override; + nsresult ResetDecode() override; + nsresult DecodeHeader(const unsigned char* aData, size_t aLength) override; + nsresult FinishInit(AudioInfo& aInfo) override; + bool Decode(const unsigned char* aData, size_t aLength, + int64_t aOffset, uint64_t aTstampUsecs, + int64_t aDiscardPadding, int32_t* aTotalFrames) override; + explicit VorbisDecoder(WebMReader* aReader); + ~VorbisDecoder(); +private: + RefPtr mReader; + + // Vorbis decoder state + vorbis_info mVorbisInfo; + vorbis_comment mVorbisComment; + vorbis_dsp_state mVorbisDsp; + vorbis_block mVorbisBlock; + int64_t mPacketCount; +}; + +VorbisDecoder::VorbisDecoder(WebMReader* aReader) + : mReader(aReader) + , mPacketCount(0) +{ + // Zero these member vars to avoid crashes in Vorbis clear functions when + // destructor is called before |Init|. 
+ PodZero(&mVorbisBlock); + PodZero(&mVorbisDsp); + PodZero(&mVorbisInfo); + PodZero(&mVorbisComment); +} + +VorbisDecoder::~VorbisDecoder() +{ + vorbis_block_clear(&mVorbisBlock); + vorbis_dsp_clear(&mVorbisDsp); + vorbis_info_clear(&mVorbisInfo); + vorbis_comment_clear(&mVorbisComment); +} + +void +VorbisDecoder::Shutdown() +{ + mReader = nullptr; +} + +nsresult +VorbisDecoder::Init() +{ + vorbis_info_init(&mVorbisInfo); + vorbis_comment_init(&mVorbisComment); + PodZero(&mVorbisDsp); + PodZero(&mVorbisBlock); + return NS_OK; +} + +nsresult +VorbisDecoder::ResetDecode() +{ + // Ignore failed results from vorbis_synthesis_restart. They + // aren't fatal and it fails when ResetDecode is called at a + // time when no vorbis data has been read. + vorbis_synthesis_restart(&mVorbisDsp); + return NS_OK; +} + +nsresult +VorbisDecoder::DecodeHeader(const unsigned char* aData, size_t aLength) +{ + bool bos = mPacketCount == 0; + ogg_packet pkt = InitOggPacket(aData, aLength, bos, false, 0, mPacketCount++); + MOZ_ASSERT(mPacketCount <= 3); + + int r = vorbis_synthesis_headerin(&mVorbisInfo, + &mVorbisComment, + &pkt); + return r == 0 ? 
NS_OK : NS_ERROR_FAILURE; +} + +nsresult +VorbisDecoder::FinishInit(AudioInfo& aInfo) +{ + MOZ_ASSERT(mPacketCount == 3); + + int r = vorbis_synthesis_init(&mVorbisDsp, &mVorbisInfo); + if (r) { + return NS_ERROR_FAILURE; + } + + r = vorbis_block_init(&mVorbisDsp, &mVorbisBlock); + if (r) { + return NS_ERROR_FAILURE; + } + + aInfo.mRate = mVorbisDsp.vi->rate; + aInfo.mChannels = mVorbisDsp.vi->channels; + + return NS_OK; +} + +bool +VorbisDecoder::Decode(const unsigned char* aData, size_t aLength, + int64_t aOffset, uint64_t aTstampUsecs, + int64_t aDiscardPadding, int32_t* aTotalFrames) +{ + MOZ_ASSERT(mPacketCount >= 3); + ogg_packet pkt = InitOggPacket(aData, aLength, false, false, -1, mPacketCount++); + bool first_packet = mPacketCount == 4; + + if (vorbis_synthesis(&mVorbisBlock, &pkt)) { + return false; + } + + if (vorbis_synthesis_blockin(&mVorbisDsp, + &mVorbisBlock)) { + return false; + } + + VorbisPCMValue** pcm = 0; + int32_t frames = vorbis_synthesis_pcmout(&mVorbisDsp, &pcm); + // If the first packet of audio in the media produces no data, we + // still need to produce an AudioData for it so that the correct media + // start time is calculated. Otherwise we'd end up with a media start + // time derived from the timecode of the first packet that produced + // data. 
+ if (frames == 0 && first_packet) { + mReader->AudioQueue().Push(new AudioData(aOffset, aTstampUsecs, 0, 0, nullptr, + mVorbisDsp.vi->channels, + mVorbisDsp.vi->rate)); + } + while (frames > 0) { + uint32_t channels = mVorbisDsp.vi->channels; + auto buffer = MakeUnique(frames*channels); + for (uint32_t j = 0; j < channels; ++j) { + VorbisPCMValue* channel = pcm[j]; + for (uint32_t i = 0; i < uint32_t(frames); ++i) { + buffer[i*channels + j] = MOZ_CONVERT_VORBIS_SAMPLE(channel[i]); + } + } + + CheckedInt64 duration = FramesToUsecs(frames, mVorbisDsp.vi->rate); + if (!duration.isValid()) { + NS_WARNING("Int overflow converting WebM audio duration"); + return false; + } + CheckedInt64 total_duration = FramesToUsecs(*aTotalFrames, + mVorbisDsp.vi->rate); + if (!total_duration.isValid()) { + NS_WARNING("Int overflow converting WebM audio total_duration"); + return false; + } + + CheckedInt64 time = total_duration + aTstampUsecs; + if (!time.isValid()) { + NS_WARNING("Int overflow adding total_duration and aTstampUsecs"); + return false; + }; + + *aTotalFrames += frames; + mReader->AudioQueue().Push(new AudioData(aOffset, + time.value(), + duration.value(), + frames, + Move(buffer), + mVorbisDsp.vi->channels, + mVorbisDsp.vi->rate)); + if (vorbis_synthesis_read(&mVorbisDsp, frames)) { + return false; + } + + frames = vorbis_synthesis_pcmout(&mVorbisDsp, &pcm); + } + + return true; +} + +// ------------------------------------------------------------------------ + +class OpusDecoder : public WebMAudioDecoder +{ +public: + nsresult Init() override; + void Shutdown() override; + nsresult ResetDecode() override; + nsresult DecodeHeader(const unsigned char* aData, size_t aLength) override; + nsresult FinishInit(AudioInfo& aInfo) override; + bool Decode(const unsigned char* aData, size_t aLength, + int64_t aOffset, uint64_t aTstampUsecs, + int64_t aDiscardPadding, int32_t* aTotalFrames) override; + explicit OpusDecoder(WebMReader* aReader); + ~OpusDecoder(); +private: + 
RefPtr mReader; + + // Opus decoder state + nsAutoPtr mOpusParser; + OpusMSDecoder* mOpusDecoder; + uint16_t mSkip; // Samples left to trim before playback. + bool mDecodedHeader; + + // Opus padding should only be discarded on the final packet. Once this + // is set to true, if the reader attempts to decode any further packets it + // will raise an error so we can indicate that the file is invalid. + bool mPaddingDiscarded; +}; + +OpusDecoder::OpusDecoder(WebMReader* aReader) + : mReader(aReader) + , mOpusDecoder(nullptr) + , mSkip(0) + , mDecodedHeader(false) + , mPaddingDiscarded(false) +{ +} + +OpusDecoder::~OpusDecoder() +{ + if (mOpusDecoder) { + opus_multistream_decoder_destroy(mOpusDecoder); + mOpusDecoder = nullptr; + } +} + +void +OpusDecoder::Shutdown() +{ + mReader = nullptr; +} + +nsresult +OpusDecoder::Init() +{ + return NS_OK; +} + +nsresult +OpusDecoder::ResetDecode() +{ + if (mOpusDecoder) { + // Reset the decoder. + opus_multistream_decoder_ctl(mOpusDecoder, OPUS_RESET_STATE); + mSkip = mOpusParser->mPreSkip; + mPaddingDiscarded = false; + } + return NS_OK; +} + +nsresult +OpusDecoder::DecodeHeader(const unsigned char* aData, size_t aLength) +{ + MOZ_ASSERT(!mOpusParser); + MOZ_ASSERT(!mOpusDecoder); + MOZ_ASSERT(!mDecodedHeader); + mDecodedHeader = true; + + mOpusParser = new OpusParser; + if (!mOpusParser->DecodeHeader(const_cast(aData), aLength)) { + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +nsresult +OpusDecoder::FinishInit(AudioInfo& aInfo) +{ + MOZ_ASSERT(mDecodedHeader); + + int r; + mOpusDecoder = opus_multistream_decoder_create(mOpusParser->mRate, + mOpusParser->mChannels, + mOpusParser->mStreams, + mOpusParser->mCoupledStreams, + mOpusParser->mMappingTable, + &r); + mSkip = mOpusParser->mPreSkip; + mPaddingDiscarded = false; + + if (int64_t(mReader->GetCodecDelay()) != FramesToUsecs(mOpusParser->mPreSkip, + mOpusParser->mRate).value()) { + LOG(LogLevel::Warning, + ("Invalid Opus header: CodecDelay and pre-skip do not 
match!")); + return NS_ERROR_FAILURE; + } + + aInfo.mRate = mOpusParser->mRate; + aInfo.mChannels = mOpusParser->mChannels; + + return r == OPUS_OK ? NS_OK : NS_ERROR_FAILURE; +} + +bool +OpusDecoder::Decode(const unsigned char* aData, size_t aLength, + int64_t aOffset, uint64_t aTstampUsecs, + int64_t aDiscardPadding, int32_t* aTotalFrames) +{ + uint32_t channels = mOpusParser->mChannels; + // No channel mapping for more than 8 channels. + if (channels > 8) { + return false; + } + + if (mPaddingDiscarded) { + // Discard padding should be used only on the final packet, so + // decoding after a padding discard is invalid. + LOG(LogLevel::Debug, ("Opus error, discard padding on interstitial packet")); + return false; + } + + // Maximum value is 63*2880, so there's no chance of overflow. + int32_t frames_number = opus_packet_get_nb_frames(aData, aLength); + if (frames_number <= 0) { + return false; // Invalid packet header. + } + + int32_t samples = + opus_packet_get_samples_per_frame(aData, opus_int32(mOpusParser->mRate)); + + // A valid Opus packet must be between 2.5 and 120 ms long (48kHz). + int32_t frames = frames_number*samples; + if (frames < 120 || frames > 5760) + return false; + + auto buffer = MakeUnique(frames * channels); + + // Decode to the appropriate sample type. +#ifdef MOZ_SAMPLE_TYPE_FLOAT32 + int ret = opus_multistream_decode_float(mOpusDecoder, + aData, aLength, + buffer.get(), frames, false); +#else + int ret = opus_multistream_decode(mOpusDecoder, + aData, aLength, + buffer.get(), frames, false); +#endif + if (ret < 0) + return false; + NS_ASSERTION(ret == frames, "Opus decoded too few audio samples"); + CheckedInt64 startTime = aTstampUsecs; + + // Trim the initial frames while the decoder is settling. 
+ if (mSkip > 0) { + int32_t skipFrames = std::min(mSkip, frames); + int32_t keepFrames = frames - skipFrames; + LOG(LogLevel::Debug, ("Opus decoder skipping %d of %d frames", + skipFrames, frames)); + PodMove(buffer.get(), + buffer.get() + skipFrames * channels, + keepFrames * channels); + startTime = startTime + FramesToUsecs(skipFrames, mOpusParser->mRate); + frames = keepFrames; + mSkip -= skipFrames; + } + + if (aDiscardPadding < 0) { + // Negative discard padding is invalid. + LOG(LogLevel::Debug, ("Opus error, negative discard padding")); + return false; + } + if (aDiscardPadding > 0) { + CheckedInt64 discardFrames = UsecsToFrames(aDiscardPadding / NS_PER_USEC, + mOpusParser->mRate); + if (!discardFrames.isValid()) { + NS_WARNING("Int overflow in DiscardPadding"); + return false; + } + if (discardFrames.value() > frames) { + // Discarding more than the entire packet is invalid. + LOG(LogLevel::Debug, ("Opus error, discard padding larger than packet")); + return false; + } + LOG(LogLevel::Debug, ("Opus decoder discarding %d of %d frames", + int32_t(discardFrames.value()), frames)); + // Padding discard is only supposed to happen on the final packet. + // Record the discard so we can return an error if another packet is + // decoded. + mPaddingDiscarded = true; + int32_t keepFrames = frames - discardFrames.value(); + frames = keepFrames; + } + + // Apply the header gain if one was specified. 
+#ifdef MOZ_SAMPLE_TYPE_FLOAT32 + if (mOpusParser->mGain != 1.0f) { + float gain = mOpusParser->mGain; + int samples = frames * channels; + for (int i = 0; i < samples; i++) { + buffer[i] *= gain; + } + } +#else + if (mOpusParser->mGain_Q16 != 65536) { + int64_t gain_Q16 = mOpusParser->mGain_Q16; + int samples = frames * channels; + for (int i = 0; i < samples; i++) { + int32_t val = static_cast((gain_Q16*buffer[i] + 32768)>>16); + buffer[i] = static_cast(MOZ_CLIP_TO_15(val)); + } + } +#endif + + CheckedInt64 duration = FramesToUsecs(frames, mOpusParser->mRate); + if (!duration.isValid()) { + NS_WARNING("Int overflow converting WebM audio duration"); + return false; + } + CheckedInt64 time = startTime - mReader->GetCodecDelay(); + if (!time.isValid()) { + NS_WARNING("Int overflow shifting tstamp by codec delay"); + return false; + }; + mReader->AudioQueue().Push(new AudioData(aOffset, + time.value(), + duration.value(), + frames, + Move(buffer), + mOpusParser->mChannels, + mOpusParser->mRate)); + return true; +} + +} // namespace mozilla diff --git a/dom/media/webm/EbmlComposer.cpp b/dom/media/webm/EbmlComposer.cpp index 9a107b76c74d..1bda401e09c3 100644 --- a/dom/media/webm/EbmlComposer.cpp +++ b/dom/media/webm/EbmlComposer.cpp @@ -4,7 +4,6 @@ * You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "EbmlComposer.h" -#include "mozilla/UniquePtr.h" #include "libmkv/EbmlIDs.h" #include "libmkv/EbmlWriter.h" #include "libmkv/WebMElement.h" diff --git a/dom/media/webm/SoftwareWebMVideoDecoder.cpp b/dom/media/webm/SoftwareWebMVideoDecoder.cpp new file mode 100644 index 000000000000..d63e24c73414 --- /dev/null +++ b/dom/media/webm/SoftwareWebMVideoDecoder.cpp @@ -0,0 +1,233 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#include "SoftwareWebMVideoDecoder.h" +#include "AbstractMediaDecoder.h" +#include "gfx2DGlue.h" +#include "MediaDecoderStateMachine.h" +#include "MediaResource.h" +#include "nsError.h" +#include "OggReader.h" +#include "TimeUnits.h" +#include "VorbisUtils.h" +#include "WebMBufferedParser.h" +#include "NesteggPacketHolder.h" + +#include + +#define VPX_DONT_DEFINE_STDINT_TYPES +#include "vpx/vp8dx.h" +#include "vpx/vpx_decoder.h" + +namespace mozilla { + +using namespace gfx; +using namespace layers; + +SoftwareWebMVideoDecoder::SoftwareWebMVideoDecoder(WebMReader* aReader) + : WebMVideoDecoder(), + mReader(aReader) +{ + MOZ_COUNT_CTOR(SoftwareWebMVideoDecoder); + PodZero(&mVPX); +} + +SoftwareWebMVideoDecoder::~SoftwareWebMVideoDecoder() +{ + MOZ_COUNT_DTOR(SoftwareWebMVideoDecoder); +} + +void +SoftwareWebMVideoDecoder::Shutdown() +{ + vpx_codec_destroy(&mVPX); + mReader = nullptr; +} + +/* static */ +WebMVideoDecoder* +SoftwareWebMVideoDecoder::Create(WebMReader* aReader) +{ + return new SoftwareWebMVideoDecoder(aReader); +} + +nsresult +SoftwareWebMVideoDecoder::Init(unsigned int aWidth, unsigned int aHeight) +{ + return InitDecoder(aWidth, aHeight); +} + +nsresult +SoftwareWebMVideoDecoder::InitDecoder(unsigned int aWidth, unsigned int aHeight) +{ + vpx_codec_iface_t* dx = nullptr; + switch(mReader->GetVideoCodec()) { + case NESTEGG_CODEC_VP8: + dx = vpx_codec_vp8_dx(); + break; + case NESTEGG_CODEC_VP9: + dx = vpx_codec_vp9_dx(); + break; + } + if (!dx || vpx_codec_dec_init(&mVPX, dx, nullptr, 0)) { + return NS_ERROR_FAILURE; + } + return NS_OK; +} + +bool +SoftwareWebMVideoDecoder::DecodeVideoFrame(bool &aKeyframeSkip, + int64_t aTimeThreshold) +{ + MOZ_ASSERT(mReader->OnTaskQueue()); + + // Record number of frames decoded and parsed. Automatically update the + // stats counters using the AutoNotifyDecoded stack-based class. 
+ AbstractMediaDecoder::AutoNotifyDecoded a(mReader->GetDecoder()); + + RefPtr holder(mReader->NextPacket(WebMReader::VIDEO)); + if (!holder) { + return false; + } + + nestegg_packet* packet = holder->Packet(); + unsigned int track = 0; + int r = nestegg_packet_track(packet, &track); + if (r == -1) { + return false; + } + + unsigned int count = 0; + r = nestegg_packet_count(packet, &count); + if (r == -1) { + return false; + } + + if (count > 1) { + NS_WARNING("Packet contains more than one video frame"); + return false; + } + + int64_t tstamp = holder->Timestamp(); + + // The end time of this frame is the start time of the next frame. Fetch + // the timestamp of the next packet for this track. If we've reached the + // end of the resource, use the file's duration as the end time of this + // video frame. + int64_t next_tstamp = 0; + RefPtr next_holder(mReader->NextPacket(WebMReader::VIDEO)); + if (next_holder) { + next_tstamp = next_holder->Timestamp(); + mReader->PushVideoPacket(next_holder); + } else { + next_tstamp = tstamp; + next_tstamp += tstamp - mReader->GetLastVideoFrameTime(); + } + mReader->SetLastVideoFrameTime(tstamp); + + unsigned char* data; + size_t length; + r = nestegg_packet_data(packet, 0, &data, &length); + if (r == -1) { + return false; + } + + vpx_codec_stream_info_t si; + PodZero(&si); + si.sz = sizeof(si); + if (mReader->GetVideoCodec() == NESTEGG_CODEC_VP8) { + vpx_codec_peek_stream_info(vpx_codec_vp8_dx(), data, length, &si); + } else if (mReader->GetVideoCodec() == NESTEGG_CODEC_VP9) { + vpx_codec_peek_stream_info(vpx_codec_vp9_dx(), data, length, &si); + } + if (aKeyframeSkip && (!si.is_kf || tstamp < aTimeThreshold)) { + // Skipping to next keyframe... 
+ a.mParsed++; + a.mDropped++; + return true; + } + + if (aKeyframeSkip && si.is_kf) { + aKeyframeSkip = false; + } + + if (vpx_codec_decode(&mVPX, data, length, nullptr, 0)) { + return false; + } + + // If the timestamp of the video frame is less than + // the time threshold required then it is not added + // to the video queue and won't be displayed. + if (tstamp < aTimeThreshold) { + a.mParsed++; + a.mDropped++; + return true; + } + + vpx_codec_iter_t iter = nullptr; + vpx_image_t *img; + + while ((img = vpx_codec_get_frame(&mVPX, &iter))) { + NS_ASSERTION(img->fmt == VPX_IMG_FMT_I420, "WebM image format not I420"); + + // Chroma shifts are rounded down as per the decoding examples in the SDK + VideoData::YCbCrBuffer b; + b.mPlanes[0].mData = img->planes[0]; + b.mPlanes[0].mStride = img->stride[0]; + b.mPlanes[0].mHeight = img->d_h; + b.mPlanes[0].mWidth = img->d_w; + b.mPlanes[0].mOffset = b.mPlanes[0].mSkip = 0; + + b.mPlanes[1].mData = img->planes[1]; + b.mPlanes[1].mStride = img->stride[1]; + b.mPlanes[1].mHeight = (img->d_h + 1) >> img->y_chroma_shift; + b.mPlanes[1].mWidth = (img->d_w + 1) >> img->x_chroma_shift; + b.mPlanes[1].mOffset = b.mPlanes[1].mSkip = 0; + + b.mPlanes[2].mData = img->planes[2]; + b.mPlanes[2].mStride = img->stride[2]; + b.mPlanes[2].mHeight = (img->d_h + 1) >> img->y_chroma_shift; + b.mPlanes[2].mWidth = (img->d_w + 1) >> img->x_chroma_shift; + b.mPlanes[2].mOffset = b.mPlanes[2].mSkip = 0; + + nsIntRect pictureRect = mReader->GetPicture(); + IntRect picture = pictureRect; + nsIntSize initFrame = mReader->GetInitialFrame(); + if (img->d_w != static_cast(initFrame.width) || + img->d_h != static_cast(initFrame.height)) { + // Frame size is different from what the container reports. This is + // legal in WebM, and we will preserve the ratio of the crop rectangle + // as it was reported relative to the picture size reported by the + // container. 
+ picture.x = (pictureRect.x * img->d_w) / initFrame.width; + picture.y = (pictureRect.y * img->d_h) / initFrame.height; + picture.width = (img->d_w * pictureRect.width) / initFrame.width; + picture.height = (img->d_h * pictureRect.height) / initFrame.height; + } + + VideoInfo videoInfo = mReader->GetMediaInfo().mVideo; + RefPtr v = VideoData::Create(videoInfo, + mReader->GetDecoder()->GetImageContainer(), + holder->Offset(), + tstamp, + next_tstamp - tstamp, + b, + si.is_kf, + -1, + picture); + if (!v) { + return false; + } + a.mParsed++; + a.mDecoded++; + NS_ASSERTION(a.mDecoded <= a.mParsed, + "Expect only 1 frame per chunk per packet in WebM..."); + mReader->VideoQueue().Push(v); + } + + return true; +} + +} // namespace mozilla diff --git a/dom/media/webm/SoftwareWebMVideoDecoder.h b/dom/media/webm/SoftwareWebMVideoDecoder.h new file mode 100644 index 000000000000..52ff85b625e9 --- /dev/null +++ b/dom/media/webm/SoftwareWebMVideoDecoder.h @@ -0,0 +1,41 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +#if !defined(SoftwareWebMVideoDecoder_h_) +#define SoftwareWebMVideoDecoder_h_ + +#include + +#include "WebMReader.h" + +namespace mozilla { + +class SoftwareWebMVideoDecoder : public WebMVideoDecoder +{ +public: + static WebMVideoDecoder* Create(WebMReader* aReader); + + virtual nsresult Init(unsigned int aWidth = 0, + unsigned int aHeight = 0) override; + + virtual bool DecodeVideoFrame(bool &aKeyframeSkip, + int64_t aTimeThreshold) override; + + virtual void Shutdown() override; + + explicit SoftwareWebMVideoDecoder(WebMReader* aReader); + ~SoftwareWebMVideoDecoder(); + +private: + nsresult InitDecoder(unsigned int aWidth, unsigned int aHeight); + RefPtr mReader; + + // VPx decoder state + vpx_codec_ctx_t mVPX; +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/webm/WebMDecoder.cpp b/dom/media/webm/WebMDecoder.cpp index 0f00ec8fe0a4..8156f14fda3c 100644 --- a/dom/media/webm/WebMDecoder.cpp +++ b/dom/media/webm/WebMDecoder.cpp @@ -8,6 +8,7 @@ #include "MediaDecoderStateMachine.h" #include "MediaFormatReader.h" #include "WebMDemuxer.h" +#include "WebMReader.h" #include "WebMDecoder.h" #include "VideoUtils.h" @@ -15,8 +16,11 @@ namespace mozilla { MediaDecoderStateMachine* WebMDecoder::CreateStateMachine() { - RefPtr reader = - new MediaFormatReader(this, new WebMDemuxer(GetResource())); + bool useFormatDecoder = + Preferences::GetBool("media.format-reader.webm", true); + RefPtr reader = useFormatDecoder ? 
+ static_cast(new MediaFormatReader(this, new WebMDemuxer(GetResource()), GetVideoFrameContainer())) : + new WebMReader(this); return new MediaDecoderStateMachine(this, reader); } diff --git a/dom/media/webm/WebMDemuxer.cpp b/dom/media/webm/WebMDemuxer.cpp index 4cf3fe57d42a..1d3d6e36cc32 100644 --- a/dom/media/webm/WebMDemuxer.cpp +++ b/dom/media/webm/WebMDemuxer.cpp @@ -34,7 +34,7 @@ namespace mozilla { using namespace gfx; LazyLogModule gWebMDemuxerLog("WebMDemuxer"); -LazyLogModule gNesteggLog("Nestegg"); +extern LazyLogModule gNesteggLog; // How far ahead will we look when searching future keyframe. In microseconds. // This value is based on what appears to be a reasonable value as most webm diff --git a/dom/media/webm/WebMReader.cpp b/dom/media/webm/WebMReader.cpp new file mode 100644 index 000000000000..3897e5cbc089 --- /dev/null +++ b/dom/media/webm/WebMReader.cpp @@ -0,0 +1,828 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#include "nsError.h" +#include "MediaDecoderStateMachine.h" +#include "AbstractMediaDecoder.h" +#include "SoftwareWebMVideoDecoder.h" +#include "nsContentUtils.h" +#include "WebMReader.h" +#include "WebMBufferedParser.h" +#include "gfx2DGlue.h" +#include "Layers.h" +#include "mozilla/Preferences.h" +#include "mozilla/SharedThreadPool.h" + +#include + +#define VPX_DONT_DEFINE_STDINT_TYPES +#include "vpx/vp8dx.h" +#include "vpx/vpx_decoder.h" + +// Un-comment to enable logging of seek bisections. 
+//#define SEEK_LOGGING + +#undef LOG + +#include "prprf.h" +#define LOG(type, msg) MOZ_LOG(gMediaDecoderLog, type, msg) +#ifdef SEEK_LOGGING +#define SEEK_LOG(type, msg) MOZ_LOG(gMediaDecoderLog, type, msg) +#else +#define SEEK_LOG(type, msg) +#endif + +namespace mozilla { + +using namespace gfx; +using namespace layers; +using namespace media; + +extern LazyLogModule gMediaDecoderLog; +LazyLogModule gNesteggLog("Nestegg"); + +// Functions for reading and seeking using MediaResource required for +// nestegg_io. The 'user data' passed to these functions is the +// decoder from which the media resource is obtained. +static int webm_read(void *aBuffer, size_t aLength, void *aUserData) +{ + MOZ_ASSERT(aUserData); + MediaResourceIndex* resource = + reinterpret_cast(aUserData); + + nsresult rv = NS_OK; + uint32_t bytes = 0; + + rv = resource->Read(static_cast(aBuffer), aLength, &bytes); + + bool eof = !bytes; + + return NS_FAILED(rv) ? -1 : eof ? 0 : 1; +} + +static int webm_seek(int64_t aOffset, int aWhence, void *aUserData) +{ + MOZ_ASSERT(aUserData); + MediaResourceIndex* resource = + reinterpret_cast(aUserData); + nsresult rv = resource->Seek(aWhence, aOffset); + return NS_SUCCEEDED(rv) ? 0 : -1; +} + +static int64_t webm_tell(void *aUserData) +{ + MOZ_ASSERT(aUserData); + MediaResourceIndex* resource = + reinterpret_cast(aUserData); + return resource->Tell(); +} + +static void webm_log(nestegg * context, + unsigned int severity, + char const * format, ...) 
+{ + if (!MOZ_LOG_TEST(gNesteggLog, LogLevel::Debug)) { + return; + } + + va_list args; + char msg[256]; + const char * sevStr; + + switch(severity) { + case NESTEGG_LOG_DEBUG: + sevStr = "DBG"; + break; + case NESTEGG_LOG_INFO: + sevStr = "INF"; + break; + case NESTEGG_LOG_WARNING: + sevStr = "WRN"; + break; + case NESTEGG_LOG_ERROR: + sevStr = "ERR"; + break; + case NESTEGG_LOG_CRITICAL: + sevStr = "CRT"; + break; + default: + sevStr = "UNK"; + break; + } + + va_start(args, format); + + PR_snprintf(msg, sizeof(msg), "%p [Nestegg-%s] ", context, sevStr); + PR_vsnprintf(msg+strlen(msg), sizeof(msg)-strlen(msg), format, args); + MOZ_LOG(gNesteggLog, LogLevel::Debug, (msg)); + + va_end(args); +} + +WebMReader::WebMReader(AbstractMediaDecoder* aDecoder) + : MediaDecoderReader(aDecoder) + , mContext(nullptr) + , mVideoTrack(0) + , mAudioTrack(0) + , mAudioStartUsec(-1) + , mAudioFrames(0) + , mSeekPreroll(0) + , mLastVideoFrameTime(0) + , mAudioCodec(-1) + , mVideoCodec(-1) + , mLayersBackendType(layers::LayersBackend::LAYERS_NONE) + , mHasVideo(false) + , mHasAudio(false) + , mResource(aDecoder->GetResource()) +{ + MOZ_COUNT_CTOR(WebMReader); +} + +WebMReader::~WebMReader() +{ + Cleanup(); + mVideoPackets.Reset(); + mAudioPackets.Reset(); + MOZ_ASSERT(!mAudioDecoder); + MOZ_ASSERT(!mVideoDecoder); + MOZ_COUNT_DTOR(WebMReader); +} + +RefPtr +WebMReader::Shutdown() +{ + if (mAudioDecoder) { + mAudioDecoder->Shutdown(); + mAudioDecoder = nullptr; + } + + if (mVideoDecoder) { + mVideoDecoder->Shutdown(); + mVideoDecoder = nullptr; + } + + return MediaDecoderReader::Shutdown(); +} + +nsresult WebMReader::Init() +{ + mBufferedState = new WebMBufferedState; + return NS_OK; +} + +void WebMReader::InitLayersBackendType() +{ + if (!IsVideoContentType(GetDecoder()->GetResource()->GetContentType())) { + // Not playing video, we don't care about the layers backend type. 
+ return; + } + // Extract the layer manager backend type so that platform decoders + // can determine whether it's worthwhile using hardware accelerated + // video decoding. + MediaDecoderOwner* owner = mDecoder->GetOwner(); + if (!owner) { + NS_WARNING("WebMReader without a decoder owner, can't get HWAccel"); + return; + } + + dom::HTMLMediaElement* element = owner->GetMediaElement(); + NS_ENSURE_TRUE_VOID(element); + + RefPtr layerManager = + nsContentUtils::LayerManagerForDocument(element->OwnerDoc()); + NS_ENSURE_TRUE_VOID(layerManager); + + mLayersBackendType = layerManager->GetCompositorBackendType(); +} + +nsresult WebMReader::ResetDecode() +{ + mAudioFrames = 0; + mAudioStartUsec = -1; + nsresult res = NS_OK; + if (NS_FAILED(MediaDecoderReader::ResetDecode())) { + res = NS_ERROR_FAILURE; + } + + if (mAudioDecoder) { + mAudioDecoder->ResetDecode(); + } + + mVideoPackets.Reset(); + mAudioPackets.Reset(); + + return res; +} + +void WebMReader::Cleanup() +{ + if (mContext) { + nestegg_destroy(mContext); + mContext = nullptr; + } +} + +RefPtr +WebMReader::AsyncReadMetadata() +{ + RefPtr metadata = new MetadataHolder(); + + if (NS_FAILED(RetrieveWebMMetadata(&metadata->mInfo)) || + !metadata->mInfo.HasValidMedia()) { + return MetadataPromise::CreateAndReject(ReadMetadataFailureReason::METADATA_ERROR, + __func__); + } + + return MetadataPromise::CreateAndResolve(metadata, __func__); +} + +nsresult +WebMReader::RetrieveWebMMetadata(MediaInfo* aInfo) +{ + MOZ_ASSERT(OnTaskQueue()); + + nestegg_io io; + io.read = webm_read; + io.seek = webm_seek; + io.tell = webm_tell; + io.userdata = &mResource; + int r = nestegg_init(&mContext, io, &webm_log, -1); + if (r == -1) { + return NS_ERROR_FAILURE; + } + + uint64_t duration = 0; + r = nestegg_duration(mContext, &duration); + if (r == 0) { + mInfo.mMetadataDuration.emplace(TimeUnit::FromNanoseconds(duration)); + } + + unsigned int ntracks = 0; + r = nestegg_track_count(mContext, &ntracks); + if (r == -1) { + Cleanup(); + 
return NS_ERROR_FAILURE; + } + + for (uint32_t track = 0; track < ntracks; ++track) { + int id = nestegg_track_codec_id(mContext, track); + if (id == -1) { + Cleanup(); + return NS_ERROR_FAILURE; + } + int type = nestegg_track_type(mContext, track); + if (!mHasVideo && type == NESTEGG_TRACK_VIDEO && + mDecoder->GetImageContainer()) { + nestegg_video_params params; + r = nestegg_track_video_params(mContext, track, &params); + if (r == -1) { + Cleanup(); + return NS_ERROR_FAILURE; + } + + mVideoCodec = nestegg_track_codec_id(mContext, track); + + if (!mVideoDecoder) { + mVideoDecoder = SoftwareWebMVideoDecoder::Create(this); + } + + if (!mVideoDecoder || + NS_FAILED(mVideoDecoder->Init(params.display_width, + params.display_height))) { + Cleanup(); + return NS_ERROR_FAILURE; + } + + // Picture region, taking into account cropping, before scaling + // to the display size. + unsigned int cropH = params.crop_right + params.crop_left; + unsigned int cropV = params.crop_bottom + params.crop_top; + nsIntRect pictureRect(params.crop_left, + params.crop_top, + params.width - cropH, + params.height - cropV); + + // If the cropping data appears invalid then use the frame data + if (pictureRect.width <= 0 || + pictureRect.height <= 0 || + pictureRect.x < 0 || + pictureRect.y < 0) { + pictureRect.x = 0; + pictureRect.y = 0; + pictureRect.width = params.width; + pictureRect.height = params.height; + } + + // Validate the container-reported frame and pictureRect sizes. This + // ensures that our video frame creation code doesn't overflow. + nsIntSize displaySize(params.display_width, params.display_height); + nsIntSize frameSize(params.width, params.height); + if (!IsValidVideoRegion(frameSize, pictureRect, displaySize)) { + // Video track's frame sizes will overflow. Ignore the video track. 
+ continue; + } + + mVideoTrack = track; + mHasVideo = true; + + mInfo.mVideo.mDisplay = displaySize; + mPicture = pictureRect; + mInitialFrame = frameSize; + + switch (params.stereo_mode) { + case NESTEGG_VIDEO_MONO: + mInfo.mVideo.mStereoMode = StereoMode::MONO; + break; + case NESTEGG_VIDEO_STEREO_LEFT_RIGHT: + mInfo.mVideo.mStereoMode = StereoMode::LEFT_RIGHT; + break; + case NESTEGG_VIDEO_STEREO_BOTTOM_TOP: + mInfo.mVideo.mStereoMode = StereoMode::BOTTOM_TOP; + break; + case NESTEGG_VIDEO_STEREO_TOP_BOTTOM: + mInfo.mVideo.mStereoMode = StereoMode::TOP_BOTTOM; + break; + case NESTEGG_VIDEO_STEREO_RIGHT_LEFT: + mInfo.mVideo.mStereoMode = StereoMode::RIGHT_LEFT; + break; + } + } else if (!mHasAudio && type == NESTEGG_TRACK_AUDIO) { + nestegg_audio_params params; + r = nestegg_track_audio_params(mContext, track, &params); + if (r == -1) { + Cleanup(); + return NS_ERROR_FAILURE; + } + + mAudioTrack = track; + mHasAudio = true; + mAudioCodec = nestegg_track_codec_id(mContext, track); + mCodecDelay = params.codec_delay / NS_PER_USEC; + mSeekPreroll = params.seek_preroll; + + if (mAudioCodec == NESTEGG_CODEC_VORBIS) { + mAudioDecoder = new VorbisDecoder(this); + } else if (mAudioCodec == NESTEGG_CODEC_OPUS) { + mAudioDecoder = new OpusDecoder(this); + } else { + Cleanup(); + return NS_ERROR_FAILURE; + } + + if (!mAudioDecoder || NS_FAILED(mAudioDecoder->Init())) { + Cleanup(); + return NS_ERROR_FAILURE; + } + + unsigned int nheaders = 0; + r = nestegg_track_codec_data_count(mContext, track, &nheaders); + if (r == -1) { + Cleanup(); + return NS_ERROR_FAILURE; + } + + for (uint32_t header = 0; header < nheaders; ++header) { + unsigned char* data = 0; + size_t length = 0; + r = nestegg_track_codec_data(mContext, track, header, &data, &length); + if (r == -1) { + Cleanup(); + return NS_ERROR_FAILURE; + } + if (NS_FAILED(mAudioDecoder->DecodeHeader(data, length))) { + Cleanup(); + return NS_ERROR_FAILURE; + } + } + if (NS_FAILED(mAudioDecoder->FinishInit(mInfo.mAudio))) { + 
Cleanup(); + return NS_ERROR_FAILURE; + } + } + } + + mInfo.mMediaSeekable = nestegg_has_cues(mContext); + + *aInfo = mInfo; + + return NS_OK; +} + +bool WebMReader::DecodeAudioPacket(NesteggPacketHolder* aHolder) +{ + MOZ_ASSERT(OnTaskQueue()); + + int r = 0; + unsigned int count = 0; + r = nestegg_packet_count(aHolder->Packet(), &count); + if (r == -1) { + return false; + } + + int64_t tstamp = aHolder->Timestamp(); + if (mAudioStartUsec == -1) { + // This is the first audio chunk. Assume the start time of our decode + // is the start of this chunk. + mAudioStartUsec = tstamp; + } + // If there's a gap between the start of this audio chunk and the end of + // the previous audio chunk, we need to increment the packet count so that + // the vorbis decode doesn't use data from before the gap to help decode + // from after the gap. + CheckedInt64 tstamp_frames = UsecsToFrames(tstamp, mInfo.mAudio.mRate); + CheckedInt64 decoded_frames = UsecsToFrames(mAudioStartUsec, + mInfo.mAudio.mRate); + if (!tstamp_frames.isValid() || !decoded_frames.isValid()) { + NS_WARNING("Int overflow converting WebM times to frames"); + return false; + } + decoded_frames += mAudioFrames; + if (!decoded_frames.isValid()) { + NS_WARNING("Int overflow adding decoded_frames"); + return false; + } + if (tstamp_frames.value() > decoded_frames.value()) { +#ifdef DEBUG + int64_t gap_frames = tstamp_frames.value() - decoded_frames.value(); + CheckedInt64 usecs = FramesToUsecs(gap_frames, mInfo.mAudio.mRate); + LOG(LogLevel::Debug, ("WebMReader detected gap of %lld, %lld frames, in audio", + usecs.isValid() ? 
usecs.value() : -1, + gap_frames)); +#endif + mAudioStartUsec = tstamp; + mAudioFrames = 0; + } + + int32_t total_frames = 0; + for (uint32_t i = 0; i < count; ++i) { + unsigned char* data; + size_t length; + r = nestegg_packet_data(aHolder->Packet(), i, &data, &length); + if (r == -1) { + return false; + } + int64_t discardPadding = 0; + (void) nestegg_packet_discard_padding(aHolder->Packet(), &discardPadding); + + if (!mAudioDecoder->Decode(data, length, aHolder->Offset(), tstamp, discardPadding, &total_frames)) { + mHitAudioDecodeError = true; + return false; + } + } + + mAudioFrames += total_frames; + + return true; +} + +RefPtr WebMReader::NextPacket(TrackType aTrackType) +{ + // The packet queue that packets will be pushed on if they + // are not the type we are interested in. + WebMPacketQueue& otherPackets = + aTrackType == VIDEO ? mAudioPackets : mVideoPackets; + + // The packet queue for the type that we are interested in. + WebMPacketQueue &packets = + aTrackType == VIDEO ? mVideoPackets : mAudioPackets; + + // Flag to indicate that we do need to playback these types of + // packets. + bool hasType = aTrackType == VIDEO ? mHasVideo : mHasAudio; + + // Flag to indicate that we do need to playback the other type + // of track. + bool hasOtherType = aTrackType == VIDEO ? mHasAudio : mHasVideo; + + // Track we are interested in + uint32_t ourTrack = aTrackType == VIDEO ? mVideoTrack : mAudioTrack; + + // Value of other track + uint32_t otherTrack = aTrackType == VIDEO ? 
mAudioTrack : mVideoTrack; + + if (packets.GetSize() > 0) { + return packets.PopFront(); + } + + do { + RefPtr holder = DemuxPacket(); + if (!holder) { + return nullptr; + } + + if (hasOtherType && otherTrack == holder->Track()) { + // Save the packet for when we want these packets + otherPackets.Push(holder); + continue; + } + + // The packet is for the track we want to play + if (hasType && ourTrack == holder->Track()) { + return holder; + } + } while (true); +} + +RefPtr +WebMReader::DemuxPacket() +{ + nestegg_packet* packet; + int r = nestegg_read_packet(mContext, &packet); + if (r <= 0) { + return nullptr; + } + + unsigned int track = 0; + r = nestegg_packet_track(packet, &track); + if (r == -1) { + return nullptr; + } + + // Figure out if this is a keyframe. + bool isKeyframe = false; + if (track == mAudioTrack) { + isKeyframe = true; + } else if (track == mVideoTrack) { + unsigned char* data; + size_t length; + r = nestegg_packet_data(packet, 0, &data, &length); + if (r == -1) { + return nullptr; + } + vpx_codec_stream_info_t si; + memset(&si, 0, sizeof(si)); + si.sz = sizeof(si); + if (mVideoCodec == NESTEGG_CODEC_VP8) { + vpx_codec_peek_stream_info(vpx_codec_vp8_dx(), data, length, &si); + } else if (mVideoCodec == NESTEGG_CODEC_VP9) { + vpx_codec_peek_stream_info(vpx_codec_vp9_dx(), data, length, &si); + } + isKeyframe = si.is_kf; + } + + int64_t offset = mResource.Tell(); + RefPtr holder = new NesteggPacketHolder(); + if (!holder->Init(packet, offset, track, isKeyframe)) { + return nullptr; + } + + return holder; +} + +bool WebMReader::DecodeAudioData() +{ + MOZ_ASSERT(OnTaskQueue()); + + RefPtr holder(NextPacket(AUDIO)); + if (!holder) { + return false; + } + + return DecodeAudioPacket(holder); +} + +bool WebMReader::FilterPacketByTime(int64_t aEndTime, WebMPacketQueue& aOutput) +{ + // Push the video frames to the aOutput which's timestamp is less + // than aEndTime. 
+ while (true) { + RefPtr holder(NextPacket(VIDEO)); + if (!holder) { + break; + } + int64_t tstamp = holder->Timestamp(); + if (tstamp >= aEndTime) { + PushVideoPacket(holder); + return true; + } else { + aOutput.PushFront(holder); + } + } + + return false; +} + +int64_t WebMReader::GetNextKeyframeTime(int64_t aTimeThreshold) +{ + WebMPacketQueue skipPacketQueue; + if (!FilterPacketByTime(aTimeThreshold, skipPacketQueue)) { + // Restore the packets before we return -1. + uint32_t size = skipPacketQueue.GetSize(); + for (uint32_t i = 0; i < size; ++i) { + RefPtr packetHolder = skipPacketQueue.PopFront(); + PushVideoPacket(packetHolder); + } + return -1; + } + + // Find keyframe. + bool foundKeyframe = false; + int64_t keyframeTime = -1; + while (!foundKeyframe) { + RefPtr holder(NextPacket(VIDEO)); + if (!holder) { + break; + } + + if (holder->IsKeyframe()) { + foundKeyframe = true; + keyframeTime = holder->Timestamp(); + } + + skipPacketQueue.PushFront(holder); + } + + uint32_t size = skipPacketQueue.GetSize(); + for (uint32_t i = 0; i < size; ++i) { + RefPtr packetHolder = skipPacketQueue.PopFront(); + PushVideoPacket(packetHolder); + } + + return keyframeTime; +} + +bool WebMReader::ShouldSkipVideoFrame(int64_t aTimeThreshold) +{ + return GetNextKeyframeTime(aTimeThreshold) != -1; +} + +bool WebMReader::DecodeVideoFrame(bool &aKeyframeSkip, int64_t aTimeThreshold) +{ + if (!(aKeyframeSkip && ShouldSkipVideoFrame(aTimeThreshold))) { + LOG(LogLevel::Verbose, ("Reader [%p]: set the aKeyframeSkip to false.",this)); + aKeyframeSkip = false; + } + return mVideoDecoder->DecodeVideoFrame(aKeyframeSkip, aTimeThreshold); +} + +void WebMReader::PushVideoPacket(NesteggPacketHolder* aItem) +{ + mVideoPackets.PushFront(aItem); +} + +RefPtr +WebMReader::Seek(int64_t aTarget, int64_t aEndTime) +{ + nsresult res = SeekInternal(aTarget); + if (NS_FAILED(res)) { + return SeekPromise::CreateAndReject(res, __func__); + } else { + return SeekPromise::CreateAndResolve(aTarget, 
__func__); + } +} + +nsresult WebMReader::SeekInternal(int64_t aTarget) +{ + MOZ_ASSERT(OnTaskQueue()); + NS_ENSURE_TRUE(HaveStartTime(), NS_ERROR_FAILURE); + if (mVideoDecoder) { + nsresult rv = mVideoDecoder->Flush(); + NS_ENSURE_SUCCESS(rv, rv); + } + + LOG(LogLevel::Debug, ("Reader [%p] for Decoder [%p]: About to seek to %fs", + this, mDecoder, double(aTarget) / USECS_PER_S)); + if (NS_FAILED(ResetDecode())) { + return NS_ERROR_FAILURE; + } + uint32_t trackToSeek = mHasVideo ? mVideoTrack : mAudioTrack; + uint64_t target = aTarget * NS_PER_USEC; + + if (mSeekPreroll) { + uint64_t startTime = uint64_t(StartTime()) * NS_PER_USEC; + if (target < mSeekPreroll || target - mSeekPreroll < startTime) { + target = startTime; + } else { + target -= mSeekPreroll; + } + LOG(LogLevel::Debug, + ("Reader [%p] SeekPreroll: %f StartTime: %f AdjustedTarget: %f", + this, double(mSeekPreroll) / NS_PER_S, + double(startTime) / NS_PER_S, double(target) / NS_PER_S)); + } + int r = nestegg_track_seek(mContext, trackToSeek, target); + if (r != 0) { + LOG(LogLevel::Debug, ("Reader [%p]: track_seek for track %u failed, r=%d", + this, trackToSeek, r)); + + // Try seeking directly based on cluster information in memory. + int64_t offset = 0; + bool rv = mBufferedState->GetOffsetForTime(target, &offset); + if (!rv) { + return NS_ERROR_FAILURE; + } + + r = nestegg_offset_seek(mContext, offset); + LOG(LogLevel::Debug, ("Reader [%p]: attempted offset_seek to %lld r=%d", + this, offset, r)); + if (r != 0) { + return NS_ERROR_FAILURE; + } + } + return NS_OK; +} + +media::TimeIntervals WebMReader::GetBuffered() +{ + MOZ_ASSERT(OnTaskQueue()); + if (!HaveStartTime()) { + return media::TimeIntervals(); + } + AutoPinned resource(mDecoder->GetResource()); + + media::TimeIntervals buffered; + // Special case completely cached files. This also handles local files. 
+ if (mContext && resource->IsDataCachedToEndOfResource(0)) { + uint64_t duration = 0; + if (nestegg_duration(mContext, &duration) == 0) { + buffered += + media::TimeInterval(media::TimeUnit::FromSeconds(0), + media::TimeUnit::FromSeconds(duration / NS_PER_S)); + return buffered; + } + } + + // Either we the file is not fully cached, or we couldn't find a duration in + // the WebM bitstream. + MediaByteRangeSet ranges; + nsresult res = resource->GetCachedRanges(ranges); + NS_ENSURE_SUCCESS(res, media::TimeIntervals::Invalid()); + + for (uint32_t index = 0; index < ranges.Length(); index++) { + uint64_t start, end; + bool rv = mBufferedState->CalculateBufferedForRange(ranges[index].mStart, + ranges[index].mEnd, + &start, &end); + if (rv) { + int64_t startOffset = StartTime() * NS_PER_USEC; + NS_ASSERTION(startOffset >= 0 && uint64_t(startOffset) <= start, + "startOffset negative or larger than start time"); + if (!(startOffset >= 0 && uint64_t(startOffset) <= start)) { + startOffset = 0; + } + double startTime = (start - startOffset) / NS_PER_S; + double endTime = (end - startOffset) / NS_PER_S; + // If this range extends to the end of the file, the true end time + // is the file's duration. 
+ if (mContext && + resource->IsDataCachedToEndOfResource(ranges[index].mStart)) { + uint64_t duration = 0; + if (nestegg_duration(mContext, &duration) == 0) { + endTime = duration / NS_PER_S; + } + } + buffered += media::TimeInterval(media::TimeUnit::FromSeconds(startTime), + media::TimeUnit::FromSeconds(endTime)); + } + } + + return buffered; +} + +void WebMReader::NotifyDataArrivedInternal() +{ + MOZ_ASSERT(OnTaskQueue()); + AutoPinned resource(mDecoder->GetResource()); + MediaByteRangeSet byteRanges; + nsresult rv = resource->GetCachedRanges(byteRanges); + + if (NS_FAILED(rv)) { + return; + } + + for (auto& range : byteRanges) { + RefPtr bytes = + resource->MediaReadAt(range.mStart, range.Length()); + NS_ENSURE_TRUE_VOID(bytes); + mBufferedState->NotifyDataArrived(bytes->Elements(), bytes->Length(), range.mStart); + } +} + +int WebMReader::GetVideoCodec() +{ + return mVideoCodec; +} + +nsIntRect WebMReader::GetPicture() +{ + return mPicture; +} + +nsIntSize WebMReader::GetInitialFrame() +{ + return mInitialFrame; +} + +int64_t WebMReader::GetLastVideoFrameTime() +{ + return mLastVideoFrameTime; +} + +void WebMReader::SetLastVideoFrameTime(int64_t aFrameTime) +{ + mLastVideoFrameTime = aFrameTime; +} + +} // namespace mozilla diff --git a/dom/media/webm/WebMReader.h b/dom/media/webm/WebMReader.h new file mode 100644 index 000000000000..d974e1f7cb6c --- /dev/null +++ b/dom/media/webm/WebMReader.h @@ -0,0 +1,213 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
// Class to handle various audio decode paths.
//
// Abstract interface that WebMReader drives for the audio track. The reader
// picks a concrete decoder from the track's codec id (Vorbis or Opus) and
// then calls, in this order while reading metadata: Init(), DecodeHeader()
// once per codec-data header, and FinishInit(). A failure from any of the
// three makes the reader abandon metadata reading.
class WebMAudioDecoder
{
public:
  // Called once, right after the decoder has been constructed.
  virtual nsresult Init() = 0;
  virtual void Shutdown() = 0;
  virtual nsresult ResetDecode() = 0;
  // Receives one codec-data header of the audio track; |aData| and |aLength|
  // are the buffer and size reported by nestegg_track_codec_data().
  virtual nsresult DecodeHeader(const unsigned char* aData, size_t aLength) = 0;
  // Called after every header has been passed to DecodeHeader(). |aInfo| is
  // the reader's audio info (mInfo.mAudio).
  // NOTE(review): presumably filled in with the stream parameters here;
  // confirm against the implementations.
  virtual nsresult FinishInit(AudioInfo& aInfo) = 0;
  // Decodes one frame of an audio packet.
  //   aData, aLength   frame bytes from nestegg_packet_data()
  //   aOffset          the packet holder's Offset()
  //   aTstampUsecs     the packet holder's Timestamp()
  //   aDiscardPadding  value from nestegg_packet_discard_padding(), or 0
  //   aTotalFrames     counter shared by all frames of one packet; the reader
  //                    adds its final value to its decoded-frame total
  //                    (presumably incremented by the frames decoded here;
  //                    confirm against the implementations)
  // Returning false makes the reader flag an audio decode error and stop
  // decoding the packet.
  virtual bool Decode(const unsigned char* aData, size_t aLength,
                      int64_t aOffset, uint64_t aTstampUsecs,
                      int64_t aDiscardPadding, int32_t* aTotalFrames) = 0;
  virtual ~WebMAudioDecoder() {}
};
+ AbstractMediaDecoder* GetDecoder() + { + return mDecoder; + } + + MediaInfo GetMediaInfo() { return mInfo; } + + virtual RefPtr Shutdown() override; + virtual nsresult Init() override; + virtual nsresult ResetDecode() override; + virtual bool DecodeAudioData() override; + + virtual bool DecodeVideoFrame(bool &aKeyframeSkip, + int64_t aTimeThreshold) override; + + virtual RefPtr AsyncReadMetadata() override; + + virtual RefPtr + Seek(int64_t aTime, int64_t aEndTime) override; + + virtual media::TimeIntervals GetBuffered() override; + + // Value passed to NextPacket to determine if we are reading a video or an + // audio packet. + enum TrackType { + VIDEO = 0, + AUDIO = 1 + }; + + // Read a packet from the nestegg file. Returns nullptr if all packets for + // the particular track have been read. Pass VIDEO or AUDIO to indicate the + // type of the packet we want to read. + RefPtr NextPacket(TrackType aTrackType); + + // Pushes a packet to the front of the video packet queue. + virtual void PushVideoPacket(NesteggPacketHolder* aItem); + + int GetVideoCodec(); + nsIntRect GetPicture(); + nsIntSize GetInitialFrame(); + int64_t GetLastVideoFrameTime(); + void SetLastVideoFrameTime(int64_t aFrameTime); + layers::LayersBackend GetLayersBackendType() { return mLayersBackendType; } + uint64_t GetCodecDelay() { return mCodecDelay; } + +protected: + virtual void NotifyDataArrivedInternal() override; + + // Decode a nestegg packet of audio data. Push the audio data on the + // audio queue. Returns true when there's more audio to decode, + // false if the audio is finished, end of file has been reached, + // or an un-recoverable read error has occured. The reader's monitor + // must be held during this call. The caller is responsible for freeing + // aPacket. + bool DecodeAudioPacket(NesteggPacketHolder* aHolder); + + // Release context and set to null. Called when an error occurs during + // reading metadata or destruction of the reader itself. 
+ void Cleanup(); + + virtual nsresult SeekInternal(int64_t aTime); + + // Initializes mLayersBackendType if possible. + void InitLayersBackendType(); + + bool ShouldSkipVideoFrame(int64_t aTimeThreshold); + +private: + nsresult RetrieveWebMMetadata(MediaInfo* aInfo); + + // Get the timestamp of keyframe greater than aTimeThreshold. + int64_t GetNextKeyframeTime(int64_t aTimeThreshold); + // Push the packets into aOutput which's timestamp is less than aEndTime. + // Return false if we reach the end of stream or something wrong. + bool FilterPacketByTime(int64_t aEndTime, WebMPacketQueue& aOutput); + + // Internal method that demuxes the next packet from the stream. The caller + // is responsible for making sure it doesn't get lost. + RefPtr DemuxPacket(); + + // libnestegg context for webm container. Access on state machine thread + // or decoder thread only. + nestegg* mContext; + + nsAutoPtr mAudioDecoder; + nsAutoPtr mVideoDecoder; + + // Queue of video and audio packets that have been read but not decoded. These + // must only be accessed from the decode thread. + WebMPacketQueue mVideoPackets; + WebMPacketQueue mAudioPackets; + + // Index of video and audio track to play + uint32_t mVideoTrack; + uint32_t mAudioTrack; + + // Time in microseconds of the start of the first audio frame we've decoded. + int64_t mAudioStartUsec; + + // Number of audio frames we've decoded since decoding began at mAudioStartMs. + uint64_t mAudioFrames; + + // Number of microseconds that must be discarded from the start of the Stream. + uint64_t mCodecDelay; + + // Nanoseconds to discard after seeking. + uint64_t mSeekPreroll; + + // Calculate the frame duration from the last decodeable frame using the + // previous frame's timestamp. In NS. + int64_t mLastVideoFrameTime; + + // Parser state and computed offset-time mappings. Shared by multiple + // readers when decoder has been cloned. Main thread only. 
+ RefPtr mBufferedState; + + // Size of the frame initially present in the stream. The picture region + // is defined as a ratio relative to this. + nsIntSize mInitialFrame; + + // Picture region, as relative to the initial frame size. + nsIntRect mPicture; + + // Codec ID of audio track + int mAudioCodec; + // Codec ID of video track + int mVideoCodec; + + layers::LayersBackend mLayersBackendType; + + // Booleans to indicate if we have audio and/or video data + bool mHasVideo; + bool mHasAudio; + + MediaResourceIndex mResource; + +}; + +} // namespace mozilla + +#endif diff --git a/dom/media/webm/moz.build b/dom/media/webm/moz.build index da11b0d99e50..8dd3e99bf24d 100644 --- a/dom/media/webm/moz.build +++ b/dom/media/webm/moz.build @@ -6,15 +6,20 @@ EXPORTS += [ 'NesteggPacketHolder.h', + 'SoftwareWebMVideoDecoder.h', 'WebMBufferedParser.h', 'WebMDecoder.h', 'WebMDemuxer.h', + 'WebMReader.h', ] UNIFIED_SOURCES += [ + 'AudioDecoder.cpp', + 'SoftwareWebMVideoDecoder.cpp', 'WebMBufferedParser.cpp', 'WebMDecoder.cpp', 'WebMDemuxer.cpp', + 'WebMReader.cpp', ] if CONFIG['MOZ_WEBM_ENCODER']: diff --git a/modules/libpref/init/all.js b/modules/libpref/init/all.js index 946fa39559c2..f3e487ac7194 100644 --- a/modules/libpref/init/all.js +++ b/modules/libpref/init/all.js @@ -502,6 +502,9 @@ pref("media.mediasource.webm.enabled", true); #endif pref("media.mediasource.webm.audio.enabled", true); +// Enable new MediaFormatReader architecture for plain webm. +pref("media.format-reader.webm", true); + #ifdef MOZ_WEBSPEECH pref("media.webspeech.recognition.enable", false); pref("media.webspeech.synth.enabled", false);