gecko-dev/dom/media/mediasink/AudioSink.cpp
Jean-Yves Avenard 4377bd3b8b Bug 1524890 - P11. Remove duration from AudioData construction parameter. r=bryce
It can be determined from the size of the buffer and the number of audio frames. Additionally, it ensures that the duration of the frame is always exactly what the AudioData contains.

Differential Revision: https://phabricator.services.mozilla.com/D20170

2019-02-22 09:19:47 +00:00


/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "AudioSink.h"
#include "AudioConverter.h"
#include "MediaQueue.h"
#include "VideoUtils.h"
#include "mozilla/CheckedInt.h"
#include "mozilla/DebugOnly.h"
#include "mozilla/IntegerPrintfMacros.h"
#include "mozilla/StaticPrefs.h"
#include "nsPrintfCString.h"
namespace mozilla {
extern LazyLogModule gMediaDecoderLog;
#define SINK_LOG(msg, ...)                   \
  MOZ_LOG(gMediaDecoderLog, LogLevel::Debug, \
          ("AudioSink=%p " msg, this, ##__VA_ARGS__))
#define SINK_LOG_V(msg, ...)                   \
  MOZ_LOG(gMediaDecoderLog, LogLevel::Verbose, \
          ("AudioSink=%p " msg, this, ##__VA_ARGS__))

// The number of audio frames used to fuzz rounding errors.
static const int64_t AUDIO_FUZZ_FRAMES = 1;

// The amount of audio (in microseconds) we keep processed ahead of playback.
static const int32_t LOW_AUDIO_USECS = 300000;
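
// For example (illustrative arithmetic only): at an output rate of 48000 Hz,
// LOW_AUDIO_USECS corresponds to 0.3 s * 48000 frames/s = 14400 frames of
// decoded audio kept processed ahead of the hardware.
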
using media::TimeUnit;

AudioSink::AudioSink(AbstractThread* aThread,
                     MediaQueue<AudioData>& aAudioQueue,
                     const TimeUnit& aStartTime, const AudioInfo& aInfo)
    : mStartTime(aStartTime),
      mInfo(aInfo),
      mPlaying(true),
      mMonitor("AudioSink"),
      mWritten(0),
      mErrored(false),
      mPlaybackComplete(false),
      mOwnerThread(aThread),
      mProcessedQueueLength(0),
      mFramesParsed(0),
      mIsAudioDataAudible(false),
      mAudioQueue(aAudioQueue) {
  bool resampling = StaticPrefs::MediaResamplingEnabled();
  if (resampling) {
    mOutputRate = 48000;
  } else if (mInfo.mRate == 44100 || mInfo.mRate == 48000) {
    // The original rate is of good quality and we want to avoid unnecessary
    // resampling. Since content sampling rates are commonly one of these two,
    // keeping them as-is minimizes audio quality regression, on the
    // assumption that content providers won't change rates mid-stream.
    mOutputRate = mInfo.mRate;
  } else {
    // We will resample all data to match cubeb's preferred sampling rate.
    mOutputRate = AudioStream::GetPreferredRate();
  }
  MOZ_DIAGNOSTIC_ASSERT(mOutputRate, "output rate can't be 0.");
  mOutputChannels = DecideAudioPlaybackChannels(mInfo);
}
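
// Illustration of the output-rate choice above (hypothetical inputs,
// assuming the resampling pref is disabled and cubeb's preferred rate
// happens to be 44100 Hz on this system):
//   input 44100 Hz -> output 44100 Hz (kept as-is)
//   input 48000 Hz -> output 48000 Hz (kept as-is)
//   input 22050 Hz -> output 44100 Hz (resampled to the preferred rate)
// With the pref enabled, all three would instead be resampled to 48000 Hz.
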
AudioSink::~AudioSink() {}

nsresult AudioSink::Init(const PlaybackParams& aParams,
                         RefPtr<MediaSink::EndedPromise>& aEndedPromise) {
  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());

  mAudioQueueListener = mAudioQueue.PushEvent().Connect(
      mOwnerThread, this, &AudioSink::OnAudioPushed);
  mAudioQueueFinishListener = mAudioQueue.FinishEvent().Connect(
      mOwnerThread, this, &AudioSink::NotifyAudioNeeded);
  mProcessedQueueListener = mProcessedQueue.PopFrontEvent().Connect(
      mOwnerThread, this, &AudioSink::OnAudioPopped);

  // To ensure at least one audio packet will be popped from AudioQueue and
  // ready to be played.
  NotifyAudioNeeded();

  aEndedPromise = mEndedPromise.Ensure(__func__);
  nsresult rv = InitializeAudioStream(aParams);
  if (NS_FAILED(rv)) {
    mEndedPromise.Reject(rv, __func__);
  }
  return rv;
}

TimeUnit AudioSink::GetPosition() {
  int64_t tmp;
  if (mAudioStream && (tmp = mAudioStream->GetPosition()) >= 0) {
    TimeUnit pos = TimeUnit::FromMicroseconds(tmp);
    NS_ASSERTION(pos >= mLastGoodPosition,
                 "AudioStream position shouldn't go backward");
    // Update the last good position when we got a good one.
    if (pos >= mLastGoodPosition) {
      mLastGoodPosition = pos;
    }
  }
  return mStartTime + mLastGoodPosition;
}

bool AudioSink::HasUnplayedFrames() {
  // Experimentation suggests that GetPositionInFrames() is zero-indexed,
  // so we need to add 1 here before comparing it to mWritten.
  int64_t total;
  {
    MonitorAutoLock mon(mMonitor);
    total = mWritten + (mCursor.get() ? mCursor->Available() : 0);
  }
  return mProcessedQueue.GetSize() ||
         (mAudioStream && mAudioStream->GetPositionInFrames() + 1 < total);
}
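
// Worked example for the off-by-one above (illustrative numbers): with
// total = 100 frames written, a stream that has played everything reports
// GetPositionInFrames() == 99 (zero-indexed last frame), so the check
// 99 + 1 < 100 is false and HasUnplayedFrames() correctly returns false.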

void AudioSink::Shutdown() {
  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
  mAudioQueueListener.Disconnect();
  mAudioQueueFinishListener.Disconnect();
  mProcessedQueueListener.Disconnect();

  if (mAudioStream) {
    mAudioStream->Shutdown();
    mAudioStream = nullptr;
  }
  mProcessedQueue.Reset();
  mProcessedQueue.Finish();
  mEndedPromise.ResolveIfExists(true, __func__);
}

void AudioSink::SetVolume(double aVolume) {
  if (mAudioStream) {
    mAudioStream->SetVolume(aVolume);
  }
}

void AudioSink::SetPlaybackRate(double aPlaybackRate) {
  MOZ_ASSERT(aPlaybackRate != 0,
             "Don't set the playbackRate to 0 on AudioStream");
  if (mAudioStream) {
    mAudioStream->SetPlaybackRate(aPlaybackRate);
  }
}

void AudioSink::SetPreservesPitch(bool aPreservesPitch) {
  if (mAudioStream) {
    mAudioStream->SetPreservesPitch(aPreservesPitch);
  }
}

void AudioSink::SetPlaying(bool aPlaying) {
  if (!mAudioStream || mPlaying == aPlaying || mPlaybackComplete) {
    return;
  }
  // Pause/resume the AudioStream as necessary.
  if (!aPlaying) {
    mAudioStream->Pause();
  } else {
    mAudioStream->Resume();
  }
  mPlaying = aPlaying;
}

nsresult AudioSink::InitializeAudioStream(const PlaybackParams& aParams) {
  mAudioStream = new AudioStream(*this);
  // When AudioQueue is empty, there is no way to know the channel layout of
  // the coming audio data, so we use the predefined channel map instead.
  AudioConfig::ChannelLayout::ChannelMap channelMap =
      mConverter ? mConverter->OutputConfig().Layout().Map()
                 : AudioConfig::ChannelLayout(mOutputChannels).Map();
  // The layout map used here is already processed by mConverter with
  // mOutputChannels into SMPTE format, so there is no need to worry whether
  // StaticPrefs::accessibility_monoaudio_enable() or
  // StaticPrefs::MediaForcestereoEnabled() is applied.
  nsresult rv = mAudioStream->Init(mOutputChannels, channelMap, mOutputRate,
                                   aParams.mSink);
  if (NS_FAILED(rv)) {
    mAudioStream->Shutdown();
    mAudioStream = nullptr;
    return rv;
  }

  // Set playback params before calling Start() so they can take effect
  // as soon as the 1st DataCallback of the AudioStream fires.
  mAudioStream->SetVolume(aParams.mVolume);
  mAudioStream->SetPlaybackRate(aParams.mPlaybackRate);
  mAudioStream->SetPreservesPitch(aParams.mPreservesPitch);
  return mAudioStream->Start();
}

TimeUnit AudioSink::GetEndTime() const {
  int64_t written;
  {
    MonitorAutoLock mon(mMonitor);
    written = mWritten;
  }
  TimeUnit played = FramesToTimeUnit(written, mOutputRate) + mStartTime;
  if (!played.IsValid()) {
    NS_WARNING("Int overflow calculating audio end time");
    return TimeUnit::Zero();
  }
  // As we may be resampling, rounding errors may occur. Ensure we never get
  // past the original end time.
  return std::min(mLastEndTime, played);
}
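
// Example of the conversion above (illustrative numbers): with
// mWritten = 96000 frames at mOutputRate = 48000 Hz, played is
// mStartTime + 2 s. The std::min() clamp then keeps resampling rounding
// errors from reporting an end time past mLastEndTime.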

UniquePtr<AudioStream::Chunk> AudioSink::PopFrames(uint32_t aFrames) {
  class Chunk : public AudioStream::Chunk {
   public:
    Chunk(AudioData* aBuffer, uint32_t aFrames, AudioDataValue* aData)
        : mBuffer(aBuffer), mFrames(aFrames), mData(aData) {}
    Chunk() : mFrames(0), mData(nullptr) {}
    const AudioDataValue* Data() const override { return mData; }
    uint32_t Frames() const override { return mFrames; }
    uint32_t Channels() const override {
      return mBuffer ? mBuffer->mChannels : 0;
    }
    uint32_t Rate() const override { return mBuffer ? mBuffer->mRate : 0; }
    AudioDataValue* GetWritable() const override { return mData; }

   private:
    const RefPtr<AudioData> mBuffer;
    const uint32_t mFrames;
    AudioDataValue* const mData;
  };

  bool needPopping = false;
  if (!mCurrentData) {
    // No data in the queue. Return an empty chunk.
    if (!mProcessedQueue.GetSize()) {
      return MakeUnique<Chunk>();
    }

    // We need to update our values prior to popping the processed queue, to
    // prevent the pop event from firing too early (i.e. before
    // mProcessedQueueLength has been updated) and to prevent
    // HasUnplayedFrames() from incorrectly returning true in the interval
    // between when mProcessedQueue is read and when mWritten is updated.
    needPopping = true;
    mCurrentData = mProcessedQueue.PeekFront();
    {
      MonitorAutoLock mon(mMonitor);
      mCursor = MakeUnique<AudioBufferCursor>(mCurrentData->Data(),
                                              mCurrentData->mChannels,
                                              mCurrentData->Frames());
    }
    MOZ_ASSERT(mCurrentData->Frames() > 0);
    mProcessedQueueLength -=
        FramesToUsecs(mCurrentData->Frames(), mOutputRate).value();
  }

  auto framesToPop = std::min(aFrames, mCursor->Available());

  SINK_LOG_V("playing audio at time=%" PRId64 " offset=%u length=%u",
             mCurrentData->mTime.ToMicroseconds(),
             mCurrentData->Frames() - mCursor->Available(), framesToPop);

  UniquePtr<AudioStream::Chunk> chunk =
      MakeUnique<Chunk>(mCurrentData, framesToPop, mCursor->Ptr());

  {
    MonitorAutoLock mon(mMonitor);
    mWritten += framesToPop;
    mCursor->Advance(framesToPop);
  }

  // All frames have been popped. Reset mCurrentData so we can pop new
  // elements from the audio queue on the next call to PopFrames().
  if (!mCursor->Available()) {
    mCurrentData = nullptr;
  }

  if (needPopping) {
    // We can now safely pop the audio packet from the processed queue.
    // This will fire the popped event, triggering a call to
    // NotifyAudioNeeded.
    RefPtr<AudioData> releaseMe = mProcessedQueue.PopFront();
    CheckIsAudible(releaseMe);
  }

  return chunk;
}

bool AudioSink::Ended() const {
  // Return true when error encountered so AudioStream can start draining.
  return mProcessedQueue.IsFinished() || mErrored;
}

void AudioSink::Drained() {
  SINK_LOG("Drained");
  mPlaybackComplete = true;
  mEndedPromise.ResolveIfExists(true, __func__);
}

void AudioSink::CheckIsAudible(const AudioData* aData) {
  MOZ_ASSERT(aData);

  bool isAudible = aData->IsAudible();
  if (isAudible != mIsAudioDataAudible) {
    mIsAudioDataAudible = isAudible;
    mAudibleEvent.Notify(mIsAudioDataAudible);
  }
}

void AudioSink::OnAudioPopped(const RefPtr<AudioData>& aSample) {
  SINK_LOG_V("AudioStream has used an audio packet.");
  NotifyAudioNeeded();
}

void AudioSink::OnAudioPushed(const RefPtr<AudioData>& aSample) {
  SINK_LOG_V("One new audio packet available.");
  NotifyAudioNeeded();
}

void AudioSink::NotifyAudioNeeded() {
  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn(),
             "Not called from the owner's thread");

  // Always ensure we have two processed packets pending to allow for
  // processing latency.
  while (mAudioQueue.GetSize() &&
         (mAudioQueue.IsFinished() ||
          mProcessedQueueLength < LOW_AUDIO_USECS ||
          mProcessedQueue.GetSize() < 2)) {
    RefPtr<AudioData> data = mAudioQueue.PopFront();

    // Ignore elements with 0 frames and try the next.
    if (!data->Frames()) {
      continue;
    }

    if (!mConverter ||
        (data->mRate != mConverter->InputConfig().Rate() ||
         data->mChannels != mConverter->InputConfig().Channels())) {
      SINK_LOG_V("Audio format changed from %u@%uHz to %u@%uHz",
                 mConverter ? mConverter->InputConfig().Channels() : 0,
                 mConverter ? mConverter->InputConfig().Rate() : 0,
                 data->mChannels, data->mRate);

      DrainConverter();

      // mFramesParsed indicates the current playtime in frames at the
      // current input sampling rate. Recalculate it per the new sampling
      // rate.
      if (mFramesParsed) {
        // We minimize overflow.
        uint32_t oldRate = mConverter->InputConfig().Rate();
        uint32_t newRate = data->mRate;
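
        // Worked example (illustrative rates): 441000 frames parsed at an
        // old rate of 44100 Hz represent 10 s of audio; switching to
        // 48000 Hz gives 441000 * 48000 / 44100 = 480000 frames, still
        // 10 s. SaferMultDiv performs that multiply-then-divide while
        // minimizing the chance of intermediate overflow.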
        CheckedInt64 result = SaferMultDiv(mFramesParsed, newRate, oldRate);
        if (!result.isValid()) {
          NS_WARNING("Int overflow in AudioSink");
          mErrored = true;
          return;
        }
        mFramesParsed = result.value();
      }

      const AudioConfig::ChannelLayout inputLayout =
          data->mChannelMap
              ? AudioConfig::ChannelLayout::SMPTEDefault(data->mChannelMap)
              : AudioConfig::ChannelLayout(data->mChannels);
      const AudioConfig::ChannelLayout outputLayout =
          mOutputChannels == data->mChannels
              ? inputLayout
              : AudioConfig::ChannelLayout(mOutputChannels);

      mConverter = MakeUnique<AudioConverter>(
          AudioConfig(inputLayout, data->mChannels, data->mRate),
          AudioConfig(outputLayout, mOutputChannels, mOutputRate));
    }

    // See if there's a gap in the audio. If there is, push silence into the
    // audio hardware, so we can play across the gap.
    // Calculate the timestamp of the next chunk of audio in numbers of
    // samples.
    CheckedInt64 sampleTime =
        TimeUnitToFrames(data->mTime - mStartTime, data->mRate);
    // Calculate the number of frames missing between the frames we have
    // pushed so far and the start of this packet.
    CheckedInt64 missingFrames = sampleTime - mFramesParsed;
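
    // Worked example (illustrative numbers): a packet stamped 1.5 s after
    // mStartTime at 48000 Hz gives sampleTime = 72000; if only
    // mFramesParsed = 71990 frames have been pushed so far, then
    // missingFrames = 10 exceeds AUDIO_FUZZ_FRAMES and 10 frames of silence
    // will be inserted ahead of the packet.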
    if (!missingFrames.isValid()) {
      NS_WARNING("Int overflow in AudioSink");
      mErrored = true;
      return;
    }

    if (missingFrames.value() > AUDIO_FUZZ_FRAMES) {
      // The next audio packet begins some time after the end of the last
      // packet we pushed to the audio hardware. We must push silence into
      // the audio hardware so that the next audio packet begins playback
      // at the correct time.
      missingFrames = std::min<int64_t>(INT32_MAX, missingFrames.value());
      mFramesParsed += missingFrames.value();

      RefPtr<AudioData> silenceData;
      AlignedAudioBuffer silenceBuffer(missingFrames.value() *
                                       data->mChannels);
      if (!silenceBuffer) {
        NS_WARNING("OOM in AudioSink");
        mErrored = true;
        return;
      }
      if (mConverter->InputConfig() != mConverter->OutputConfig()) {
        AlignedAudioBuffer convertedData =
            mConverter->Process(AudioSampleBuffer(std::move(silenceBuffer)))
                .Forget();
        silenceData = CreateAudioFromBuffer(std::move(convertedData), data);
      } else {
        silenceData = CreateAudioFromBuffer(std::move(silenceBuffer), data);
      }
      PushProcessedAudio(silenceData);
    }

    mLastEndTime = data->GetEndTime();
    mFramesParsed += data->Frames();

    if (mConverter->InputConfig() != mConverter->OutputConfig()) {
      AlignedAudioBuffer buffer(data->MoveableData());
      AlignedAudioBuffer convertedData =
          mConverter->Process(AudioSampleBuffer(std::move(buffer))).Forget();
      data = CreateAudioFromBuffer(std::move(convertedData), data);
    }
    if (PushProcessedAudio(data)) {
      mLastProcessedPacket = Some(data);
    }
  }

  if (mAudioQueue.IsFinished()) {
    // We have reached the end of the data, drain the resampler.
    DrainConverter();
    mProcessedQueue.Finish();
  }
}

uint32_t AudioSink::PushProcessedAudio(AudioData* aData) {
  if (!aData || !aData->Frames()) {
    return 0;
  }
  mProcessedQueue.Push(aData);
  mProcessedQueueLength += FramesToUsecs(aData->Frames(), mOutputRate).value();
  return aData->Frames();
}

already_AddRefed<AudioData> AudioSink::CreateAudioFromBuffer(
    AlignedAudioBuffer&& aBuffer, AudioData* aReference) {
  uint32_t frames = aBuffer.Length() / mOutputChannels;
  if (!frames) {
    return nullptr;
  }
  auto duration = FramesToTimeUnit(frames, mOutputRate);
  if (!duration.IsValid()) {
    NS_WARNING("Int overflow in AudioSink");
    mErrored = true;
    return nullptr;
  }
  RefPtr<AudioData> data =
      new AudioData(aReference->mOffset, aReference->mTime, std::move(aBuffer),
                    mOutputChannels, mOutputRate);
  MOZ_DIAGNOSTIC_ASSERT(duration == data->mDuration, "must be equal");
  return data.forget();
}
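
// Example of the frame math above (illustrative numbers): a buffer of 960
// samples with mOutputChannels = 2 yields 480 frames, i.e. a duration of
// 10 ms at mOutputRate = 48000 Hz; the diagnostic assert verifies that the
// AudioData derives that same duration from its own buffer size.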

uint32_t AudioSink::DrainConverter(uint32_t aMaxFrames) {
  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());

  if (!mConverter || !mLastProcessedPacket || !aMaxFrames) {
    // Nothing to drain.
    return 0;
  }

  RefPtr<AudioData> lastPacket = mLastProcessedPacket.ref();
  mLastProcessedPacket.reset();

  // To drain we simply provide an empty packet to the audio converter.
  AlignedAudioBuffer convertedData =
      mConverter->Process(AudioSampleBuffer(AlignedAudioBuffer())).Forget();

  uint32_t frames = convertedData.Length() / mOutputChannels;
  if (!convertedData.SetLength(std::min(frames, aMaxFrames) *
                               mOutputChannels)) {
    // This can never happen as we are only ever reducing the length of
    // convertedData.
    mErrored = true;
    return 0;
  }

  RefPtr<AudioData> data =
      CreateAudioFromBuffer(std::move(convertedData), lastPacket);
  if (!data) {
    return 0;
  }
  mProcessedQueue.Push(data);
  return data->Frames();
}
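
// Why draining can still produce frames (a sketch, assuming a resampling
// converter): a 44100 Hz -> 48000 Hz resampler keeps a small amount of
// lookahead buffered internally, so feeding it an empty packet flushes those
// tail frames, which are then timestamped against the last processed packet.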

nsCString AudioSink::GetDebugInfo() {
  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
  return nsPrintfCString(
      "AudioSink: StartTime=%" PRId64 " LastGoodPosition=%" PRId64
      " Playing=%d OutputRate=%u Written=%" PRId64
      " Errored=%d PlaybackComplete=%d",
      mStartTime.ToMicroseconds(), mLastGoodPosition.ToMicroseconds(),
      mPlaying, mOutputRate, mWritten, bool(mErrored),
      bool(mPlaybackComplete));
}

} // namespace mozilla