From cf8fbf13e1c2b32a33016917af10c82a1a54ba15 Mon Sep 17 00:00:00 2001
From: Robert O'Callahan
Date: Thu, 22 Nov 2012 18:04:27 +1300
Subject: [PATCH] Bug 827537. Refactor AudioChunk to support having separate
 buffers for each channel. r=jesup

--HG--
extra : rebase_source : 0aa26e1c3181d9fe5158520d4b33248bae0fa5d0
---
 content/media/AudioSampleFormat.h                  | 16 ++++-
 content/media/AudioSegment.cpp                     | 34 ++++-----
 content/media/AudioSegment.h                       | 72 ++++++++++++++-----
 content/media/MediaDecoderStateMachine.cpp         |  8 ++-
 content/media/SharedBuffer.h                       | 17 +++--
 content/media/webrtc/MediaEngineWebRTCAudio.cpp    |  4 +-
 .../src/mediapipeline/MediaPipeline.cpp            | 13 ++--
 .../src/peerconnection/PeerConnectionMedia.h       | 13 ++--
 .../webrtc/signaling/test/FakeMediaStreams.h       |  2 +-
 .../signaling/test/FakeMediaStreamsImpl.h          | 12 ++--
 10 files changed, 125 insertions(+), 66 deletions(-)

diff --git a/content/media/AudioSampleFormat.h b/content/media/AudioSampleFormat.h
index 992561774235..e18b7fb7962b 100644
--- a/content/media/AudioSampleFormat.h
+++ b/content/media/AudioSampleFormat.h
@@ -139,7 +139,6 @@ ScaleAudioSamples(float* aBuffer, int aCount, float aScale)
   }
 }
 
-
 inline void
 ScaleAudioSamples(short* aBuffer, int aCount, float aScale)
 {
@@ -149,6 +148,21 @@ ScaleAudioSamples(short* aBuffer, int aCount, float aScale)
   }
 }
 
+inline const void*
+AddAudioSampleOffset(const void* aBase, AudioSampleFormat aFormat,
+                     int32_t aOffset)
+{
+  switch (aFormat) {
+  case AUDIO_FORMAT_FLOAT32:
+    return static_cast<const float*>(aBase) + aOffset;
+  case AUDIO_FORMAT_S16:
+    return static_cast<const int16_t*>(aBase) + aOffset;
+  default:
+    NS_ERROR("Unknown format");
+    return nullptr;
+  }
+}
+
 } // namespace mozilla
 
 #endif /* MOZILLA_AUDIOSAMPLEFORMAT_H_ */
diff --git a/content/media/AudioSegment.cpp b/content/media/AudioSegment.cpp
index 399409710835..7ee3a636f0b9 100644
--- a/content/media/AudioSegment.cpp
+++ b/content/media/AudioSegment.cpp
@@ -11,16 +11,15 @@ namespace mozilla {
 
 template <class SrcT, class DestT>
 static void
-InterleaveAndConvertBuffer(const SrcT* aSource, int32_t aSourceLength,
-                           int32_t aLength,
-                           float aVolume,
+InterleaveAndConvertBuffer(const SrcT** aSourceChannels,
+                           int32_t aLength, float aVolume,
                            int32_t aChannels,
                            DestT* aOutput)
 {
   DestT* output = aOutput;
   for (int32_t i = 0; i < aLength; ++i) {
     for (int32_t channel = 0; channel < aChannels; ++channel) {
-      float v = AudioSampleToFloat(aSource[channel*aSourceLength + i])*aVolume;
+      float v = AudioSampleToFloat(aSourceChannels[channel][i])*aVolume;
       *output = FloatToAudioSample<DestT>(v);
       ++output;
     }
@@ -28,9 +27,8 @@ InterleaveAndConvertBuffer(const SrcT* aSource, int32_t aSourceLength,
 }
 
 static inline void
-InterleaveAndConvertBuffer(const int16_t* aSource, int32_t aSourceLength,
-                           int32_t aLength,
-                           float aVolume,
+InterleaveAndConvertBuffer(const int16_t** aSourceChannels,
+                           int32_t aLength, float aVolume,
                            int32_t aChannels,
                            int16_t* aOutput)
 {
@@ -39,7 +37,7 @@ InterleaveAndConvertBuffer(const int16_t* aSource, int32_t aSourceLength,
     int32_t scale = int32_t((1 << 16) * aVolume);
     for (int32_t i = 0; i < aLength; ++i) {
       for (int32_t channel = 0; channel < aChannels; ++channel) {
-        int16_t s = aSource[channel*aSourceLength + i];
+        int16_t s = aSourceChannels[channel][i];
         *output = int16_t((int32_t(s) * scale) >> 16);
         ++output;
       }
@@ -49,7 +47,7 @@ InterleaveAndConvertBuffer(const int16_t* aSource, int32_t aSourceLength,
 
   for (int32_t i = 0; i < aLength; ++i) {
     for (int32_t channel = 0; channel < aChannels; ++channel) {
-      float v = AudioSampleToFloat(aSource[channel*aSourceLength + i])*aVolume;
+      float v = AudioSampleToFloat(aSourceChannels[channel][i])*aVolume;
       *output = FloatToAudioSample<int16_t>(v);
       ++output;
     }
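
A standalone sketch (plain C++, not part of the patch) of the planar-to-
interleaved copy the rewritten InterleaveAndConvertBuffer performs, using the
same 16.16 fixed-point volume trick as the int16_t fast path above; it assumes
0 <= aVolume <= 1, the condition under which the real code takes this path:

    #include <cstdint>

    static void
    InterleavePlanar(const int16_t** aChannels, int32_t aFrames,
                     int32_t aChannelCount, float aVolume, int16_t* aOutput)
    {
      int32_t scale = int32_t((1 << 16) * aVolume); // volume as 16.16 fixed point
      for (int32_t i = 0; i < aFrames; ++i) {
        for (int32_t c = 0; c < aChannelCount; ++c) {
          // Each channel is read from its own (planar) buffer;
          // the output is frame-interleaved.
          *aOutput++ = int16_t((int32_t(aChannels[c][i]) * scale) >> 16);
        }
      }
    }
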
@@ -57,25 +55,22 @@ InterleaveAndConvertBuffer(const int16_t* aSource, int32_t aSourceLength,
 }
 
 static void
-InterleaveAndConvertBuffer(const void* aSource, AudioSampleFormat aSourceFormat,
-                           int32_t aSourceLength,
-                           int32_t aOffset, int32_t aLength,
-                           float aVolume,
+InterleaveAndConvertBuffer(const void** aSourceChannels,
+                           AudioSampleFormat aSourceFormat,
+                           int32_t aLength, float aVolume,
                            int32_t aChannels,
                            AudioDataValue* aOutput)
 {
   switch (aSourceFormat) {
   case AUDIO_FORMAT_FLOAT32:
-    InterleaveAndConvertBuffer(static_cast<const float*>(aSource) + aOffset,
-                               aSourceLength,
+    InterleaveAndConvertBuffer(reinterpret_cast<const float**>(aSourceChannels),
                                aLength,
                                aVolume,
                                aChannels,
                                aOutput);
     break;
   case AUDIO_FORMAT_S16:
-    InterleaveAndConvertBuffer(static_cast<const int16_t*>(aSource) + aOffset,
-                               aSourceLength,
+    InterleaveAndConvertBuffer(reinterpret_cast<const int16_t**>(aSourceChannels),
                                aLength,
                                aVolume,
                                aChannels,
@@ -107,9 +102,8 @@ AudioSegment::WriteTo(AudioStream* aOutput)
     }
     buf.SetLength(int32_t(mChannels*c.mDuration));
     if (c.mBuffer) {
-      InterleaveAndConvertBuffer(c.mBuffer->Data(), c.mBufferFormat, c.mBufferLength,
-                                 c.mOffset, int32_t(c.mDuration),
-                                 c.mVolume,
+      InterleaveAndConvertBuffer(c.mChannelData.Elements(), c.mBufferFormat,
+                                 int32_t(c.mDuration), c.mVolume,
                                  aOutput->GetChannels(),
                                  buf.Elements());
     } else {
diff --git a/content/media/AudioSegment.h b/content/media/AudioSegment.h
index a3ab0e2c3828..b22687259908 100644
--- a/content/media/AudioSegment.h
+++ b/content/media/AudioSegment.h
@@ -15,6 +15,14 @@ namespace mozilla {
 
 class AudioStream;
 
+/**
+ * An AudioChunk represents a multi-channel buffer of audio samples.
+ * It references an underlying ThreadSharedObject which manages the lifetime
+ * of the buffer. An AudioChunk maintains its own duration and channel data
+ * pointers so it can represent a subinterval of a buffer without copying.
+ * An AudioChunk can store its individual channels anywhere; it maintains
+ * separate pointers to each channel's buffer.
+ */
 struct AudioChunk {
   typedef mozilla::AudioSampleFormat SampleFormat;
 
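
A minimal sketch (plain C++ with illustrative names, not the patch's API) of
why per-channel pointers make the SliceTo below a zero-copy operation:

    #include <cstdint>
    #include <vector>

    struct ChunkView {
      std::vector<const int16_t*> mChannelData; // one pointer per channel
      int64_t mDuration;                        // frames in the view

      // Narrow the view to [aStart, aEnd): just advance each channel pointer.
      // The underlying refcounted buffer is never touched or copied.
      void SliceTo(int64_t aStart, int64_t aEnd) {
        for (const int16_t*& channel : mChannelData) {
          channel += aStart;
        }
        mDuration = aEnd - aStart;
      }
    };
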
@@ -24,7 +32,11 @@ struct AudioChunk {
     NS_ASSERTION(aStart >= 0 && aStart < aEnd && aEnd <= mDuration,
                  "Slice out of bounds");
     if (mBuffer) {
-      mOffset += int32_t(aStart);
+      MOZ_ASSERT(aStart < INT32_MAX, "Can't slice beyond 32-bit sample lengths");
+      for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) {
+        mChannelData[channel] = AddAudioSampleOffset(mChannelData[channel],
+            mBufferFormat, int32_t(aStart));
+      }
     }
     mDuration = aEnd - aStart;
   }
@@ -35,9 +47,19 @@ struct AudioChunk {
       return false;
     }
     if (mBuffer) {
-      NS_ASSERTION(aOther.mBufferFormat == mBufferFormat && aOther.mBufferLength == mBufferLength,
+      NS_ASSERTION(aOther.mBufferFormat == mBufferFormat,
                    "Wrong metadata about buffer");
-      return aOther.mOffset == mOffset + mDuration && aOther.mVolume == mVolume;
+      NS_ASSERTION(aOther.mChannelData.Length() == mChannelData.Length(),
+                   "Mismatched channel count");
+      if (mDuration > INT32_MAX) {
+        return false;
+      }
+      for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) {
+        if (aOther.mChannelData[channel] != AddAudioSampleOffset(mChannelData[channel],
+            mBufferFormat, int32_t(mDuration))) {
+          return false;
+        }
+      }
     }
     return true;
   }
@@ -45,17 +67,16 @@ struct AudioChunk {
   void SetNull(TrackTicks aDuration)
   {
     mBuffer = nullptr;
+    mChannelData.Clear();
     mDuration = aDuration;
-    mOffset = 0;
     mVolume = 1.0f;
   }
 
-  TrackTicks mDuration; // in frames within the buffer
-  nsRefPtr<SharedBuffer> mBuffer; // null means data is all zeroes
-  int32_t mBufferLength; // number of frames in mBuffer (only meaningful if mBuffer is nonnull)
-  SampleFormat mBufferFormat; // format of frames in mBuffer (only meaningful if mBuffer is nonnull)
-  int32_t mOffset; // in frames within the buffer (zero if mBuffer is null)
-  float mVolume; // volume multiplier to apply (1.0f if mBuffer is nonnull)
+  TrackTicks mDuration; // in frames within the buffer
+  nsRefPtr<ThreadSharedObject> mBuffer; // the buffer object whose lifetime is managed; null means data is all zeroes
+  nsTArray<const void*> mChannelData; // one pointer per channel; empty if and only if mBuffer is null
+  float mVolume; // volume multiplier to apply (1.0f if mBuffer is nonnull)
+  SampleFormat mBufferFormat; // format of frames in mBuffer (only meaningful if mBuffer is nonnull)
 };
 
 /**
@@ -83,16 +104,35 @@ public:
     NS_ASSERTION(IsInitialized(), "Not initialized");
     return mChannels;
   }
-  void AppendFrames(already_AddRefed<SharedBuffer> aBuffer, int32_t aBufferLength,
-                    int32_t aStart, int32_t aEnd, SampleFormat aFormat)
+  void AppendFrames(already_AddRefed<ThreadSharedObject> aBuffer,
+                    const nsTArray<const float*>& aChannelData,
+                    int32_t aDuration)
   {
     NS_ASSERTION(mChannels > 0, "Not initialized");
-    AudioChunk* chunk = AppendChunk(aEnd - aStart);
+    NS_ASSERTION(!aBuffer.get() || aChannelData.Length() == uint32_t(mChannels),
+                 "Wrong number of channels");
+    AudioChunk* chunk = AppendChunk(aDuration);
     chunk->mBuffer = aBuffer;
-    chunk->mBufferFormat = aFormat;
-    chunk->mBufferLength = aBufferLength;
-    chunk->mOffset = aStart;
+    for (uint32_t channel = 0; channel < aChannelData.Length(); ++channel) {
+      chunk->mChannelData.AppendElement(aChannelData[channel]);
+    }
     chunk->mVolume = 1.0f;
+    chunk->mBufferFormat = AUDIO_FORMAT_FLOAT32;
+  }
+  void AppendFrames(already_AddRefed<ThreadSharedObject> aBuffer,
+                    const nsTArray<const int16_t*>& aChannelData,
+                    int32_t aDuration)
+  {
+    NS_ASSERTION(mChannels > 0, "Not initialized");
+    NS_ASSERTION(!aBuffer.get() || aChannelData.Length() == uint32_t(mChannels),
+                 "Wrong number of channels");
+    AudioChunk* chunk = AppendChunk(aDuration);
+    chunk->mBuffer = aBuffer;
+    for (uint32_t channel = 0; channel < aChannelData.Length(); ++channel) {
+      chunk->mChannelData.AppendElement(aChannelData[channel]);
+    }
+    chunk->mVolume = 1.0f;
+    chunk->mBufferFormat = AUDIO_FORMAT_S16;
   }
   void ApplyVolume(float aVolume);
   /**
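
For reference, a hedged usage sketch of the new AppendFrames signature,
mirroring the MediaDecoderStateMachine call site below; this uses the Mozilla
tree types from the patch (`frames`, `channels`, and `segment` are
illustrative variables, not patch code):

    nsRefPtr<SharedBuffer> buffer =
      SharedBuffer::Create(channels * frames * sizeof(int16_t));
    int16_t* data = static_cast<int16_t*>(buffer->Data());
    nsAutoTArray<const int16_t*,2> channelPtrs;
    for (int32_t c = 0; c < channels; ++c) {
      // Planar layout: channel c occupies its own run of `frames` samples
      // within the single shared allocation.
      channelPtrs.AppendElement(data + c * frames);
    }
    segment.AppendFrames(buffer.forget(), channelPtrs, frames);
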
diff --git a/content/media/MediaDecoderStateMachine.cpp b/content/media/MediaDecoderStateMachine.cpp
index 56dc8d3904d8..896d98a77a85 100644
--- a/content/media/MediaDecoderStateMachine.cpp
+++ b/content/media/MediaDecoderStateMachine.cpp
@@ -549,8 +549,12 @@ void MediaDecoderStateMachine::SendStreamAudio(AudioData* aAudio,
 
   aAudio->EnsureAudioBuffer();
   nsRefPtr<SharedBuffer> buffer = aAudio->mAudioBuffer;
-  aOutput->AppendFrames(buffer.forget(), aAudio->mFrames, int32_t(offset), aAudio->mFrames,
-                        AUDIO_OUTPUT_FORMAT);
+  AudioDataValue* bufferData = static_cast<AudioDataValue*>(buffer->Data());
+  nsAutoTArray<const AudioDataValue*,2> channels;
+  for (uint32_t i = 0; i < aAudio->mChannels; ++i) {
+    channels.AppendElement(bufferData + i*aAudio->mFrames + offset);
+  }
+  aOutput->AppendFrames(buffer.forget(), channels, aAudio->mFrames);
   LOG(PR_LOG_DEBUG, ("%p Decoder writing %d frames of data to MediaStream for AudioData at %lld",
                      mDecoder.get(), aAudio->mFrames - int32_t(offset), aAudio->mTime));
   aStream->mAudioFramesWritten += aAudio->mFrames - int32_t(offset);
diff --git a/content/media/SharedBuffer.h b/content/media/SharedBuffer.h
index 4ad1b2cd00b4..68ca65564eb4 100644
--- a/content/media/SharedBuffer.h
+++ b/content/media/SharedBuffer.h
@@ -12,6 +12,16 @@
 
 namespace mozilla {
 
+/**
+ * Base class for objects with a thread-safe refcount and a virtual
+ * destructor.
+ */
+class ThreadSharedObject {
+public:
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(ThreadSharedObject)
+  virtual ~ThreadSharedObject() {}
+};
+
 /**
  * Heap-allocated chunk of arbitrary data with threadsafe refcounting.
  * Typically you would allocate one of these, fill it in, and then treat it as
@@ -20,15 +30,10 @@ namespace mozilla {
  * simply assume that the refcount is at least 4-byte aligned and its size
  * is divisible by 4.
  */
-class SharedBuffer {
+class SharedBuffer : public ThreadSharedObject {
 public:
-  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(SharedBuffer)
-  ~SharedBuffer() {}
-
   void* Data() { return this + 1; }
 
-  // Takes ownership of aData (which will be freed via moz_free()).
-  // aData consists of aChannels consecutive buffers, each of aLength samples.
   static already_AddRefed<SharedBuffer> Create(size_t aSize)
   {
     void* m = moz_xmalloc(sizeof(SharedBuffer) + aSize);
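
The point of the new base class is that AudioChunk::mBuffer can now hold any
thread-safely refcounted payload, not just a SharedBuffer. A hypothetical
example (not in this patch) of a payload that frees an externally allocated
buffer on its last release:

    // Hypothetical: adapts a malloc'ed buffer to ThreadSharedObject so an
    // AudioChunk can keep it alive without copying it into a SharedBuffer.
    class OwnedBuffer : public ThreadSharedObject {
    public:
      explicit OwnedBuffer(void* aData) : mData(aData) {}
      virtual ~OwnedBuffer() { moz_free(mData); } // runs on last Release()
    private:
      void* mData;
    };
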
diff --git a/content/media/webrtc/MediaEngineWebRTCAudio.cpp b/content/media/webrtc/MediaEngineWebRTCAudio.cpp
index 321cfd5737b4..3ee868326095 100644
--- a/content/media/webrtc/MediaEngineWebRTCAudio.cpp
+++ b/content/media/webrtc/MediaEngineWebRTCAudio.cpp
@@ -300,7 +300,9 @@ MediaEngineWebRTCAudioSource::Process(const int channel,
 
     AudioSegment segment;
     segment.Init(CHANNELS);
-    segment.AppendFrames(buffer.forget(), length, 0, length, AUDIO_FORMAT_S16);
+    nsAutoTArray<const int16_t*,1> channels;
+    channels.AppendElement(dest);
+    segment.AppendFrames(buffer.forget(), channels, length);
     SourceMediaStream *source = mSources[i];
 
     if (source) {
diff --git a/media/webrtc/signaling/src/mediapipeline/MediaPipeline.cpp b/media/webrtc/signaling/src/mediapipeline/MediaPipeline.cpp
index aab80796a741..d7b9d2462bc9 100644
--- a/media/webrtc/signaling/src/mediapipeline/MediaPipeline.cpp
+++ b/media/webrtc/signaling/src/mediapipeline/MediaPipeline.cpp
@@ -667,7 +667,7 @@ void MediaPipelineTransmit::PipelineListener::ProcessAudioChunk(
   nsAutoArrayPtr<int16_t> samples(new int16_t[chunk.mDuration]);
 
   if (chunk.mBuffer) {
-    switch(chunk.mBufferFormat) {
+    switch (chunk.mBufferFormat) {
       case AUDIO_FORMAT_FLOAT32:
         MOZ_MTLOG(PR_LOG_ERROR, "Can't process audio except in 16-bit PCM yet");
         MOZ_ASSERT(PR_FALSE);
@@ -675,8 +675,7 @@ void MediaPipelineTransmit::PipelineListener::ProcessAudioChunk(
         break;
       case AUDIO_FORMAT_S16:
         {
-          const short* buf = static_cast<const short*>(chunk.mBuffer->Data()) +
-                             chunk.mOffset;
+          const short* buf = static_cast<const short*>(chunk.mChannelData[0]);
           ConvertAudioSamplesWithScale(buf, samples, chunk.mDuration, chunk.mVolume);
         }
         break;
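
ConvertAudioSamplesWithScale above copies mono 16-bit samples while applying
the chunk's volume. A plain-C++ sketch of that operation (illustrative only,
not the tree's helper):

    #include <algorithm>
    #include <cstdint>

    static void
    ConvertWithScale(const int16_t* aSrc, int16_t* aDst, int32_t aCount,
                     float aScale)
    {
      for (int32_t i = 0; i < aCount; ++i) {
        // Scale in float, then clamp to the int16_t range to avoid wraparound.
        float v = aSrc[i] * aScale;
        v = std::max(-32768.0f, std::min(32767.0f, v));
        aDst[i] = int16_t(v);
      }
    }
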
@@ -833,11 +832,12 @@ NotifyPull(MediaStreamGraph* graph, StreamTime desired_time) {
   while (MillisecondsToMediaTime(played_) < desired_time) {
     // TODO(ekr@rtfm.com): Is there a way to avoid mallocating here?
     nsRefPtr<SharedBuffer> samples = SharedBuffer::Create(1000);
+    int16_t *samples_data = static_cast<int16_t *>(samples->Data());
     int samples_length;
 
     MediaConduitErrorCode err =
         static_cast<AudioSessionConduit*>(conduit_.get())->GetAudioFrame(
-            static_cast<int16_t *>(samples->Data()),
+            samples_data,
             16000,  // Sampling rate fixed at 16 kHz for now
             0,  // TODO(ekr@rtfm.com): better estimate of capture delay
             samples_length);
@@ -849,8 +849,9 @@ NotifyPull(MediaStreamGraph* graph, StreamTime desired_time) {
 
     AudioSegment segment;
     segment.Init(1);
-    segment.AppendFrames(samples.forget(), samples_length,
-                         0, samples_length, AUDIO_FORMAT_S16);
+    nsAutoTArray<const int16_t*,1> channels;
+    channels.AppendElement(samples_data);
+    segment.AppendFrames(samples.forget(), channels, samples_length);
     source_->AppendToTrack(1,  // TODO(ekr@rtfm.com): Track ID
                            &segment);
diff --git a/media/webrtc/signaling/src/peerconnection/PeerConnectionMedia.h b/media/webrtc/signaling/src/peerconnection/PeerConnectionMedia.h
index 38b76580e379..a2243a77af01 100644
--- a/media/webrtc/signaling/src/peerconnection/PeerConnectionMedia.h
+++ b/media/webrtc/signaling/src/peerconnection/PeerConnectionMedia.h
@@ -63,17 +63,18 @@ Fake_AudioGenerator(nsDOMMediaStream* aStream) : mStream(aStream), mCount(0) {
 
   static void Callback(nsITimer* timer, void *arg) {
     Fake_AudioGenerator* gen = static_cast<Fake_AudioGenerator*>(arg);
 
-    nsRefPtr<mozilla::SharedBuffer> samples = mozilla::SharedBuffer::Create(1600 * 2 * sizeof(int16_t));
-    for (int i=0; i<1600*2; i++) {
-      reinterpret_cast<int16_t*>(samples->Data())[i] = ((gen->mCount % 8) * 4000) - (7*4000)/2;
+    nsRefPtr<mozilla::SharedBuffer> samples = mozilla::SharedBuffer::Create(1600 * sizeof(int16_t));
+    int16_t* data = static_cast<int16_t*>(samples->Data());
+    for (int i=0; i<1600; i++) {
+      data[i] = ((gen->mCount % 8) * 4000) - (7*4000)/2;
       ++gen->mCount;
     }
 
     mozilla::AudioSegment segment;
     segment.Init(1);
-    segment.AppendFrames(samples.forget(), 1600,
-                         0, 1600, mozilla::AUDIO_FORMAT_S16);
-
+    nsAutoTArray<const int16_t*,1> channelData;
+    channelData.AppendElement(data);
+    segment.AppendFrames(samples.forget(), channelData, 1600);
     gen->mStream->GetStream()->AsSourceStream()->AppendToTrack(1, &segment);
   }
diff --git a/media/webrtc/signaling/test/FakeMediaStreams.h b/media/webrtc/signaling/test/FakeMediaStreams.h
index f8aa1b4e485c..4abd2e4ba9ce 100644
--- a/media/webrtc/signaling/test/FakeMediaStreams.h
+++ b/media/webrtc/signaling/test/FakeMediaStreams.h
@@ -120,7 +120,7 @@ class Fake_SourceMediaStream : public Fake_MediaStream {
         mozilla::AudioChunk& chunk = *(iter);
         MOZ_ASSERT(chunk.mBuffer);
         const int16_t* buf =
-          static_cast<const int16_t*>(chunk.mBuffer->Data());
+          static_cast<const int16_t*>(chunk.mChannelData[0]);
         for(int i=0; i<chunk.mDuration; i++) {
diff --git a/media/webrtc/signaling/test/FakeMediaStreamsImpl.h b/media/webrtc/signaling/test/FakeMediaStreamsImpl.h
--- a/media/webrtc/signaling/test/FakeMediaStreamsImpl.h
+++ b/media/webrtc/signaling/test/FakeMediaStreamsImpl.h
@@ ... @@
     nsRefPtr<mozilla::SharedBuffer> samples = mozilla::SharedBuffer::Create(AUDIO_BUFFER_SIZE * NUM_CHANNELS * sizeof(int16_t));
+    int16_t* data = reinterpret_cast<int16_t*>(samples->Data());
     for(int i=0; i<(1600*2); i++) {
       //saw tooth audio sample
-      reinterpret_cast<int16_t*>(samples->Data())[i] =
-          ((mCount % 8) * 4000) - (7*4000)/2;
+      data[i] = ((mCount % 8) * 4000) - (7*4000)/2;
       mCount++;
     }
 
     mozilla::AudioSegment segment;
     segment.Init(1);
-    segment.AppendFrames(samples.forget(),
-                         AUDIO_BUFFER_SIZE,
-                         0,
-                         AUDIO_BUFFER_SIZE,
-                         mozilla::AUDIO_FORMAT_S16);
+    nsAutoTArray<const int16_t*,1> channels;
+    channels.AppendElement(data);
+    segment.AppendFrames(samples.forget(), channels, AUDIO_BUFFER_SIZE);
 
     for(std::set<Fake_MediaStreamListener*>::iterator it = mListeners.begin();
         it != mListeners.end(); ++it) {
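
The fake streams above synthesize a saw-tooth test signal: each sample steps
through 8 levels of 4000, centered on zero. A runnable sketch of the waveform
(plain C++, independent of the patch):

    #include <cstdint>
    #include <cstdio>

    int main() {
      int count = 0;
      for (int i = 0; i < 16; ++i) {
        // Ramps -14000, -10000, ..., +14000, then wraps: a saw tooth.
        int16_t sample = int16_t(((count % 8) * 4000) - (7 * 4000) / 2);
        ++count;
        printf("%d\n", sample);
      }
      return 0;
    }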