From cf8fbf13e1c2b32a33016917af10c82a1a54ba15 Mon Sep 17 00:00:00 2001
From: Robert O'Callahan
Date: Thu, 22 Nov 2012 18:04:27 +1300
Subject: [PATCH] Bug 827537. Refactor AudioChunk to support having separate
 buffers for each channel. r=jesup

--HG--
extra : rebase_source : 0aa26e1c3181d9fe5158520d4b33248bae0fa5d0
---
 content/media/AudioSampleFormat.h                  | 16 ++++-
 content/media/AudioSegment.cpp                     | 34 ++++-----
 content/media/AudioSegment.h                       | 72 ++++++++++++++-----
 content/media/MediaDecoderStateMachine.cpp         |  8 ++-
 content/media/SharedBuffer.h                       | 17 +++--
 content/media/webrtc/MediaEngineWebRTCAudio.cpp    |  4 +-
 .../src/mediapipeline/MediaPipeline.cpp            | 13 ++--
 .../src/peerconnection/PeerConnectionMedia.h       | 13 ++--
 .../webrtc/signaling/test/FakeMediaStreams.h       |  2 +-
 .../signaling/test/FakeMediaStreamsImpl.h          | 12 ++--
 10 files changed, 125 insertions(+), 66 deletions(-)

diff --git a/content/media/AudioSampleFormat.h b/content/media/AudioSampleFormat.h
index 992561774235..e18b7fb7962b 100644
--- a/content/media/AudioSampleFormat.h
+++ b/content/media/AudioSampleFormat.h
@@ -139,7 +139,6 @@ ScaleAudioSamples(float* aBuffer, int aCount, float aScale)
   }
 }
 
-
 inline void
 ScaleAudioSamples(short* aBuffer, int aCount, float aScale)
 {
@@ -149,6 +148,21 @@ ScaleAudioSamples(short* aBuffer, int aCount, float aScale)
   }
 }
 
+inline const void*
+AddAudioSampleOffset(const void* aBase, AudioSampleFormat aFormat,
+                     int32_t aOffset)
+{
+  switch (aFormat) {
+  case AUDIO_FORMAT_FLOAT32:
+    return static_cast<const float*>(aBase) + aOffset;
+  case AUDIO_FORMAT_S16:
+    return static_cast<const int16_t*>(aBase) + aOffset;
+  default:
+    NS_ERROR("Unknown format");
+    return nullptr;
+  }
+}
+
 } // namespace mozilla
 
 #endif /* MOZILLA_AUDIOSAMPLEFORMAT_H_ */
diff --git a/content/media/AudioSegment.cpp b/content/media/AudioSegment.cpp
index 399409710835..7ee3a636f0b9 100644
--- a/content/media/AudioSegment.cpp
+++ b/content/media/AudioSegment.cpp
@@ -11,16 +11,15 @@ namespace mozilla {
 
 template <class SrcT, class DestT>
 static void
-InterleaveAndConvertBuffer(const SrcT* aSource, int32_t aSourceLength,
-                           int32_t aLength,
-                           float aVolume,
+InterleaveAndConvertBuffer(const SrcT** aSourceChannels,
+                           int32_t aLength, float aVolume,
                            int32_t aChannels,
                            DestT* aOutput)
 {
   DestT* output = aOutput;
   for (int32_t i = 0; i < aLength; ++i) {
     for (int32_t channel = 0; channel < aChannels; ++channel) {
-      float v = AudioSampleToFloat(aSource[channel*aSourceLength + i])*aVolume;
+      float v = AudioSampleToFloat(aSourceChannels[channel][i])*aVolume;
       *output = FloatToAudioSample<DestT>(v);
       ++output;
     }
@@ -28,9 +27,8 @@ InterleaveAndConvertBuffer(const SrcT* aSource, int32_t aSourceLength,
 }
 
 static inline void
-InterleaveAndConvertBuffer(const int16_t* aSource, int32_t aSourceLength,
-                           int32_t aLength,
-                           float aVolume,
+InterleaveAndConvertBuffer(const int16_t** aSourceChannels,
+                           int32_t aLength, float aVolume,
                            int32_t aChannels,
                            int16_t* aOutput)
 {
@@ -39,7 +37,7 @@ InterleaveAndConvertBuffer(const int16_t* aSource, int32_t aSourceLength,
     int32_t scale = int32_t((1 << 16) * aVolume);
     for (int32_t i = 0; i < aLength; ++i) {
       for (int32_t channel = 0; channel < aChannels; ++channel) {
-        int16_t s = aSource[channel*aSourceLength + i];
+        int16_t s = aSourceChannels[channel][i];
         *output = int16_t((int32_t(s) * scale) >> 16);
         ++output;
       }
@@ -49,7 +47,7 @@ InterleaveAndConvertBuffer(const int16_t* aSource, int32_t aSourceLength,
 
   for (int32_t i = 0; i < aLength; ++i) {
     for (int32_t channel = 0; channel < aChannels; ++channel) {
-      float v = AudioSampleToFloat(aSource[channel*aSourceLength + i])*aVolume;
+      float v = AudioSampleToFloat(aSourceChannels[channel][i])*aVolume;
       *output = FloatToAudioSample<int16_t>(v);
       ++output;
     }
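
A standalone sketch (plain C++, not part of the patch) of the planar-to-
interleaved copy the rewritten InterleaveAndConvertBuffer performs, using the
same 16.16 fixed-point volume trick as the int16_t fast path above; it assumes
0 <= aVolume <= 1, the condition under which the real code takes this path:

    #include <cstdint>

    static void
    InterleavePlanar(const int16_t** aChannels, int32_t aFrames,
                     int32_t aChannelCount, float aVolume, int16_t* aOutput)
    {
      int32_t scale = int32_t((1 << 16) * aVolume); // volume as 16.16 fixed point
      for (int32_t i = 0; i < aFrames; ++i) {
        for (int32_t c = 0; c < aChannelCount; ++c) {
          // Each channel is read from its own (planar) buffer;
          // the output is frame-interleaved.
          *aOutput++ = int16_t((int32_t(aChannels[c][i]) * scale) >> 16);
        }
      }
    }
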
@@ -57,25 +55,22 @@ InterleaveAndConvertBuffer(const int16_t* aSource, int32_t aSourceLength,
 }
 
 static void
-InterleaveAndConvertBuffer(const void* aSource, AudioSampleFormat aSourceFormat,
-                           int32_t aSourceLength,
-                           int32_t aOffset, int32_t aLength,
-                           float aVolume,
+InterleaveAndConvertBuffer(const void** aSourceChannels,
+                           AudioSampleFormat aSourceFormat,
+                           int32_t aLength, float aVolume,
                            int32_t aChannels,
                            AudioDataValue* aOutput)
 {
   switch (aSourceFormat) {
   case AUDIO_FORMAT_FLOAT32:
-    InterleaveAndConvertBuffer(static_cast<const float*>(aSource) + aOffset,
-                               aSourceLength,
+    InterleaveAndConvertBuffer(reinterpret_cast<const float**>(aSourceChannels),
                                aLength,
                                aVolume,
                                aChannels,
                                aOutput);
     break;
   case AUDIO_FORMAT_S16:
-    InterleaveAndConvertBuffer(static_cast<const int16_t*>(aSource) + aOffset,
-                               aSourceLength,
+    InterleaveAndConvertBuffer(reinterpret_cast<const int16_t**>(aSourceChannels),
                                aLength,
                                aVolume,
                                aChannels,
@@ -107,9 +102,8 @@ AudioSegment::WriteTo(AudioStream* aOutput)
     }
     buf.SetLength(int32_t(mChannels*c.mDuration));
     if (c.mBuffer) {
-      InterleaveAndConvertBuffer(c.mBuffer->Data(), c.mBufferFormat, c.mBufferLength,
-                                 c.mOffset, int32_t(c.mDuration),
-                                 c.mVolume,
+      InterleaveAndConvertBuffer(c.mChannelData.Elements(), c.mBufferFormat,
+                                 int32_t(c.mDuration), c.mVolume,
                                  aOutput->GetChannels(),
                                  buf.Elements());
     } else {
diff --git a/content/media/AudioSegment.h b/content/media/AudioSegment.h
index a3ab0e2c3828..b22687259908 100644
--- a/content/media/AudioSegment.h
+++ b/content/media/AudioSegment.h
@@ -15,6 +15,14 @@ namespace mozilla {
 
 class AudioStream;
 
+/**
+ * An AudioChunk represents a multi-channel buffer of audio samples.
+ * It references an underlying ThreadSharedObject which manages the lifetime
+ * of the buffer. An AudioChunk maintains its own duration and channel data
+ * pointers so it can represent a subinterval of a buffer without copying.
+ * An AudioChunk can store its individual channels anywhere; it maintains
+ * separate pointers to each channel's buffer.
+ */
 struct AudioChunk {
   typedef mozilla::AudioSampleFormat SampleFormat;
 
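
A minimal sketch (plain C++ with illustrative names, not the patch's API) of
why per-channel pointers make the SliceTo below a zero-copy operation:

    #include <cstdint>
    #include <vector>

    struct ChunkView {
      std::vector<const int16_t*> mChannelData; // one pointer per channel
      int64_t mDuration;                        // frames in the view

      // Narrow the view to [aStart, aEnd): just advance each channel pointer.
      // The underlying refcounted buffer is never touched or copied.
      void SliceTo(int64_t aStart, int64_t aEnd) {
        for (const int16_t*& channel : mChannelData) {
          channel += aStart;
        }
        mDuration = aEnd - aStart;
      }
    };
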
@@ -24,7 +32,11 @@ struct AudioChunk {
     NS_ASSERTION(aStart >= 0 && aStart < aEnd && aEnd <= mDuration,
                  "Slice out of bounds");
     if (mBuffer) {
-      mOffset += int32_t(aStart);
+      MOZ_ASSERT(aStart < INT32_MAX, "Can't slice beyond 32-bit sample lengths");
+      for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) {
+        mChannelData[channel] = AddAudioSampleOffset(mChannelData[channel],
+            mBufferFormat, int32_t(aStart));
+      }
     }
     mDuration = aEnd - aStart;
   }
@@ -35,9 +47,19 @@ struct AudioChunk {
       return false;
     }
     if (mBuffer) {
-      NS_ASSERTION(aOther.mBufferFormat == mBufferFormat && aOther.mBufferLength == mBufferLength,
+      NS_ASSERTION(aOther.mBufferFormat == mBufferFormat,
                    "Wrong metadata about buffer");
-      return aOther.mOffset == mOffset + mDuration && aOther.mVolume == mVolume;
+      NS_ASSERTION(aOther.mChannelData.Length() == mChannelData.Length(),
+                   "Mismatched channel count");
+      if (mDuration > INT32_MAX) {
+        return false;
+      }
+      for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) {
+        if (aOther.mChannelData[channel] != AddAudioSampleOffset(mChannelData[channel],
+            mBufferFormat, int32_t(mDuration))) {
+          return false;
+        }
+      }
     }
     return true;
   }
@@ -45,17 +67,16 @@ struct AudioChunk {
   void SetNull(TrackTicks aDuration)
   {
     mBuffer = nullptr;
+    mChannelData.Clear();
     mDuration = aDuration;
-    mOffset = 0;
     mVolume = 1.0f;
   }
 
-  TrackTicks mDuration; // in frames within the buffer
-  nsRefPtr<SharedBuffer> mBuffer; // null means data is all zeroes
-  int32_t mBufferLength; // number of frames in mBuffer (only meaningful if mBuffer is nonnull)
-  SampleFormat mBufferFormat; // format of frames in mBuffer (only meaningful if mBuffer is nonnull)
-  int32_t mOffset; // in frames within the buffer (zero if mBuffer is null)
-  float mVolume; // volume multiplier to apply (1.0f if mBuffer is nonnull)
+  TrackTicks mDuration; // in frames within the buffer
+  nsRefPtr<ThreadSharedObject> mBuffer; // the buffer object whose lifetime is managed; null means data is all zeroes
+  nsTArray<const void*> mChannelData; // one pointer per channel; empty if and only if mBuffer is null
+  float mVolume; // volume multiplier to apply (1.0f if mBuffer is nonnull)
+  SampleFormat mBufferFormat; // format of frames in mBuffer (only meaningful if mBuffer is nonnull)
 };
 
 /**
@@ -83,16 +104,35 @@ public:
     NS_ASSERTION(IsInitialized(), "Not initialized");
     return mChannels;
   }
-  void AppendFrames(already_AddRefed<SharedBuffer> aBuffer, int32_t aBufferLength,
-                    int32_t aStart, int32_t aEnd, SampleFormat aFormat)
+  void AppendFrames(already_AddRefed<ThreadSharedObject> aBuffer,
+                    const nsTArray<const float*>& aChannelData,
+                    int32_t aDuration)
   {
     NS_ASSERTION(mChannels > 0, "Not initialized");
-    AudioChunk* chunk = AppendChunk(aEnd - aStart);
+    NS_ASSERTION(!aBuffer.get() || aChannelData.Length() == uint32_t(mChannels),
+                 "Wrong number of channels");
+    AudioChunk* chunk = AppendChunk(aDuration);
     chunk->mBuffer = aBuffer;
-    chunk->mBufferFormat = aFormat;
-    chunk->mBufferLength = aBufferLength;
-    chunk->mOffset = aStart;
+    for (uint32_t channel = 0; channel < aChannelData.Length(); ++channel) {
+      chunk->mChannelData.AppendElement(aChannelData[channel]);
+    }
     chunk->mVolume = 1.0f;
+    chunk->mBufferFormat = AUDIO_FORMAT_FLOAT32;
+  }
+  void AppendFrames(already_AddRefed<ThreadSharedObject> aBuffer,
+                    const nsTArray<const int16_t*>& aChannelData,
+                    int32_t aDuration)
+  {
+    NS_ASSERTION(mChannels > 0, "Not initialized");
+    NS_ASSERTION(!aBuffer.get() || aChannelData.Length() == uint32_t(mChannels),
+                 "Wrong number of channels");
+    AudioChunk* chunk = AppendChunk(aDuration);
+    chunk->mBuffer = aBuffer;
+    for (uint32_t channel = 0; channel < aChannelData.Length(); ++channel) {
+      chunk->mChannelData.AppendElement(aChannelData[channel]);
+    }
+    chunk->mVolume = 1.0f;
+    chunk->mBufferFormat = AUDIO_FORMAT_S16;
   }
   void ApplyVolume(float aVolume);
   /**
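
For reference, a hedged usage sketch of the new AppendFrames signature,
mirroring the MediaDecoderStateMachine call site below; this uses the Mozilla
tree types from the patch (`frames`, `channels`, and `segment` are
illustrative variables, not patch code):

    nsRefPtr<SharedBuffer> buffer =
      SharedBuffer::Create(channels * frames * sizeof(int16_t));
    int16_t* data = static_cast<int16_t*>(buffer->Data());
    nsAutoTArray<const int16_t*,2> channelPtrs;
    for (int32_t c = 0; c < channels; ++c) {
      // Planar layout: channel c occupies its own run of `frames` samples
      // within the single shared allocation.
      channelPtrs.AppendElement(data + c * frames);
    }
    segment.AppendFrames(buffer.forget(), channelPtrs, frames);
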
diff --git a/content/media/MediaDecoderStateMachine.cpp b/content/media/MediaDecoderStateMachine.cpp
index 56dc8d3904d8..896d98a77a85 100644
--- a/content/media/MediaDecoderStateMachine.cpp
+++ b/content/media/MediaDecoderStateMachine.cpp
@@ -549,8 +549,12 @@ void MediaDecoderStateMachine::SendStreamAudio(AudioData* aAudio,
 
   aAudio->EnsureAudioBuffer();
   nsRefPtr<SharedBuffer> buffer = aAudio->mAudioBuffer;
-  aOutput->AppendFrames(buffer.forget(), aAudio->mFrames, int32_t(offset), aAudio->mFrames,
-                        AUDIO_OUTPUT_FORMAT);
+  AudioDataValue* bufferData = static_cast<AudioDataValue*>(buffer->Data());
+  nsAutoTArray<const AudioDataValue*,2> channels;
+  for (uint32_t i = 0; i < aAudio->mChannels; ++i) {
+    channels.AppendElement(bufferData + i*aAudio->mFrames + offset);
+  }
+  aOutput->AppendFrames(buffer.forget(), channels, aAudio->mFrames);
   LOG(PR_LOG_DEBUG, ("%p Decoder writing %d frames of data to MediaStream for AudioData at %lld",
                      mDecoder.get(), aAudio->mFrames - int32_t(offset), aAudio->mTime));
   aStream->mAudioFramesWritten += aAudio->mFrames - int32_t(offset);
diff --git a/content/media/SharedBuffer.h b/content/media/SharedBuffer.h
index 4ad1b2cd00b4..68ca65564eb4 100644
--- a/content/media/SharedBuffer.h
+++ b/content/media/SharedBuffer.h
@@ -12,6 +12,16 @@
 
 namespace mozilla {
 
+/**
+ * Base class for objects with a thread-safe refcount and a virtual
+ * destructor.
+ */
+class ThreadSharedObject {
+public:
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(ThreadSharedObject)
+  virtual ~ThreadSharedObject() {}
+};
+
 /**
  * Heap-allocated chunk of arbitrary data with threadsafe refcounting.
  * Typically you would allocate one of these, fill it in, and then treat it as
@@ -20,15 +30,10 @@ namespace mozilla {
  * simply assume that the refcount is at least 4-byte aligned and its size
  * is divisible by 4.
  */
-class SharedBuffer {
+class SharedBuffer : public ThreadSharedObject {
 public:
-  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(SharedBuffer)
-  ~SharedBuffer() {}
-
   void* Data() { return this + 1; }
 
-  // Takes ownership of aData (which will be freed via moz_free()).
-  // aData consists of aChannels consecutive buffers, each of aLength samples.
   static already_AddRefed<SharedBuffer> Create(size_t aSize)
   {
     void* m = moz_xmalloc(sizeof(SharedBuffer) + aSize);
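
The point of the new base class is that AudioChunk::mBuffer can now hold any
thread-safely refcounted payload, not just a SharedBuffer. A hypothetical
example (not in this patch) of a payload that frees an externally allocated
buffer on its last release:

    // Hypothetical: adapts a malloc'ed buffer to ThreadSharedObject so an
    // AudioChunk can keep it alive without copying it into a SharedBuffer.
    class OwnedBuffer : public ThreadSharedObject {
    public:
      explicit OwnedBuffer(void* aData) : mData(aData) {}
      virtual ~OwnedBuffer() { moz_free(mData); } // runs on last Release()
    private:
      void* mData;
    };
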
diff --git a/content/media/webrtc/MediaEngineWebRTCAudio.cpp b/content/media/webrtc/MediaEngineWebRTCAudio.cpp
index 321cfd5737b4..3ee868326095 100644
--- a/content/media/webrtc/MediaEngineWebRTCAudio.cpp
+++ b/content/media/webrtc/MediaEngineWebRTCAudio.cpp
@@ -300,7 +300,9 @@ MediaEngineWebRTCAudioSource::Process(const int channel,
 
     AudioSegment segment;
     segment.Init(CHANNELS);
-    segment.AppendFrames(buffer.forget(), length, 0, length, AUDIO_FORMAT_S16);
+    nsAutoTArray<const int16_t*,1> channels;
+    channels.AppendElement(dest);
+    segment.AppendFrames(buffer.forget(), channels, length);
     SourceMediaStream *source = mSources[i];
 
     if (source) {
diff --git a/media/webrtc/signaling/src/mediapipeline/MediaPipeline.cpp b/media/webrtc/signaling/src/mediapipeline/MediaPipeline.cpp
index aab80796a741..d7b9d2462bc9 100644
--- a/media/webrtc/signaling/src/mediapipeline/MediaPipeline.cpp
+++ b/media/webrtc/signaling/src/mediapipeline/MediaPipeline.cpp
@@ -667,7 +667,7 @@ void MediaPipelineTransmit::PipelineListener::ProcessAudioChunk(
   nsAutoArrayPtr<int16_t> samples(new int16_t[chunk.mDuration]);
 
   if (chunk.mBuffer) {
-    switch(chunk.mBufferFormat) {
+    switch (chunk.mBufferFormat) {
       case AUDIO_FORMAT_FLOAT32:
         MOZ_MTLOG(PR_LOG_ERROR, "Can't process audio except in 16-bit PCM yet");
         MOZ_ASSERT(PR_FALSE);
@@ -675,8 +675,7 @@ void MediaPipelineTransmit::PipelineListener::ProcessAudioChunk(
         break;
       case AUDIO_FORMAT_S16:
         {
-          const short* buf = static_cast<const short*>(chunk.mBuffer->Data()) +
-                             chunk.mOffset;
+          const short* buf = static_cast<const short*>(chunk.mChannelData[0]);
           ConvertAudioSamplesWithScale(buf, samples, chunk.mDuration, chunk.mVolume);
         }
         break;
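
ConvertAudioSamplesWithScale above copies mono 16-bit samples while applying
the chunk's volume. A plain-C++ sketch of that operation (illustrative only,
not the tree's helper):

    #include <algorithm>
    #include <cstdint>

    static void
    ConvertWithScale(const int16_t* aSrc, int16_t* aDst, int32_t aCount,
                     float aScale)
    {
      for (int32_t i = 0; i < aCount; ++i) {
        // Scale in float, then clamp to the int16_t range to avoid wraparound.
        float v = aSrc[i] * aScale;
        v = std::max(-32768.0f, std::min(32767.0f, v));
        aDst[i] = int16_t(v);
      }
    }
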
@@ -833,11 +832,12 @@ NotifyPull(MediaStreamGraph* graph, StreamTime desired_time) {
   while (MillisecondsToMediaTime(played_) < desired_time) {
     // TODO(ekr@rtfm.com): Is there a way to avoid mallocating here?
     nsRefPtr<SharedBuffer> samples = SharedBuffer::Create(1000);
+    int16_t *samples_data = static_cast<int16_t *>(samples->Data());
     int samples_length;
 
     MediaConduitErrorCode err =
         static_cast<AudioSessionConduit*>(conduit_.get())->GetAudioFrame(
-            static_cast<int16_t *>(samples->Data()),
+            samples_data,
             16000,  // Sampling rate fixed at 16 kHz for now
             0,  // TODO(ekr@rtfm.com): better estimate of capture delay
             samples_length);
@@ -849,8 +849,9 @@ NotifyPull(MediaStreamGraph* graph, StreamTime desired_time) {
 
     AudioSegment segment;
     segment.Init(1);
-    segment.AppendFrames(samples.forget(), samples_length,
-                         0, samples_length, AUDIO_FORMAT_S16);
+    nsAutoTArray<const int16_t*,1> channels;
+    channels.AppendElement(samples_data);
+    segment.AppendFrames(samples.forget(), channels, samples_length);
     source_->AppendToTrack(1,  // TODO(ekr@rtfm.com): Track ID
                            &segment);
diff --git a/media/webrtc/signaling/src/peerconnection/PeerConnectionMedia.h b/media/webrtc/signaling/src/peerconnection/PeerConnectionMedia.h
index 38b76580e379..a2243a77af01 100644
--- a/media/webrtc/signaling/src/peerconnection/PeerConnectionMedia.h
+++ b/media/webrtc/signaling/src/peerconnection/PeerConnectionMedia.h
@@ -63,17 +63,18 @@ Fake_AudioGenerator(nsDOMMediaStream* aStream) : mStream(aStream), mCount(0) {
 
   static void Callback(nsITimer* timer, void *arg) {
     Fake_AudioGenerator* gen = static_cast<Fake_AudioGenerator*>(arg);
 
-    nsRefPtr<mozilla::SharedBuffer> samples = mozilla::SharedBuffer::Create(1600 * 2 * sizeof(int16_t));
-    for (int i=0; i<1600*2; i++) {
-      reinterpret_cast<int16_t*>(samples->Data())[i] = ((gen->mCount % 8) * 4000) - (7*4000)/2;
+    nsRefPtr<mozilla::SharedBuffer> samples = mozilla::SharedBuffer::Create(1600 * sizeof(int16_t));
+    int16_t* data = static_cast<int16_t*>(samples->Data());
+    for (int i=0; i<1600; i++) {
+      data[i] = ((gen->mCount % 8) * 4000) - (7*4000)/2;
       ++gen->mCount;
     }
 
     mozilla::AudioSegment segment;
     segment.Init(1);
-    segment.AppendFrames(samples.forget(), 1600,
-                         0, 1600, mozilla::AUDIO_FORMAT_S16);
-
+    nsAutoTArray<const int16_t*,1> channelData;
+    channelData.AppendElement(data);
+    segment.AppendFrames(samples.forget(), channelData, 1600);
     gen->mStream->GetStream()->AsSourceStream()->AppendToTrack(1, &segment);
   }
diff --git a/media/webrtc/signaling/test/FakeMediaStreams.h b/media/webrtc/signaling/test/FakeMediaStreams.h
index f8aa1b4e485c..4abd2e4ba9ce 100644
--- a/media/webrtc/signaling/test/FakeMediaStreams.h
+++ b/media/webrtc/signaling/test/FakeMediaStreams.h
@@ -120,7 +120,7 @@ class Fake_SourceMediaStream : public Fake_MediaStream {
         mozilla::AudioChunk& chunk = *(iter);
         MOZ_ASSERT(chunk.mBuffer);
         const int16_t* buf =
-          static_cast<const int16_t*>(chunk.mBuffer->Data());
+          static_cast<const int16_t*>(chunk.mChannelData[0]);
         for(int i=0; i<chunk.mDuration; i++) {
diff --git a/media/webrtc/signaling/test/FakeMediaStreamsImpl.h b/media/webrtc/signaling/test/FakeMediaStreamsImpl.h
--- a/media/webrtc/signaling/test/FakeMediaStreamsImpl.h
+++ b/media/webrtc/signaling/test/FakeMediaStreamsImpl.h
@@ ... @@
     nsRefPtr<mozilla::SharedBuffer> samples = mozilla::SharedBuffer::Create(AUDIO_BUFFER_SIZE * NUM_CHANNELS * sizeof(int16_t));
+    int16_t* data = reinterpret_cast<int16_t*>(samples->Data());
     for(int i=0; i<(1600*2); i++) {
       //saw tooth audio sample
-      reinterpret_cast<int16_t*>(samples->Data())[i] =
-          ((mCount % 8) * 4000) - (7*4000)/2;
+      data[i] = ((mCount % 8) * 4000) - (7*4000)/2;
       mCount++;
     }
 
     mozilla::AudioSegment segment;
     segment.Init(1);
-    segment.AppendFrames(samples.forget(),
-                         AUDIO_BUFFER_SIZE,
-                         0,
-                         AUDIO_BUFFER_SIZE,
-                         mozilla::AUDIO_FORMAT_S16);
+    nsAutoTArray<const int16_t*,1> channels;
+    channels.AppendElement(data);
+    segment.AppendFrames(samples.forget(), channels, AUDIO_BUFFER_SIZE);
 
     for(std::set<Fake_MediaStreamListener*>::iterator it = mListeners.begin();
         it != mListeners.end(); ++it) {
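
The fake streams above synthesize a saw-tooth test signal: each sample steps
through 8 levels of 4000, centered on zero. A runnable sketch of the waveform
(plain C++, independent of the patch):

    #include <cstdint>
    #include <cstdio>

    int main() {
      int count = 0;
      for (int i = 0; i < 16; ++i) {
        // Ramps -14000, -10000, ..., +14000, then wraps: a saw tooth.
        int16_t sample = int16_t(((count % 8) * 4000) - (7 * 4000) / 2);
        ++count;
        printf("%d\n", sample);
      }
      return 0;
    }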