Bug 827537. Refactor AudioChunk to support having separate buffers for each channel. r=jesup

--HG--
extra : rebase_source : 0aa26e1c3181d9fe5158520d4b33248bae0fa5d0
Robert O'Callahan 2012-11-22 18:04:27 +13:00
parent f572ffaeb8
commit cf8fbf13e1
10 changed files with 125 additions and 66 deletions

View File

@ -139,7 +139,6 @@ ScaleAudioSamples(float* aBuffer, int aCount, float aScale)
}
}
inline void
ScaleAudioSamples(short* aBuffer, int aCount, float aScale)
{
@@ -149,6 +148,21 @@ ScaleAudioSamples(short* aBuffer, int aCount, float aScale)
   }
 }
 
+inline const void*
+AddAudioSampleOffset(const void* aBase, AudioSampleFormat aFormat,
+                     int32_t aOffset)
+{
+  switch (aFormat) {
+  case AUDIO_FORMAT_FLOAT32:
+    return static_cast<const float*>(aBase) + aOffset;
+  case AUDIO_FORMAT_S16:
+    return static_cast<const int16_t*>(aBase) + aOffset;
+  default:
+    NS_ERROR("Unknown format");
+    return nullptr;
+  }
+}
+
 } // namespace mozilla
 
 #endif /* MOZILLA_AUDIOSAMPLEFORMAT_H_ */
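
The new AddAudioSampleOffset helper centralizes the format-dependent pointer arithmetic the rest of the patch relies on: stepping a type-erased channel pointer forward by a number of samples. A minimal standalone sketch of the idea (plain C++ with a local stand-in for mozilla's AudioSampleFormat; the names here are illustrative, not Gecko's):

    #include <cassert>
    #include <cstdint>

    enum AudioSampleFormat { AUDIO_FORMAT_S16, AUDIO_FORMAT_FLOAT32 };

    // Advance a type-erased sample pointer by aOffset samples, using the
    // format to recover the element size.
    inline const void*
    AddAudioSampleOffset(const void* aBase, AudioSampleFormat aFormat, int32_t aOffset)
    {
      switch (aFormat) {
      case AUDIO_FORMAT_FLOAT32:
        return static_cast<const float*>(aBase) + aOffset;
      case AUDIO_FORMAT_S16:
        return static_cast<const int16_t*>(aBase) + aOffset;
      }
      return nullptr;
    }

    int main()
    {
      int16_t samples[8] = {0};
      // Skipping 3 frames of S16 data advances the pointer by 3 elements.
      assert(AddAudioSampleOffset(samples, AUDIO_FORMAT_S16, 3) == samples + 3);
    }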

View File

@@ -11,16 +11,15 @@ namespace mozilla {
 
 template <class SrcT, class DestT>
 static void
-InterleaveAndConvertBuffer(const SrcT* aSource, int32_t aSourceLength,
-                           int32_t aLength,
-                           float aVolume,
+InterleaveAndConvertBuffer(const SrcT** aSourceChannels,
+                           int32_t aLength, float aVolume,
                            int32_t aChannels,
                            DestT* aOutput)
 {
   DestT* output = aOutput;
   for (int32_t i = 0; i < aLength; ++i) {
     for (int32_t channel = 0; channel < aChannels; ++channel) {
-      float v = AudioSampleToFloat(aSource[channel*aSourceLength + i])*aVolume;
+      float v = AudioSampleToFloat(aSourceChannels[channel][i])*aVolume;
       *output = FloatToAudioSample<DestT>(v);
       ++output;
     }
@@ -28,9 +27,8 @@ InterleaveAndConvertBuffer(const SrcT* aSource, int32_t aSourceLength,
 }
 
 static inline void
-InterleaveAndConvertBuffer(const int16_t* aSource, int32_t aSourceLength,
-                           int32_t aLength,
-                           float aVolume,
+InterleaveAndConvertBuffer(const int16_t** aSourceChannels,
+                           int32_t aLength, float aVolume,
                            int32_t aChannels,
                            int16_t* aOutput)
 {
@@ -39,7 +37,7 @@ InterleaveAndConvertBuffer(const int16_t* aSource, int32_t aSourceLength,
     int32_t scale = int32_t((1 << 16) * aVolume);
     for (int32_t i = 0; i < aLength; ++i) {
       for (int32_t channel = 0; channel < aChannels; ++channel) {
-        int16_t s = aSource[channel*aSourceLength + i];
+        int16_t s = aSourceChannels[channel][i];
         *output = int16_t((int32_t(s) * scale) >> 16);
         ++output;
       }
@@ -49,7 +47,7 @@ InterleaveAndConvertBuffer(const int16_t* aSource, int32_t aSourceLength,
 
   for (int32_t i = 0; i < aLength; ++i) {
     for (int32_t channel = 0; channel < aChannels; ++channel) {
-      float v = AudioSampleToFloat(aSource[channel*aSourceLength + i])*aVolume;
+      float v = AudioSampleToFloat(aSourceChannels[channel][i])*aVolume;
       *output = FloatToAudioSample<int16_t>(v);
       ++output;
     }
@@ -57,25 +55,22 @@ InterleaveAndConvertBuffer(const int16_t* aSource, int32_t aSourceLength,
 }
 
 static void
-InterleaveAndConvertBuffer(const void* aSource, AudioSampleFormat aSourceFormat,
-                           int32_t aSourceLength,
-                           int32_t aOffset, int32_t aLength,
-                           float aVolume,
+InterleaveAndConvertBuffer(const void** aSourceChannels,
+                           AudioSampleFormat aSourceFormat,
+                           int32_t aLength, float aVolume,
                            int32_t aChannels,
                            AudioDataValue* aOutput)
 {
   switch (aSourceFormat) {
   case AUDIO_FORMAT_FLOAT32:
-    InterleaveAndConvertBuffer(static_cast<const float*>(aSource) + aOffset,
-                               aSourceLength,
+    InterleaveAndConvertBuffer(reinterpret_cast<const float**>(aSourceChannels),
                                aLength,
                                aVolume,
                                aChannels,
                                aOutput);
     break;
   case AUDIO_FORMAT_S16:
-    InterleaveAndConvertBuffer(static_cast<const int16_t*>(aSource) + aOffset,
-                               aSourceLength,
+    InterleaveAndConvertBuffer(reinterpret_cast<const int16_t**>(aSourceChannels),
                                aLength,
                                aVolume,
                                aChannels,
@@ -107,9 +102,8 @@ AudioSegment::WriteTo(AudioStream* aOutput)
     }
     buf.SetLength(int32_t(mChannels*c.mDuration));
     if (c.mBuffer) {
-      InterleaveAndConvertBuffer(c.mBuffer->Data(), c.mBufferFormat, c.mBufferLength,
-                                 c.mOffset, int32_t(c.mDuration),
-                                 c.mVolume,
+      InterleaveAndConvertBuffer(c.mChannelData.Elements(), c.mBufferFormat,
+                                 int32_t(c.mDuration), c.mVolume,
                                  aOutput->GetChannels(),
                                  buf.Elements());
     } else {
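
The shape of the conversion changes throughout this file: instead of indexing one planar buffer as aSource[channel*aSourceLength + i], WriteTo now hands down an array of per-channel pointers and the inner loop reads aSourceChannels[channel][i]. A standalone sketch of that interleave loop, including the (1 << 16) fixed-point volume trick used in the S16 path above (types simplified; this is not the Gecko code):

    #include <cstdint>
    #include <vector>

    // Interleave planar channels frame by frame: output frame i holds
    // channels 0..N-1 at consecutive positions.
    void InterleaveChannels(const std::vector<const int16_t*>& aChannels,
                            int32_t aLength, float aVolume, int16_t* aOutput)
    {
      // For 0 <= aVolume <= 1 the scale fits in 32-bit fixed point:
      // (s * int32_t(aVolume * 2^16)) >> 16 approximates s * aVolume without
      // a per-sample float multiply and narrowing conversion.
      int32_t scale = int32_t((1 << 16) * aVolume);
      int16_t* out = aOutput;
      for (int32_t i = 0; i < aLength; ++i) {
        for (size_t channel = 0; channel < aChannels.size(); ++channel) {
          *out++ = int16_t((int32_t(aChannels[channel][i]) * scale) >> 16);
        }
      }
    }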

View File

@@ -15,6 +15,14 @@ namespace mozilla {
 
 class AudioStream;
 
+/**
+ * An AudioChunk represents a multi-channel buffer of audio samples.
+ * It references an underlying ThreadSharedObject which manages the lifetime
+ * of the buffer. An AudioChunk maintains its own duration and channel data
+ * pointers so it can represent a subinterval of a buffer without copying.
+ * An AudioChunk can store its individual channels anywhere; it maintains
+ * separate pointers to each channel's buffer.
+ */
 struct AudioChunk {
   typedef mozilla::AudioSampleFormat SampleFormat;
@@ -24,7 +32,11 @@ struct AudioChunk {
     NS_ASSERTION(aStart >= 0 && aStart < aEnd && aEnd <= mDuration,
                  "Slice out of bounds");
     if (mBuffer) {
-      mOffset += int32_t(aStart);
+      MOZ_ASSERT(aStart < INT32_MAX, "Can't slice beyond 32-bit sample lengths");
+      for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) {
+        mChannelData[channel] = AddAudioSampleOffset(mChannelData[channel],
+            mBufferFormat, int32_t(aStart));
+      }
     }
     mDuration = aEnd - aStart;
   }
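
SliceTo no longer records an mOffset to be applied later; it eagerly rewrites every channel pointer, so a slice is just a chunk whose pointers start further into the same shared buffer. The idea, sketched with plain pointers (illustrative types, not the Gecko API):

    #include <cstdint>
    #include <vector>

    // Restrict a planar chunk to frames [aStart, aEnd): bump each channel
    // pointer and shrink the duration. No samples are copied; the shared
    // buffer object still owns all of the memory.
    void SliceChannels(std::vector<const int16_t*>& aChannels,
                       int64_t& aDuration, int64_t aStart, int64_t aEnd)
    {
      for (size_t channel = 0; channel < aChannels.size(); ++channel) {
        aChannels[channel] += aStart;  // what AddAudioSampleOffset does for S16
      }
      aDuration = aEnd - aStart;
    }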
@@ -35,9 +47,19 @@ struct AudioChunk {
       return false;
     }
     if (mBuffer) {
-      NS_ASSERTION(aOther.mBufferFormat == mBufferFormat && aOther.mBufferLength == mBufferLength,
+      NS_ASSERTION(aOther.mBufferFormat == mBufferFormat,
                    "Wrong metadata about buffer");
-      return aOther.mOffset == mOffset + mDuration && aOther.mVolume == mVolume;
+      NS_ASSERTION(aOther.mChannelData.Length() == mChannelData.Length(),
+                   "Mismatched channel count");
+      if (mDuration > INT32_MAX) {
+        return false;
+      }
+      for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) {
+        if (aOther.mChannelData[channel] != AddAudioSampleOffset(mChannelData[channel],
+            mBufferFormat, int32_t(mDuration))) {
+          return false;
+        }
+      }
     }
     return true;
   }
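
With offsets gone, adjacency can no longer be tested as aOther.mOffset == mOffset + mDuration; instead, every channel pointer of the following chunk must land exactly mDuration samples past this chunk's. A standalone sketch of that test (S16 pointers for brevity; illustrative, not the Gecko code):

    #include <cstdint>
    #include <vector>

    // Two chunks over the same buffer can be coalesced only if, for every
    // channel, the second chunk starts exactly where the first one ends.
    bool AreAdjacent(const std::vector<const int16_t*>& aFirst,
                     const std::vector<const int16_t*>& aSecond,
                     int32_t aFirstDuration)
    {
      if (aFirst.size() != aSecond.size()) {
        return false;
      }
      for (size_t channel = 0; channel < aFirst.size(); ++channel) {
        if (aSecond[channel] != aFirst[channel] + aFirstDuration) {
          return false;
        }
      }
      return true;
    }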
@@ -45,17 +67,16 @@ struct AudioChunk {
 
   void SetNull(TrackTicks aDuration)
   {
     mBuffer = nullptr;
+    mChannelData.Clear();
     mDuration = aDuration;
-    mOffset = 0;
     mVolume = 1.0f;
   }
 
-  TrackTicks mDuration; // in frames within the buffer
-  nsRefPtr<SharedBuffer> mBuffer; // null means data is all zeroes
-  int32_t mBufferLength; // number of frames in mBuffer (only meaningful if mBuffer is nonnull)
-  SampleFormat mBufferFormat; // format of frames in mBuffer (only meaningful if mBuffer is nonnull)
-  int32_t mOffset; // in frames within the buffer (zero if mBuffer is null)
-  float mVolume; // volume multiplier to apply (1.0f if mBuffer is nonnull)
+  TrackTicks mDuration; // in frames within the buffer
+  nsRefPtr<ThreadSharedObject> mBuffer; // the buffer object whose lifetime is managed; null means data is all zeroes
+  nsTArray<const void*> mChannelData; // one pointer per channel; empty if and only if mBuffer is null
+  float mVolume; // volume multiplier to apply (1.0f if mBuffer is nonnull)
+  SampleFormat mBufferFormat; // format of frames in mBuffer (only meaningful if mBuffer is nonnull)
 };
/**
@@ -83,16 +104,35 @@ public:
     NS_ASSERTION(IsInitialized(), "Not initialized");
     return mChannels;
   }
-  void AppendFrames(already_AddRefed<SharedBuffer> aBuffer, int32_t aBufferLength,
-                    int32_t aStart, int32_t aEnd, SampleFormat aFormat)
+  void AppendFrames(already_AddRefed<ThreadSharedObject> aBuffer,
+                    const nsTArray<const float*>& aChannelData,
+                    int32_t aDuration)
   {
     NS_ASSERTION(mChannels > 0, "Not initialized");
-    AudioChunk* chunk = AppendChunk(aEnd - aStart);
+    NS_ASSERTION(!aBuffer.get() || aChannelData.Length() == uint32_t(mChannels),
+                 "Wrong number of channels");
+    AudioChunk* chunk = AppendChunk(aDuration);
     chunk->mBuffer = aBuffer;
-    chunk->mBufferFormat = aFormat;
-    chunk->mBufferLength = aBufferLength;
-    chunk->mOffset = aStart;
+    for (uint32_t channel = 0; channel < aChannelData.Length(); ++channel) {
+      chunk->mChannelData.AppendElement(aChannelData[channel]);
+    }
     chunk->mVolume = 1.0f;
+    chunk->mBufferFormat = AUDIO_FORMAT_FLOAT32;
   }
+  void AppendFrames(already_AddRefed<ThreadSharedObject> aBuffer,
+                    const nsTArray<const int16_t*>& aChannelData,
+                    int32_t aDuration)
+  {
+    NS_ASSERTION(mChannels > 0, "Not initialized");
+    NS_ASSERTION(!aBuffer.get() || aChannelData.Length() == uint32_t(mChannels),
+                 "Wrong number of channels");
+    AudioChunk* chunk = AppendChunk(aDuration);
+    chunk->mBuffer = aBuffer;
+    for (uint32_t channel = 0; channel < aChannelData.Length(); ++channel) {
+      chunk->mChannelData.AppendElement(aChannelData[channel]);
+    }
+    chunk->mVolume = 1.0f;
+    chunk->mBufferFormat = AUDIO_FORMAT_S16;
+  }
   void ApplyVolume(float aVolume);
   /**
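
The new AppendFrames contract is: the segment takes one strong reference to whatever object owns the memory, plus one read-only pointer per channel into it; the sample format is implied by the pointer type of the overload. A rough C++17 analogue using std::shared_ptr in place of nsRefPtr<ThreadSharedObject> (illustrative only, not the Gecko types):

    #include <cstdint>
    #include <memory>
    #include <vector>

    struct Chunk {
      std::shared_ptr<int16_t[]> mBuffer;       // stands in for the refcounted owner
      std::vector<const int16_t*> mChannelData; // one pointer per channel
      int32_t mDuration = 0;
    };

    // Build a planar chunk: channel c occupies frames
    // [c*aFrames, (c+1)*aFrames) of a single allocation, the same layout the
    // decoder code in the next file carves out of its SharedBuffer.
    Chunk MakePlanarChunk(int32_t aFrames, int32_t aChannels)
    {
      Chunk chunk;
      chunk.mBuffer.reset(new int16_t[size_t(aFrames) * aChannels]());
      for (int32_t c = 0; c < aChannels; ++c) {
        chunk.mChannelData.push_back(chunk.mBuffer.get() + size_t(c) * aFrames);
      }
      chunk.mDuration = aFrames;
      return chunk;
    }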

View File

@@ -549,8 +549,12 @@ void MediaDecoderStateMachine::SendStreamAudio(AudioData* aAudio,
   aAudio->EnsureAudioBuffer();
   nsRefPtr<SharedBuffer> buffer = aAudio->mAudioBuffer;
-  aOutput->AppendFrames(buffer.forget(), aAudio->mFrames, int32_t(offset), aAudio->mFrames,
-                        AUDIO_OUTPUT_FORMAT);
+  AudioDataValue* bufferData = static_cast<AudioDataValue*>(buffer->Data());
+  nsAutoTArray<const AudioDataValue*,2> channels;
+  for (uint32_t i = 0; i < aAudio->mChannels; ++i) {
+    channels.AppendElement(bufferData + i*aAudio->mFrames + offset);
+  }
+  aOutput->AppendFrames(buffer.forget(), channels, aAudio->mFrames);
   LOG(PR_LOG_DEBUG, ("%p Decoder writing %d frames of data to MediaStream for AudioData at %lld",
                      mDecoder.get(), aAudio->mFrames - int32_t(offset), aAudio->mTime));
   aStream->mAudioFramesWritten += aAudio->mFrames - int32_t(offset);

View File

@@ -12,6 +12,16 @@
 
 namespace mozilla {
 
+/**
+ * Base class for objects with a thread-safe refcount and a virtual
+ * destructor.
+ */
+class ThreadSharedObject {
+public:
+  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(ThreadSharedObject)
+  virtual ~ThreadSharedObject() {}
+};
+
 /**
  * Heap-allocated chunk of arbitrary data with threadsafe refcounting.
  * Typically you would allocate one of these, fill it in, and then treat it as
@@ -20,15 +30,10 @@ namespace mozilla {
  * simply assume that the refcount is at least 4-byte aligned and its size
  * is divisible by 4.
  */
-class SharedBuffer {
+class SharedBuffer : public ThreadSharedObject {
 public:
-  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(SharedBuffer)
-  ~SharedBuffer() {}
-
   void* Data() { return this + 1; }
 
-  // Takes ownership of aData (which will be freed via moz_free()).
-  // aData consists of aChannels consecutive buffers, each of aLength samples.
   static already_AddRefed<SharedBuffer> Create(size_t aSize)
   {
     void* m = moz_xmalloc(sizeof(SharedBuffer) + aSize);
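
Hoisting the refcounting into ThreadSharedObject is what lets AudioChunk hold any buffer object, not just a SharedBuffer: the chunk only needs a refcounted owner that outlives its channel pointers. A minimal standalone analogue of the pattern (a hand-rolled refcount instead of NS_INLINE_DECL_THREADSAFE_REFCOUNTING; names are illustrative):

    #include <atomic>
    #include <cstdint>
    #include <vector>

    // Analogue of ThreadSharedObject: a thread-safe refcount plus a virtual
    // destructor, so any subclass can be destroyed through the base pointer.
    class ThreadShared {
    public:
      void AddRef() { mRefCnt.fetch_add(1, std::memory_order_relaxed); }
      void Release() {
        if (mRefCnt.fetch_sub(1, std::memory_order_acq_rel) == 1) {
          delete this;
        }
      }
      virtual ~ThreadShared() = default;
    private:
      std::atomic<int> mRefCnt{0};
    };

    // A buffer owner with non-contiguous channels; an AudioChunk-style
    // consumer would point one channel at mLeft.data() and another at
    // mRight.data() while holding a reference to this object.
    class StereoBuffers : public ThreadShared {
    public:
      explicit StereoBuffers(size_t aFrames) : mLeft(aFrames), mRight(aFrames) {}
      std::vector<int16_t> mLeft;
      std::vector<int16_t> mRight;
    };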

View File

@@ -300,7 +300,9 @@ MediaEngineWebRTCAudioSource::Process(const int channel,
 
     AudioSegment segment;
     segment.Init(CHANNELS);
-    segment.AppendFrames(buffer.forget(), length, 0, length, AUDIO_FORMAT_S16);
+    nsAutoTArray<const sample*,1> channels;
+    channels.AppendElement(dest);
+    segment.AppendFrames(buffer.forget(), channels, length);
 
     SourceMediaStream *source = mSources[i];
     if (source) {

View File

@@ -667,7 +667,7 @@ void MediaPipelineTransmit::PipelineListener::ProcessAudioChunk(
   nsAutoArrayPtr<int16_t> samples(new int16_t[chunk.mDuration]);
 
   if (chunk.mBuffer) {
-    switch(chunk.mBufferFormat) {
+    switch (chunk.mBufferFormat) {
     case AUDIO_FORMAT_FLOAT32:
       MOZ_MTLOG(PR_LOG_ERROR, "Can't process audio except in 16-bit PCM yet");
       MOZ_ASSERT(PR_FALSE);
@@ -675,8 +675,7 @@ void MediaPipelineTransmit::PipelineListener::ProcessAudioChunk(
       break;
     case AUDIO_FORMAT_S16:
       {
-        const short* buf = static_cast<const short *>(chunk.mBuffer->Data()) +
-          chunk.mOffset;
+        const short* buf = static_cast<const short *>(chunk.mChannelData[0]);
         ConvertAudioSamplesWithScale(buf, samples, chunk.mDuration, chunk.mVolume);
       }
       break;
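
The transmit path only handles mono for now, so it simply reads channel 0 out of mChannelData and applies the chunk's volume on the way out. ConvertAudioSamplesWithScale is the real helper's name; a plausible sketch of what such a scale-and-copy does (the clamping behavior is an assumption, not taken from the Gecko source):

    #include <cstdint>

    // Copy aCount 16-bit samples while applying a float gain, clamping to
    // the representable range before narrowing.
    void ScaleAndCopy(const int16_t* aSrc, int16_t* aDst, int32_t aCount, float aScale)
    {
      for (int32_t i = 0; i < aCount; ++i) {
        float v = aSrc[i] * aScale;
        if (v > 32767.0f)  v = 32767.0f;
        if (v < -32768.0f) v = -32768.0f;
        aDst[i] = int16_t(v);
      }
    }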
@@ -833,11 +832,12 @@ NotifyPull(MediaStreamGraph* graph, StreamTime desired_time) {
   while (MillisecondsToMediaTime(played_) < desired_time) {
     // TODO(ekr@rtfm.com): Is there a way to avoid mallocating here?
     nsRefPtr<SharedBuffer> samples = SharedBuffer::Create(1000);
+    int16_t *samples_data = static_cast<int16_t *>(samples->Data());
     int samples_length;
 
     MediaConduitErrorCode err =
         static_cast<AudioSessionConduit*>(conduit_.get())->GetAudioFrame(
-            static_cast<int16_t *>(samples->Data()),
+            samples_data,
             16000,  // Sampling rate fixed at 16 kHz for now
             0,      // TODO(ekr@rtfm.com): better estimate of capture delay
             samples_length);
@@ -849,8 +849,9 @@ NotifyPull(MediaStreamGraph* graph, StreamTime desired_time) {
 
     AudioSegment segment;
     segment.Init(1);
-    segment.AppendFrames(samples.forget(), samples_length,
-                         0, samples_length, AUDIO_FORMAT_S16);
+    nsAutoTArray<const int16_t*,1> channels;
+    channels.AppendElement(samples_data);
+    segment.AppendFrames(samples.forget(), channels, samples_length);
 
     source_->AppendToTrack(1,  // TODO(ekr@rtfm.com): Track ID
                            &segment);

View File

@@ -63,17 +63,18 @@ Fake_AudioGenerator(nsDOMMediaStream* aStream) : mStream(aStream), mCount(0) {
 
   static void Callback(nsITimer* timer, void *arg) {
     Fake_AudioGenerator* gen = static_cast<Fake_AudioGenerator*>(arg);
 
-    nsRefPtr<mozilla::SharedBuffer> samples = mozilla::SharedBuffer::Create(1600 * 2 * sizeof(int16_t));
-    for (int i=0; i<1600*2; i++) {
-      reinterpret_cast<int16_t *>(samples->Data())[i] = ((gen->mCount % 8) * 4000) - (7*4000)/2;
+    nsRefPtr<mozilla::SharedBuffer> samples = mozilla::SharedBuffer::Create(1600 * sizeof(int16_t));
+    int16_t* data = static_cast<int16_t*>(samples->Data());
+    for (int i=0; i<1600; i++) {
+      data[i] = ((gen->mCount % 8) * 4000) - (7*4000)/2;
       ++gen->mCount;
     }
 
     mozilla::AudioSegment segment;
     segment.Init(1);
-    segment.AppendFrames(samples.forget(), 1600,
-                         0, 1600, mozilla::AUDIO_FORMAT_S16);
+    nsAutoTArray<const int16_t*,1> channelData;
+    channelData.AppendElement(data);
+    segment.AppendFrames(samples.forget(), channelData, 1600);
     gen->mStream->GetStream()->AsSourceStream()->AppendToTrack(1, &segment);
   }
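
The fake generator's waveform is an 8-step staircase: ((mCount % 8) * 4000) - (7*4000)/2 walks from -14000 to +14000 in 4000-unit steps and wraps every 8 samples, so at the 16 kHz rate these tests use elsewhere it approximates a 2 kHz sawtooth. A trivial standalone loop confirms the values:

    #include <cstdio>

    int main()
    {
      // Prints: -14000 -10000 -6000 -2000 2000 6000 10000 14000
      for (int n = 0; n < 8; ++n) {
        std::printf("%d ", ((n % 8) * 4000) - (7 * 4000) / 2);
      }
    }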

View File

@@ -120,7 +120,7 @@ class Fake_SourceMediaStream : public Fake_MediaStream {
         mozilla::AudioChunk& chunk = *(iter);
         MOZ_ASSERT(chunk.mBuffer);
         const int16_t* buf =
-          static_cast<const int16_t*>(chunk.mBuffer->Data());
+          static_cast<const int16_t*>(chunk.mChannelData[0]);
         for(int i=0; i<chunk.mDuration; i++) {
           if(buf[i]) {
             //atleast one non-zero sample found.

View File

@@ -91,20 +91,18 @@ void Fake_AudioStreamSource::Periodic() {
   //Generate Signed 16 Bit Audio samples
   nsRefPtr<mozilla::SharedBuffer> samples =
     mozilla::SharedBuffer::Create(AUDIO_BUFFER_SIZE * NUM_CHANNELS * sizeof(int16_t));
+  int16_t* data = reinterpret_cast<int16_t *>(samples->Data());
   for(int i=0; i<(1600*2); i++) {
     //saw tooth audio sample
-    reinterpret_cast<int16_t *>(samples->Data())[i] =
-      ((mCount % 8) * 4000) - (7*4000)/2;
+    data[i] = ((mCount % 8) * 4000) - (7*4000)/2;
     mCount++;
   }
 
   mozilla::AudioSegment segment;
   segment.Init(1);
-  segment.AppendFrames(samples.forget(),
-                       AUDIO_BUFFER_SIZE,
-                       0,
-                       AUDIO_BUFFER_SIZE,
-                       mozilla::AUDIO_FORMAT_S16);
+  nsAutoTArray<const int16_t *,1> channels;
+  channels.AppendElement(data);
+  segment.AppendFrames(samples.forget(), channels, AUDIO_BUFFER_SIZE);
 
   for(std::set<Fake_MediaStreamListener *>::iterator it = mListeners.begin();
       it != mListeners.end(); ++it) {