Bug 1271585: Proxy audio data to a separate thread for encoding r=pehrsons

MozReview-Commit-ID: 2s12qlaklWD
Randell Jesup 2016-05-12 15:34:13 -04:00
parent 505d7dd5ea
commit 4388c19bb5
3 changed files with 141 additions and 82 deletions
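
The shape of the change: the MediaStreamGraph/input-audio callback must not run the audio encoder itself, so each AudioChunk is bounced to a single worker thread, and using exactly one thread keeps the chunks in FIFO order without any extra machinery. Below is a minimal sketch of that hand-off pattern in plain C++; AudioChunkData, EncodeAndSend, and AudioWorker are hypothetical stand-ins for illustration, not Gecko APIs.

// Sketch only: hypothetical names, plain C++11, no Gecko types.
#include <condition_variable>
#include <cstdint>
#include <deque>
#include <mutex>
#include <thread>
#include <utility>
#include <vector>

struct AudioChunkData {            // stand-in for an audio chunk
  std::vector<int16_t> samples;
  uint32_t rate = 48000;
};

// Stand-in for handing the data to the encoder/conduit.
void EncodeAndSend(const AudioChunkData& chunk) { (void)chunk; }

class AudioWorker {
public:
  AudioWorker() : mThread([this] { Run(); }) {}

  ~AudioWorker() {
    {
      std::lock_guard<std::mutex> lock(mLock);
      mDone = true;
    }
    mCond.notify_one();
    mThread.join();               // drains the queue before exiting
  }

  // Called from the capture thread: enqueue and return immediately.
  void QueueAudioChunk(AudioChunkData chunk) {
    {
      std::lock_guard<std::mutex> lock(mLock);
      mQueue.push_back(std::move(chunk));
    }
    mCond.notify_one();
  }

private:
  // Single consumer thread: chunks are processed strictly in arrival order.
  void Run() {
    for (;;) {
      AudioChunkData chunk;
      {
        std::unique_lock<std::mutex> lock(mLock);
        mCond.wait(lock, [this] { return mDone || !mQueue.empty(); });
        if (mQueue.empty()) {
          return;                 // mDone and nothing left to process
        }
        chunk = std::move(mQueue.front());
        mQueue.pop_front();
      }
      EncodeAndSend(chunk);       // heavy work happens off the capture thread
    }
  }

  std::mutex mLock;
  std::condition_variable mCond;
  std::deque<AudioChunkData> mQueue;
  bool mDone = false;
  std::thread mThread;            // declared last so it starts after the rest
};

The actual patch gets the same single-consumer guarantee from a one-thread SharedThreadPool (or a named nsIThread in external-linkage builds), as shown in the diff below.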

View File

@@ -8,7 +8,6 @@
#include "nsAutoPtr.h"
#include "mozilla/Attributes.h"
#include "mozilla/Atomics.h"
#include "mozilla/SharedThreadPool.h"
#include "MediaConduitInterface.h"
#include "MediaEngineWrapper.h"

View File

@@ -42,6 +42,7 @@
#include "transportlayerice.h"
#include "runnable_utils.h"
#include "libyuv/convert.h"
#include "mozilla/SharedThreadPool.h"
#if !defined(MOZILLA_EXTERNAL_LINKAGE)
#include "mozilla/PeerIdentity.h"
#include "mozilla/TaskQueue.h"
@@ -478,6 +479,128 @@ protected:
};
#endif
// An async inserter for audio data, to avoid running audio codec encoders
// on the MSG/input audio thread. Basically just bounces all the audio
// data to a single audio processing/input queue. We could, if we wanted to,
// use multiple threads and a TaskQueue.
class AudioProxyThread
{
public:
  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(AudioProxyThread)

  AudioProxyThread()
  {
    MOZ_COUNT_CTOR(AudioProxyThread);

#if !defined(MOZILLA_EXTERNAL_LINKAGE)
    // Use only 1 thread; this also forces FIFO operation.
    // We could use multiple threads, but that may be dicier with the
    // webrtc.org code. If so, we'd need to use TaskQueues like the
    // videoframe converter does.
    RefPtr<SharedThreadPool> pool =
      SharedThreadPool::Get(NS_LITERAL_CSTRING("AudioProxy"), 1);
    mThread = pool.get();
#else
    nsCOMPtr<nsIThread> thread;
    if (!NS_WARN_IF(NS_FAILED(NS_NewNamedThread("AudioProxy", getter_AddRefs(thread))))) {
      mThread = thread;
    }
#endif
  }
  // Called on mThread
  void InternalProcessAudioChunk(AudioSessionConduit *conduit,
                                 TrackRate rate,
                                 AudioChunk& chunk,
                                 bool enabled) {
    // Convert to interleaved, 16-bit integer audio, with a maximum of two
    // channels (since the WebRTC.org code below makes the assumption that the
    // input audio is either mono or stereo).
    uint32_t outputChannels = chunk.ChannelCount() == 1 ? 1 : 2;
    const int16_t* samples = nullptr;
    UniquePtr<int16_t[]> convertedSamples;

    // We take advantage of the fact that in the common case (microphone
    // directly to PeerConnection, that is, a normal call) the samples are
    // already 16-bit mono, so the interleaved and planar representations are
    // identical and we can just use the data as-is.
    if (enabled && outputChannels == 1 && chunk.mBufferFormat == AUDIO_FORMAT_S16) {
      samples = chunk.ChannelData<int16_t>().Elements()[0];
    } else {
      convertedSamples = MakeUnique<int16_t[]>(chunk.mDuration * outputChannels);

      if (!enabled || chunk.mBufferFormat == AUDIO_FORMAT_SILENCE) {
        PodZero(convertedSamples.get(), chunk.mDuration * outputChannels);
      } else if (chunk.mBufferFormat == AUDIO_FORMAT_FLOAT32) {
        DownmixAndInterleave(chunk.ChannelData<float>(),
                             chunk.mDuration, chunk.mVolume, outputChannels,
                             convertedSamples.get());
      } else if (chunk.mBufferFormat == AUDIO_FORMAT_S16) {
        DownmixAndInterleave(chunk.ChannelData<int16_t>(),
                             chunk.mDuration, chunk.mVolume, outputChannels,
                             convertedSamples.get());
      }
      samples = convertedSamples.get();
    }

    MOZ_ASSERT(!(rate%100)); // rate should be a multiple of 100

    // Check if the rate or the number of channels has changed since the last
    // time we came through. I realize it may be overkill to check if the rate
    // has changed, but I believe it is possible (e.g. if we change sources)
    // and it costs us very little to handle this case.
    uint32_t audio_10ms = rate / 100;

    if (!packetizer_ ||
        packetizer_->PacketSize() != audio_10ms ||
        packetizer_->Channels() != outputChannels) {
      // It's ok to drop the audio still in the packetizer here.
      packetizer_ = new AudioPacketizer<int16_t, int16_t>(audio_10ms, outputChannels);
    }

    packetizer_->Input(samples, chunk.mDuration);

    while (packetizer_->PacketsAvailable()) {
      uint32_t samplesPerPacket = packetizer_->PacketSize() *
                                  packetizer_->Channels();

      // We know that webrtc.org's code is going to copy the samples down the
      // line, so we can just use a stack buffer here instead of malloc-ing.
      // Max size given stereo is 480*2*2 = 1920 (10ms of 16-bit stereo audio
      // at 48KHz).
      const size_t AUDIO_SAMPLE_BUFFER_MAX = 1920;
      int16_t packet[AUDIO_SAMPLE_BUFFER_MAX];

      packetizer_->Output(packet);
      conduit->SendAudioFrame(packet,
                              samplesPerPacket,
                              rate, 0);
    }
  }
  void QueueAudioChunk(AudioSessionConduit *conduit,
                       TrackRate rate, AudioChunk& chunk, bool enabled)
  {
    RUN_ON_THREAD(mThread,
                  WrapRunnable(this, &AudioProxyThread::InternalProcessAudioChunk,
                               conduit, rate, chunk, enabled),
                  NS_DISPATCH_NORMAL);
  }

protected:
  virtual ~AudioProxyThread()
  {
    MOZ_COUNT_DTOR(AudioProxyThread);
  }

  nsCOMPtr<nsIEventTarget> mThread;
  // Only accessed on mThread
  nsAutoPtr<AudioPacketizer<int16_t, int16_t>> packetizer_;
};
static char kDTLSExporterLabel[] = "EXTRACTOR-dtls_srtp";
MediaPipeline::MediaPipeline(const std::string& pc,
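
The packetizer contract that InternalProcessAudioChunk leans on above is small: rate is asserted to be a multiple of 100, so a 10 ms packet is rate/100 samples per channel (480 at 48 kHz, i.e. 960 interleaved int16_t samples in stereo, well within the 1920-entry stack buffer). Here is a rough model of that contract, assuming only the Input/PacketsAvailable/PacketSize/Channels/Output behavior used in the loop above; it is not Gecko's actual AudioPacketizer.

// Model of the 10 ms repacketizing step; illustrative only.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

class TenMsPacketizer {
public:
  TenMsPacketizer(uint32_t packetSizePerChannel, uint32_t channels)
    : mPacketSize(packetSizePerChannel), mChannels(channels) {}

  uint32_t PacketSize() const { return mPacketSize; }  // samples per channel
  uint32_t Channels() const { return mChannels; }

  // Append |frames| frames of interleaved samples from an incoming chunk.
  void Input(const int16_t* samples, size_t frames) {
    mBuffer.insert(mBuffer.end(), samples, samples + frames * mChannels);
  }

  // True once at least one full 10 ms packet is buffered.
  bool PacketsAvailable() const {
    return mBuffer.size() >= static_cast<size_t>(mPacketSize) * mChannels;
  }

  // Copies one full packet into |out| and drops it from the buffer.
  void Output(int16_t* out) {
    const size_t n = static_cast<size_t>(mPacketSize) * mChannels;
    std::memcpy(out, mBuffer.data(), n * sizeof(int16_t));
    mBuffer.erase(mBuffer.begin(), mBuffer.begin() + n);
  }

private:
  uint32_t mPacketSize;
  uint32_t mChannels;
  std::vector<int16_t> mBuffer;  // leftover samples carried to the next chunk
};

A chunk whose duration is not a multiple of 10 ms simply leaves its tail buffered for the next chunk, which is why the code above drains with a while (PacketsAvailable()) loop rather than assuming one packet per chunk.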
@@ -1102,8 +1225,7 @@ public:
track_id_external_(TRACK_INVALID),
active_(false),
enabled_(false),
direct_connect_(false),
packetizer_(nullptr)
direct_connect_(false)
{
}
@@ -1136,6 +1258,13 @@ public:
void SetActive(bool active) { active_ = active; }
void SetEnabled(bool enabled) { enabled_ = enabled; }
// These are needed since nested classes don't have access to any particular
// instance of the parent
void SetAudioProxy(const RefPtr<AudioProxyThread>& proxy)
{
audio_processing_ = proxy;
}
#if !defined(MOZILLA_EXTERNAL_LINKAGE)
void SetVideoFrameConverter(const RefPtr<VideoFrameConverter>& converter)
{
@@ -1183,10 +1312,8 @@ private:
StreamTime offset,
const MediaSegment& media);
virtual void ProcessAudioChunk(AudioSessionConduit *conduit,
TrackRate rate, AudioChunk& chunk);
RefPtr<MediaSessionConduit> conduit_;
RefPtr<AudioProxyThread> audio_processing_;
#if !defined(MOZILLA_EXTERNAL_LINKAGE)
RefPtr<VideoFrameConverter> converter_;
#endif
@@ -1206,8 +1333,6 @@ private:
// Written and read on the MediaStreamGraph thread
bool direct_connect_;
nsAutoPtr<AudioPacketizer<int16_t, int16_t>> packetizer_;
};
#if !defined(MOZILLA_EXTERNAL_LINKAGE)
@@ -1290,8 +1415,12 @@ MediaPipelineTransmit::MediaPipelineTransmit(
listener_(new PipelineListener(conduit)),
domtrack_(domtrack)
{
if (!IsVideo()) {
audio_processing_ = MakeAndAddRef<AudioProxyThread>();
listener_->SetAudioProxy(audio_processing_);
}
#if !defined(MOZILLA_EXTERNAL_LINKAGE)
if (IsVideo()) {
else { // Video
// For video we send frames to an async VideoFrameConverter that calls
// back to a VideoFrameFeeder that feeds I420 frames to VideoConduit.
@@ -1655,8 +1784,8 @@ NewData(MediaStreamGraph* graph,
#else
rate = graph->GraphRate();
#endif
ProcessAudioChunk(static_cast<AudioSessionConduit*>(conduit_.get()),
rate, *iter);
audio_processing_->QueueAudioChunk(static_cast<AudioSessionConduit*>(conduit_.get()),
rate, *iter, enabled_);
iter.Next();
}
} else if (media.GetType() == MediaSegment::VIDEO) {
@@ -1675,77 +1804,6 @@ NewData(MediaStreamGraph* graph,
}
}
void MediaPipelineTransmit::PipelineListener::ProcessAudioChunk(
    AudioSessionConduit *conduit,
    TrackRate rate,
    AudioChunk& chunk) {
  // Convert to interleaved, 16-bits integer audio, with a maximum of two
  // channels (since the WebRTC.org code below makes the assumption that the
  // input audio is either mono or stereo).
  uint32_t outputChannels = chunk.ChannelCount() == 1 ? 1 : 2;
  const int16_t* samples = nullptr;
  UniquePtr<int16_t[]> convertedSamples;

  // We take advantage of the fact that the common case (microphone directly to
  // PeerConnection, that is, a normal call), the samples are already 16-bits
  // mono, so the representation in interleaved and planar is the same, and we
  // can just use that.
  if (enabled_ && outputChannels == 1 && chunk.mBufferFormat == AUDIO_FORMAT_S16) {
    samples = chunk.ChannelData<int16_t>().Elements()[0];
  } else {
    convertedSamples = MakeUnique<int16_t[]>(chunk.mDuration * outputChannels);

    if (!enabled_ || chunk.mBufferFormat == AUDIO_FORMAT_SILENCE) {
      PodZero(convertedSamples.get(), chunk.mDuration * outputChannels);
    } else if (chunk.mBufferFormat == AUDIO_FORMAT_FLOAT32) {
      DownmixAndInterleave(chunk.ChannelData<float>(),
                           chunk.mDuration, chunk.mVolume, outputChannels,
                           convertedSamples.get());
    } else if (chunk.mBufferFormat == AUDIO_FORMAT_S16) {
      DownmixAndInterleave(chunk.ChannelData<int16_t>(),
                           chunk.mDuration, chunk.mVolume, outputChannels,
                           convertedSamples.get());
    }
    samples = convertedSamples.get();
  }

  MOZ_ASSERT(!(rate%100)); // rate should be a multiple of 100

  // Check if the rate or the number of channels has changed since the last time
  // we came through. I realize it may be overkill to check if the rate has
  // changed, but I believe it is possible (e.g. if we change sources) and it
  // costs us very little to handle this case.
  uint32_t audio_10ms = rate / 100;

  if (!packetizer_ ||
      packetizer_->PacketSize() != audio_10ms ||
      packetizer_->Channels() != outputChannels) {
    // It's ok to drop the audio still in the packetizer here.
    packetizer_ = new AudioPacketizer<int16_t, int16_t>(audio_10ms, outputChannels);
  }

  packetizer_->Input(samples, chunk.mDuration);

  while (packetizer_->PacketsAvailable()) {
    uint32_t samplesPerPacket = packetizer_->PacketSize() *
                                packetizer_->Channels();

    // We know that webrtc.org's code going to copy the samples down the line,
    // so we can just use a stack buffer here instead of malloc-ing.
    // Max size given stereo is 480*2*2 = 1920 (10ms of 16-bits stereo audio at
    // 48KHz)
    const size_t AUDIO_SAMPLE_BUFFER_MAX = 1920;
    int16_t packet[AUDIO_SAMPLE_BUFFER_MAX];

    packetizer_->Output(packet);
    conduit->SendAudioFrame(packet,
                            samplesPerPacket,
                            rate, 0);
  }
}
class TrackAddedCallback {
public:
virtual void TrackAdded(TrackTicks current_ticks) = 0;
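
Both the new AudioProxyThread path and the removed listener code funnel anything that is not already 16-bit mono through DownmixAndInterleave(). The sketch below gives a rough illustration of what that conversion amounts to for planar float input with at most two channels (chunk volume applied, samples clamped and interleaved); it is not Gecko's actual helper, which also handles downmixing more than two channels.

// Illustrative conversion: planar float channels -> interleaved int16.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

void InterleaveFloatToS16(const std::vector<const float*>& channels,
                          size_t frames, float volume, int16_t* out) {
  const size_t numChannels = channels.size();  // assumed 1 or 2 here
  for (size_t i = 0; i < frames; ++i) {
    for (size_t c = 0; c < numChannels; ++c) {
      // Scale to the int16 range, apply the chunk volume, and clamp.
      float v = channels[c][i] * volume * 32768.0f;
      v = std::max(-32768.0f, std::min(32767.0f, v));
      out[i * numChannels + c] = static_cast<int16_t>(v);
    }
  }
}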

View File

@@ -30,6 +30,7 @@ class nsIPrincipal;
namespace mozilla {
class MediaPipelineFilter;
class PeerIdentity;
class AudioProxyThread;
#if !defined(MOZILLA_EXTERNAL_LINKAGE)
class VideoFrameConverter;
#endif
@@ -344,6 +345,7 @@ public:
private:
RefPtr<PipelineListener> listener_;
RefPtr<AudioProxyThread> audio_processing_;
#if !defined(MOZILLA_EXTERNAL_LINKAGE)
RefPtr<VideoFrameFeeder> feeder_;
RefPtr<VideoFrameConverter> converter_;