diff --git a/dom/media/webrtc/MediaEngineWebRTC.cpp b/dom/media/webrtc/MediaEngineWebRTC.cpp
index 844d2829511b..40cbf498ce8e 100644
--- a/dom/media/webrtc/MediaEngineWebRTC.cpp
+++ b/dom/media/webrtc/MediaEngineWebRTC.cpp
@@ -127,6 +127,9 @@ MediaEngineWebRTC::MediaEngineWebRTC(MediaEnginePrefs &aPrefs)
 #endif
   // XXX
   gFarendObserver = new AudioOutputObserver();
+
+  NS_NewNamedThread("AudioGUM", getter_AddRefs(mThread));
+  MOZ_ASSERT(mThread);
 }
 
 void
@@ -407,7 +410,7 @@ MediaEngineWebRTC::EnumerateAudioDevices(dom::MediaSourceEnum aMediaSource,
       // XXX Small window where the device list/index could change!
       audioinput = new mozilla::AudioInputCubeb(mVoiceEngine, i);
     }
-    aSource = new MediaEngineWebRTCMicrophoneSource(mVoiceEngine, audioinput,
+    aSource = new MediaEngineWebRTCMicrophoneSource(mThread, mVoiceEngine, audioinput,
                                                     i, deviceName, uniqueId);
     mAudioSources.Put(uuid, aSource); // Hashtable takes ownership.
     aASources->AppendElement(aSource);
@@ -448,6 +451,11 @@ MediaEngineWebRTC::Shutdown()
 
   mozilla::camera::Shutdown();
   AudioInputCubeb::CleanupGlobalData();
+
+  if (mThread) {
+    mThread->Shutdown();
+    mThread = nullptr;
+  }
 }
 
 }
diff --git a/dom/media/webrtc/MediaEngineWebRTC.h b/dom/media/webrtc/MediaEngineWebRTC.h
index 7b377af1cd06..648c32907ad0 100644
--- a/dom/media/webrtc/MediaEngineWebRTC.h
+++ b/dom/media/webrtc/MediaEngineWebRTC.h
@@ -416,7 +416,8 @@ class MediaEngineWebRTCMicrophoneSource : public MediaEngineAudioSource,
                                           private MediaConstraintsHelper
 {
 public:
-  MediaEngineWebRTCMicrophoneSource(webrtc::VoiceEngine* aVoiceEnginePtr,
+  MediaEngineWebRTCMicrophoneSource(nsIThread* aThread,
+                                    webrtc::VoiceEngine* aVoiceEnginePtr,
                                     mozilla::AudioInput* aAudioInput,
                                     int aIndex,
                                     const char* name,
@@ -425,6 +426,7 @@ public:
     , mVoiceEngine(aVoiceEnginePtr)
     , mAudioInput(aAudioInput)
     , mMonitor("WebRTCMic.Monitor")
+    , mThread(aThread)
     , mCapIndex(aIndex)
     , mChannel(-1)
     , mNrAllocations(0)
@@ -533,6 +535,7 @@ private:
   Monitor mMonitor;
   nsTArray<RefPtr<SourceMediaStream>> mSources;
   nsTArray<PrincipalHandle> mPrincipalHandles; // Maps to mSources.
+  nsCOMPtr<nsIThread> mThread;
   int mCapIndex;
   int mChannel;
   int mNrAllocations; // When this becomes 0, we shut down HW
diff --git a/dom/media/webrtc/MediaEngineWebRTCAudio.cpp b/dom/media/webrtc/MediaEngineWebRTCAudio.cpp
index ee1d8b1f16d5..36b0d6effe56 100644
--- a/dom/media/webrtc/MediaEngineWebRTCAudio.cpp
+++ b/dom/media/webrtc/MediaEngineWebRTCAudio.cpp
@@ -730,7 +730,11 @@ MediaEngineWebRTCMicrophoneSource::Process(int channel,
       // This is safe from any thread, and is safe if the track is Finished
       // or Destroyed.
-      mSources[i]->AppendToTrack(mTrackID, segment);
+      // Note: due to evil magic, the nsAutoPtr's ownership transfers to
+      // the Runnable (AutoPtr<> = AutoPtr<>)
+      RUN_ON_THREAD(mThread, WrapRunnable(mSources[i], &SourceMediaStream::AppendToTrack,
+                                          mTrackID, segment, (AudioSegment *) nullptr),
+                    NS_DISPATCH_NORMAL);
     }
   }
diff --git a/media/webrtc/signaling/src/media-conduit/VideoConduit.h b/media/webrtc/signaling/src/media-conduit/VideoConduit.h
index f3961b2c9620..328d1dbdafe7 100755
--- a/media/webrtc/signaling/src/media-conduit/VideoConduit.h
+++ b/media/webrtc/signaling/src/media-conduit/VideoConduit.h
@@ -8,6 +8,7 @@
 #include "nsAutoPtr.h"
 #include "mozilla/Attributes.h"
 #include "mozilla/Atomics.h"
+#include "mozilla/SharedThreadPool.h"
 #include "MediaConduitInterface.h"
 #include "MediaEngineWrapper.h"
diff --git a/media/webrtc/signaling/src/mediapipeline/MediaPipeline.cpp b/media/webrtc/signaling/src/mediapipeline/MediaPipeline.cpp
index a631efd1616e..f7d5ec0a602e 100644
--- a/media/webrtc/signaling/src/mediapipeline/MediaPipeline.cpp
+++ b/media/webrtc/signaling/src/mediapipeline/MediaPipeline.cpp
@@ -42,7 +42,6 @@
 #include "transportlayerice.h"
 #include "runnable_utils.h"
 #include "libyuv/convert.h"
-#include "mozilla/SharedThreadPool.h"
 #if !defined(MOZILLA_EXTERNAL_LINKAGE)
 #include "mozilla/PeerIdentity.h"
 #include "mozilla/TaskQueue.h"
@@ -479,128 +478,6 @@ protected:
 };
 #endif
 
-// An async inserter for audio data, to avoid running audio codec encoders
-// on the MSG/input audio thread. Basically just bounces all the audio
-// data to a single audio processing/input queue. We could if we wanted to
-// use multiple threads and a TaskQueue.
-class AudioProxyThread
-{
-public:
-  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(AudioProxyThread)
-
-  AudioProxyThread()
-  {
-    MOZ_COUNT_CTOR(AudioProxyThread);
-
-#if !defined(MOZILLA_EXTERNAL_LINKAGE)
-    // Use only 1 thread; also forces FIFO operation
-    // We could use multiple threads, but that may be dicier with the webrtc.org
-    // code. If so we'd need to use TaskQueues like the videoframe converter
-    RefPtr<SharedThreadPool> pool =
-      SharedThreadPool::Get(NS_LITERAL_CSTRING("AudioProxy"), 1);
-
-    mThread = pool.get();
-#else
-    nsCOMPtr<nsIThread> thread;
-    if (!NS_WARN_IF(NS_FAILED(NS_NewNamedThread("AudioProxy", getter_AddRefs(thread))))) {
-      mThread = thread;
-    }
-#endif
-  }
-
-  // called on mThread
-  void InternalProcessAudioChunk(
-    AudioSessionConduit *conduit,
-    TrackRate rate,
-    AudioChunk& chunk,
-    bool enabled) {
-
-    // Convert to interleaved, 16-bits integer audio, with a maximum of two
-    // channels (since the WebRTC.org code below makes the assumption that the
-    // input audio is either mono or stereo).
-    uint32_t outputChannels = chunk.ChannelCount() == 1 ? 1 : 2;
-    const int16_t* samples = nullptr;
-    UniquePtr<int16_t[]> convertedSamples;
-
-    // We take advantage of the fact that the common case (microphone directly to
-    // PeerConnection, that is, a normal call), the samples are already 16-bits
-    // mono, so the representation in interleaved and planar is the same, and we
-    // can just use that.
-    if (enabled && outputChannels == 1 && chunk.mBufferFormat == AUDIO_FORMAT_S16) {
-      samples = chunk.ChannelData<int16_t>().Elements()[0];
-    } else {
-      convertedSamples = MakeUnique<int16_t[]>(chunk.mDuration * outputChannels);
-
-      if (!enabled || chunk.mBufferFormat == AUDIO_FORMAT_SILENCE) {
-        PodZero(convertedSamples.get(), chunk.mDuration * outputChannels);
-      } else if (chunk.mBufferFormat == AUDIO_FORMAT_FLOAT32) {
-        DownmixAndInterleave(chunk.ChannelData<float>(),
-                             chunk.mDuration, chunk.mVolume, outputChannels,
-                             convertedSamples.get());
-      } else if (chunk.mBufferFormat == AUDIO_FORMAT_S16) {
-        DownmixAndInterleave(chunk.ChannelData<int16_t>(),
-                             chunk.mDuration, chunk.mVolume, outputChannels,
-                             convertedSamples.get());
-      }
-      samples = convertedSamples.get();
-    }
-
-    MOZ_ASSERT(!(rate%100)); // rate should be a multiple of 100
-
-    // Check if the rate or the number of channels has changed since the last time
-    // we came through. I realize it may be overkill to check if the rate has
-    // changed, but I believe it is possible (e.g. if we change sources) and it
-    // costs us very little to handle this case.
-
-    uint32_t audio_10ms = rate / 100;
-
-    if (!packetizer_ ||
-        packetizer_->PacketSize() != audio_10ms ||
-        packetizer_->Channels() != outputChannels) {
-      // It's ok to drop the audio still in the packetizer here.
-      packetizer_ = new AudioPacketizer<int16_t, int16_t>(audio_10ms, outputChannels);
-    }
-
-    packetizer_->Input(samples, chunk.mDuration);
-
-    while (packetizer_->PacketsAvailable()) {
-      uint32_t samplesPerPacket = packetizer_->PacketSize() *
-                                  packetizer_->Channels();
-
-      // We know that webrtc.org's code going to copy the samples down the line,
-      // so we can just use a stack buffer here instead of malloc-ing.
-      // Max size given stereo is 480*2*2 = 1920 (10ms of 16-bits stereo audio at
-      // 48KHz)
-      const size_t AUDIO_SAMPLE_BUFFER_MAX = 1920;
-      int16_t packet[AUDIO_SAMPLE_BUFFER_MAX];
-
-      packetizer_->Output(packet);
-      conduit->SendAudioFrame(packet,
-                              samplesPerPacket,
-                              rate, 0);
-    }
-  }
-
-  void QueueAudioChunk(AudioSessionConduit *conduit,
-                       TrackRate rate, AudioChunk& chunk, bool enabled)
-  {
-    RUN_ON_THREAD(mThread,
-                  WrapRunnable(this, &AudioProxyThread::InternalProcessAudioChunk,
-                               conduit, rate, chunk, enabled),
-                  NS_DISPATCH_NORMAL);
-  }
-
-protected:
-  virtual ~AudioProxyThread()
-  {
-    MOZ_COUNT_DTOR(AudioProxyThread);
-  }
-
-  nsCOMPtr<nsIThread> mThread;
-  // Only accessed on mThread
-  nsAutoPtr<AudioPacketizer<int16_t, int16_t>> packetizer_;
-};
-
 static char kDTLSExporterLabel[] = "EXTRACTOR-dtls_srtp";
 
 MediaPipeline::MediaPipeline(const std::string& pc,
@@ -1225,7 +1102,8 @@ public:
       track_id_external_(TRACK_INVALID),
       active_(false),
       enabled_(false),
-      direct_connect_(false)
+      direct_connect_(false),
+      packetizer_(nullptr)
   {
   }
 
@@ -1258,13 +1136,6 @@ public:
   void SetActive(bool active) { active_ = active; }
   void SetEnabled(bool enabled) { enabled_ = enabled; }
 
-  // These are needed since nested classes don't have access to any particular
-  // instance of the parent
-  void SetAudioProxy(const RefPtr<AudioProxyThread>& proxy)
-  {
-    audio_processing_ = proxy;
-  }
-
 #if !defined(MOZILLA_EXTERNAL_LINKAGE)
   void SetVideoFrameConverter(const RefPtr<VideoFrameConverter>& converter)
   {
@@ -1312,8 +1183,10 @@ private:
                            StreamTime offset,
                            const MediaSegment& media);
 
+  virtual void ProcessAudioChunk(AudioSessionConduit *conduit,
+                                 TrackRate rate, AudioChunk& chunk);
+
   RefPtr<MediaSessionConduit> conduit_;
-  RefPtr<AudioProxyThread> audio_processing_;
 #if !defined(MOZILLA_EXTERNAL_LINKAGE)
   RefPtr<VideoFrameConverter> converter_;
 #endif
@@ -1333,6 +1206,8 @@ private:
 
   // Written and read on the MediaStreamGraph thread
   bool direct_connect_;
+
+  nsAutoPtr<AudioPacketizer<int16_t, int16_t>> packetizer_;
 };
 
 #if !defined(MOZILLA_EXTERNAL_LINKAGE)
@@ -1415,12 +1290,8 @@ MediaPipelineTransmit::MediaPipelineTransmit(
     listener_(new PipelineListener(conduit)),
     domtrack_(domtrack)
 {
-  if (!IsVideo()) {
-    audio_processing_ = MakeAndAddRef<AudioProxyThread>();
-    listener_->SetAudioProxy(audio_processing_);
-  }
 #if !defined(MOZILLA_EXTERNAL_LINKAGE)
-  else { // Video
+  if (IsVideo()) {
     // For video we send frames to an async VideoFrameConverter that calls
     // back to a VideoFrameFeeder that feeds I420 frames to VideoConduit.
@@ -1784,8 +1655,8 @@ NewData(MediaStreamGraph* graph,
 #else
       rate = graph->GraphRate();
 #endif
-      audio_processing_->QueueAudioChunk(static_cast<AudioSessionConduit*>(conduit_.get()),
-                                         rate, *iter, enabled_);
+      ProcessAudioChunk(static_cast<AudioSessionConduit*>(conduit_.get()),
+                        rate, *iter);
       iter.Next();
     }
   } else if (media.GetType() == MediaSegment::VIDEO) {
@@ -1804,6 +1675,77 @@ NewData(MediaStreamGraph* graph,
   }
 }
 
+void MediaPipelineTransmit::PipelineListener::ProcessAudioChunk(
+  AudioSessionConduit *conduit,
+  TrackRate rate,
+  AudioChunk& chunk) {
+
+  // Convert to interleaved, 16-bits integer audio, with a maximum of two
+  // channels (since the WebRTC.org code below makes the assumption that the
+  // input audio is either mono or stereo).
+  uint32_t outputChannels = chunk.ChannelCount() == 1 ? 1 : 2;
+  const int16_t* samples = nullptr;
+  UniquePtr<int16_t[]> convertedSamples;
+
+  // We take advantage of the fact that the common case (microphone directly to
+  // PeerConnection, that is, a normal call), the samples are already 16-bits
+  // mono, so the representation in interleaved and planar is the same, and we
+  // can just use that.
+  if (enabled_ && outputChannels == 1 && chunk.mBufferFormat == AUDIO_FORMAT_S16) {
+    samples = chunk.ChannelData<int16_t>().Elements()[0];
+  } else {
+    convertedSamples = MakeUnique<int16_t[]>(chunk.mDuration * outputChannels);
+
+    if (!enabled_ || chunk.mBufferFormat == AUDIO_FORMAT_SILENCE) {
+      PodZero(convertedSamples.get(), chunk.mDuration * outputChannels);
+    } else if (chunk.mBufferFormat == AUDIO_FORMAT_FLOAT32) {
+      DownmixAndInterleave(chunk.ChannelData<float>(),
+                           chunk.mDuration, chunk.mVolume, outputChannels,
+                           convertedSamples.get());
+    } else if (chunk.mBufferFormat == AUDIO_FORMAT_S16) {
+      DownmixAndInterleave(chunk.ChannelData<int16_t>(),
+                           chunk.mDuration, chunk.mVolume, outputChannels,
+                           convertedSamples.get());
+    }
+    samples = convertedSamples.get();
+  }
+
+  MOZ_ASSERT(!(rate%100)); // rate should be a multiple of 100
+
+  // Check if the rate or the number of channels has changed since the last time
+  // we came through. I realize it may be overkill to check if the rate has
+  // changed, but I believe it is possible (e.g. if we change sources) and it
+  // costs us very little to handle this case.
+
+  uint32_t audio_10ms = rate / 100;
+
+  if (!packetizer_ ||
+      packetizer_->PacketSize() != audio_10ms ||
+      packetizer_->Channels() != outputChannels) {
+    // It's ok to drop the audio still in the packetizer here.
+    packetizer_ = new AudioPacketizer<int16_t, int16_t>(audio_10ms, outputChannels);
+  }
+
+  packetizer_->Input(samples, chunk.mDuration);
+
+  while (packetizer_->PacketsAvailable()) {
+    uint32_t samplesPerPacket = packetizer_->PacketSize() *
+                                packetizer_->Channels();
+
+    // We know that webrtc.org's code going to copy the samples down the line,
+    // so we can just use a stack buffer here instead of malloc-ing.
+    // Max size given stereo is 480*2*2 = 1920 (10ms of 16-bits stereo audio at
+    // 48KHz)
+    const size_t AUDIO_SAMPLE_BUFFER_MAX = 1920;
+    int16_t packet[AUDIO_SAMPLE_BUFFER_MAX];
+
+    packetizer_->Output(packet);
+    conduit->SendAudioFrame(packet,
+                            samplesPerPacket,
+                            rate, 0);
+  }
+}
+
 class TrackAddedCallback {
  public:
   virtual void TrackAdded(TrackTicks current_ticks) = 0;
diff --git a/media/webrtc/signaling/src/mediapipeline/MediaPipeline.h b/media/webrtc/signaling/src/mediapipeline/MediaPipeline.h
index 8c9a27fa2b88..e220008be9d7 100644
--- a/media/webrtc/signaling/src/mediapipeline/MediaPipeline.h
+++ b/media/webrtc/signaling/src/mediapipeline/MediaPipeline.h
@@ -30,7 +30,6 @@ class nsIPrincipal;
 namespace mozilla {
 class MediaPipelineFilter;
 class PeerIdentity;
-class AudioProxyThread;
 #if !defined(MOZILLA_EXTERNAL_LINKAGE)
 class VideoFrameConverter;
 #endif
@@ -345,7 +344,6 @@ public:
 
  private:
   RefPtr<PipelineListener> listener_;
-  RefPtr<AudioProxyThread> audio_processing_;
 #if !defined(MOZILLA_EXTERNAL_LINKAGE)
   RefPtr<VideoFrameFeeder> feeder_;
   RefPtr<VideoFrameConverter> converter_;
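Reviewer note, not part of the patch: a minimal standalone sketch of the 10 ms
packet-sizing arithmetic that PipelineListener::ProcessAudioChunk relies on.
The sample rates and channel counts below are hypothetical examples; the only
values taken from the patch are the rate/100 packet size, the mono-or-stereo
limit, and the 1920-element AUDIO_SAMPLE_BUFFER_MAX stack buffer.

// Sketch only: shows that a 10 ms packet (rate/100 frames, at most 2 channels)
// always fits the 1920-element stack buffer for rates up to 48 kHz.
#include <cstdint>
#include <cstdio>

int main() {
  const size_t AUDIO_SAMPLE_BUFFER_MAX = 1920;     // stack buffer length used in the patch

  const uint32_t rates[] = {16000, 32000, 48000};  // must be multiples of 100
  const uint32_t channelCounts[] = {1, 2};         // packetizer input is mono or stereo

  for (uint32_t rate : rates) {
    for (uint32_t channels : channelCounts) {
      uint32_t audio_10ms = rate / 100;                  // frames per 10 ms packet
      uint32_t samplesPerPacket = audio_10ms * channels; // interleaved int16_t samples
      // 48 kHz stereo is the worst case: 480 * 2 = 960 samples, well under 1920.
      printf("rate=%u ch=%u -> %u samples/packet (fits: %s)\n",
             static_cast<unsigned>(rate), static_cast<unsigned>(channels),
             static_cast<unsigned>(samplesPerPacket),
             samplesPerPacket <= AUDIO_SAMPLE_BUFFER_MAX ? "yes" : "no");
    }
  }
  return 0;
}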