Bug 919215 - Start the AudioStream on creation when in low-latency mode, and let it underrun. r=roc

The BufferedAudioStream buffers the data it gets through the Write() calls until
it is consumed by the callback. This means that if the audio producer starts
Write()ing data right after Start()ing the stream, data will accumulate in this
buffer before the callback begins to consume it. By the time consumption starts,
the buffer has reached a certain size, and that size becomes an incompressible
latency (because data is then consumed at more or less the same rate as it is
produced, the backlog never shrinks).
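
[Editor's note: for intuition, the incompressible latency is just the steady-state
backlog divided by the sample rate. A back-of-the-envelope sketch, standalone and
with made-up numbers; this is not code from the patch:]

  #include <cstdio>

  int main()
  {
    // Hypothetical figures: frames that piled up in the BufferedAudioStream
    // between the first Write() and the moment the callback started pulling.
    const int backlogFrames = 2048;
    const int sampleRate = 44100; // frames per second

    // Producer and consumer then run at the same rate, so the backlog never
    // shrinks; it is a constant added latency.
    printf("added latency: %.1f ms\n", 1000.0 * backlogFrames / sampleRate); // ~46.4 ms
    return 0;
  }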

This patch starts the BufferedAudioStream right away when it is created, dropping
the silent AudioSegments until real data arrives (padding with silence is then
done at the beginning). The stream will underrun, but the callback will
synthesize silence, avoiding overbuffering in the BufferedAudioStream. This
ensures minimal latency caused by the buffering.

Note that the clock will still advance, so this will not change the behavior of
content that has leading silence.
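
[Editor's note: the producer-side gate boils down to: write a chunk if it carries
real data, or if real data has already been written; otherwise drop it. A minimal
standalone model of that gate (illustrative names, not the tree code; the real
check is c.mBuffer || aOutput->GetWritten() in AudioSegment::WriteTo below):]

  #include <cassert>
  #include <cstdint>

  // Minimal model of the gate: leading silent chunks are dropped; once real
  // data has flowed, silent chunks must be written (as zeros) so the stream
  // stays continuous.
  struct StreamModel {
    int64_t mWritten = 0; // frames actually handed to the stream so far
    bool ShouldWrite(bool aChunkHasRealData) const {
      return aChunkHasRealData || mWritten > 0;
    }
  };

  int main()
  {
    StreamModel s;
    assert(!s.ShouldWrite(false)); // leading silence: dropped
    assert(s.ShouldWrite(true));   // first real chunk: written
    s.mWritten = 128;
    assert(s.ShouldWrite(false));  // mid-stream silence: written as zeros
    return 0;
  }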
Paul Adenot 2013-11-19 10:43:15 +13:00
parent 1c2bc77c17
commit 7f90ed61c6
3 changed files with 80 additions and 25 deletions

content/media/AudioSegment.cpp

@@ -126,36 +126,45 @@ AudioSegment::WriteTo(uint64_t aID, AudioStream* aOutput)
         NS_ERROR("Buffer overflow");
         return;
       }
       uint32_t duration = uint32_t(durationTicks);
-      buf.SetLength(outputChannels*duration);
-      if (c.mBuffer) {
-        channelData.SetLength(c.mChannelData.Length());
-        for (uint32_t i = 0; i < channelData.Length(); ++i) {
-          channelData[i] =
-            AddAudioSampleOffset(c.mChannelData[i], c.mBufferFormat, int32_t(offset));
-        }
-        if (channelData.Length() < outputChannels) {
-          // Up-mix. Note that this might actually make channelData have more
-          // than outputChannels temporarily.
-          AudioChannelsUpMix(&channelData, outputChannels, gZeroChannel);
-        }
-        if (channelData.Length() > outputChannels) {
-          // Down-mix.
-          DownmixAndInterleave(channelData, c.mBufferFormat, duration,
-                               c.mVolume, outputChannels, buf.Elements());
-        } else {
-          InterleaveAndConvertBuffer(channelData.Elements(), c.mBufferFormat,
-                                     duration, c.mVolume,
-                                     outputChannels,
-                                     buf.Elements());
-        }
-      } else {
-        // Assumes that a bit pattern of zeroes == 0.0f
-        memset(buf.Elements(), 0, buf.Length()*sizeof(AudioDataValue));
-      }
-      aOutput->Write(buf.Elements(), int32_t(duration), &(c.mTimeStamp));
+      // If we have written data in the past, or we have real (non-silent) data
+      // to write, we can proceed. Otherwise, it means we just started the
+      // AudioStream, and we don't have real data to write to it (just silence).
+      // To avoid overbuffering in the AudioStream, we simply drop the silence,
+      // here. The stream will underrun and output silence anyways.
+      if (c.mBuffer || aOutput->GetWritten()) {
+        buf.SetLength(outputChannels*duration);
+        if (c.mBuffer) {
+          channelData.SetLength(c.mChannelData.Length());
+          for (uint32_t i = 0; i < channelData.Length(); ++i) {
+            channelData[i] =
+              AddAudioSampleOffset(c.mChannelData[i], c.mBufferFormat, int32_t(offset));
+          }
+          if (channelData.Length() < outputChannels) {
+            // Up-mix. Note that this might actually make channelData have more
+            // than outputChannels temporarily.
+            AudioChannelsUpMix(&channelData, outputChannels, gZeroChannel);
+          }
+          if (channelData.Length() > outputChannels) {
+            // Down-mix.
+            DownmixAndInterleave(channelData, c.mBufferFormat, duration,
+                                 c.mVolume, outputChannels, buf.Elements());
+          } else {
+            InterleaveAndConvertBuffer(channelData.Elements(), c.mBufferFormat,
+                                       duration, c.mVolume,
+                                       outputChannels,
+                                       buf.Elements());
+          }
+        } else {
+          // Assumes that a bit pattern of zeroes == 0.0f
+          memset(buf.Elements(), 0, buf.Length()*sizeof(AudioDataValue));
+        }
+        aOutput->Write(buf.Elements(), int32_t(duration), &(c.mTimeStamp));
+      }
       if(!c.mTimeStamp.IsNull()) {
         TimeStamp now = TimeStamp::Now();
         // would be more efficient to c.mTimeStamp to ms on create time then pass here

content/media/AudioStream.cpp

@@ -144,6 +144,7 @@ AudioStream::AudioStream()
   mChannels(0),
   mWritten(0),
   mAudioClock(MOZ_THIS_IN_INITIALIZER_LIST()),
+  mLatencyRequest(HighLatency),
   mReadPoint(0)
 {}
@@ -364,6 +365,7 @@ private:
   // aTime is the time in ms the samples were inserted into MediaStreamGraph
   long GetUnprocessed(void* aBuffer, long aFrames, int64_t &aTime);
   long GetTimeStretched(void* aBuffer, long aFrames, int64_t &aTime);
+  long GetUnprocessedWithSilencePadding(void* aBuffer, long aFrames, int64_t &aTime);
   // Shared implementation of underflow adjusted position calculation.
   // Caller must own the monitor.
@@ -577,6 +579,7 @@ BufferedAudioStream::Init(int32_t aNumChannels, int32_t aRate,
          ("%s channels: %d, rate: %d", __FUNCTION__, aNumChannels, aRate));
   mInRate = mOutRate = aRate;
   mChannels = aNumChannels;
+  mLatencyRequest = aLatencyRequest;
   mDumpFile = OpenDumpFile(this);
@@ -634,6 +637,13 @@ BufferedAudioStream::Init(int32_t aNumChannels, int32_t aRate,
   NS_ABORT_IF_FALSE(bufferLimit % mBytesPerFrame == 0, "Must buffer complete frames");
   mBuffer.SetCapacity(bufferLimit);
+  // Start the stream right away when low latency has been requested. This means
+  // that the DataCallback will feed silence to cubeb, until the first frames
+  // are written to this BufferedAudioStream.
+  if (mLatencyRequest == AudioStream::LowLatency) {
+    Start();
+  }
   return NS_OK;
 }
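
[Editor's note: since Start() now runs before any data has been written, the first
callbacks find an empty buffer and must not stall. A simplified, standalone model
of an underrun-tolerant callback; illustrative only, the real one is
BufferedAudioStream::DataCallback below:]

  #include <algorithm>
  #include <cstring>

  // If the internal buffer cannot satisfy the request, serve what is there
  // and zero-fill (synthesize silence for) the remainder instead of blocking.
  long DataCallbackModel(unsigned char* aOut, long aRequestedBytes,
                         const unsigned char* aBuffered, long aBufferedBytes)
  {
    long served = std::min(aRequestedBytes, aBufferedBytes);
    memcpy(aOut, aBuffered, served);
    memset(aOut + served, 0, aRequestedBytes - served);
    return aRequestedBytes; // the clock keeps advancing over the silence
  }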
@@ -912,6 +922,32 @@ BufferedAudioStream::GetUnprocessed(void* aBuffer, long aFrames, int64_t &aTimeMs)
   return BytesToFrames(available) + flushedFrames;
 }
+// Get unprocessed samples, and pad the beginning of the buffer with silence if
+// there is not enough data.
+long
+BufferedAudioStream::GetUnprocessedWithSilencePadding(void* aBuffer, long aFrames, int64_t& aTimeMs)
+{
+  uint32_t toPopBytes = FramesToBytes(aFrames);
+  uint32_t available = std::min(toPopBytes, mBuffer.Length());
+  uint32_t silenceOffset = toPopBytes - available;
+  uint8_t* wpos = reinterpret_cast<uint8_t*>(aBuffer);
+  memset(wpos, 0, silenceOffset);
+  wpos += silenceOffset;
+  void* input[2];
+  uint32_t input_size[2];
+  mBuffer.PopElements(available, &input[0], &input_size[0], &input[1], &input_size[1]);
+  memcpy(wpos, input[0], input_size[0]);
+  wpos += input_size[0];
+  memcpy(wpos, input[1], input_size[1]);
+  GetBufferInsertTime(aTimeMs);
+  return aFrames;
+}
 long
 BufferedAudioStream::GetTimeStretched(void* aBuffer, long aFrames, int64_t &aTimeMs)
 {
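
[Editor's note: the two memcpy calls above exist because mBuffer is a circular
buffer, so PopElements can hand back the popped bytes as two segments when they
wrap around the end of the storage. The padding itself is simpler; a
self-contained sketch over a flat array, illustrative and not tree code:]

  #include <algorithm>
  #include <cstddef>
  #include <cstdio>
  #include <cstring>

  // Zero-fill (silence) the front of the output when fewer bytes are
  // buffered than requested, then copy the real samples after it.
  static void PopWithSilencePadding(const unsigned char* aBuffered, size_t aBufferedLen,
                                    unsigned char* aOut, size_t aRequested)
  {
    size_t available = std::min(aRequested, aBufferedLen);
    size_t silence = aRequested - available;
    memset(aOut, 0, silence);
    memcpy(aOut + silence, aBuffered, available);
  }

  int main()
  {
    unsigned char buffered[4] = {1, 2, 3, 4};
    unsigned char out[8];
    PopWithSilencePadding(buffered, sizeof(buffered), out, sizeof(out));
    for (unsigned char b : out) printf("%d ", b); // 0 0 0 0 1 2 3 4
    printf("\n");
    return 0;
  }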
@@ -965,8 +1001,16 @@ BufferedAudioStream::DataCallback(void* aBuffer, long aFrames)
   int64_t insertTime;
   if (available) {
+    // When we are playing a low latency stream, and it is the first time we are
+    // getting data from the buffer, we prefer to add the silence for an
+    // underrun at the beginning of the buffer, so the first buffer is not cut
+    // in half by the silence inserted to compensate for the underrun.
     if (mInRate == mOutRate) {
-      servicedFrames = GetUnprocessed(output, aFrames, insertTime);
+      if (mLatencyRequest == AudioStream::LowLatency && !mWritten) {
+        servicedFrames = GetUnprocessedWithSilencePadding(output, aFrames, insertTime);
+      } else {
+        servicedFrames = GetUnprocessed(output, aFrames, insertTime);
+      }
     } else {
       servicedFrames = GetTimeStretched(output, aFrames, insertTime);
     }

content/media/AudioStream.h

@@ -207,6 +207,8 @@ protected:
   // copy of Latency logger's starting time for offset calculations
   TimeStamp mStartTime;
+  // Whether we are playing a low latency stream, or a normal stream.
+  LatencyRequest mLatencyRequest;
   // Where in the current mInserts[0] block cubeb has read to
   int64_t mReadPoint;
   // Keep track of each inserted block of samples and the time it was inserted