b=913854 capture the full extent of the resampling filter r=padenot

The resampling filter means that the buffer influences a greater number of samples than indicated by just its length. Including the full influence of the linear filter means that adjacent buffers aligned appropriately will behave as if they were one extended buffer. The buffers are not yet aligned more carefully than track ticks, so buffers play back seamlessly only if their sample rates and lengths are such that their duration is an integer number of track ticks. Knowing how far the filter extends before the start time requires initializing the resampler before buffer processing. The patch also includes the input latency in the first resampler input buffer sample count estimate to reduce the number of calls required to start the resampler. --HG-- extra : rebase_source : 16d5af79bc5621be830f5956b51f7ff59d490575
2025-01-27 07:34:20 +00:00 · 2014-02-27 11:45:03 +13:00 · 2014-02-27 11:45:03 +13:00 · d68f21ee08
commit d68f21ee08
parent d5e8ef47f8
1 changed files with 115 additions and 87 deletions
--- a/content/media/webaudio/AudioBufferSourceNode.cpp
+++ b/content/media/webaudio/AudioBufferSourceNode.cpp
@ -56,11 +56,12 @@ public:
  explicit AudioBufferSourceNodeEngine(AudioNode* aNode,
                                       AudioDestinationNode* aDestination) :
    AudioNodeEngine(aNode),
-    mStart(0), mStop(TRACK_TICKS_MAX),
+    mStart(0), mBeginProcessing(0),
+    mStop(TRACK_TICKS_MAX),
    mResampler(nullptr), mRemainingResamplerTail(0),
    mBufferEnd(0),
    mLoopStart(0), mLoopEnd(0),
-    mBufferSampleRate(0), mBufferPosition(0), mChannels(0), mPlaybackRate(1.0f),
+    mBufferSampleRate(0), mBufferPosition(0), mChannels(0),
    mDopplerShift(1.0f),
    mDestination(static_cast<AudioNodeStream*>(aDestination->Stream())),
    mPlaybackRateTimeline(1.0f), mLoop(false)
@ -94,7 +95,10 @@ public:
  virtual void SetStreamTimeParameter(uint32_t aIndex, TrackTicks aParam)
  {
    switch (aIndex) {
-    case AudioBufferSourceNode::START: mStart = aParam; break;
+    case AudioBufferSourceNode::START:
+      MOZ_ASSERT(!mStart, "Another START?");
+      mBeginProcessing = mStart = aParam;
+      break;
    case AudioBufferSourceNode::STOP: mStop = aParam; break;
    default:
      NS_ERROR("Bad AudioBufferSourceNodeEngine StreamTimeParameter");
@ -104,7 +108,7 @@ public:
  {
    switch (aIndex) {
      case AudioBufferSourceNode::DOPPLERSHIFT:
-        mDopplerShift = aParam;
+        mDopplerShift = aParam > 0 && aParam == aParam ? aParam : 1.0;
        break;
      default:
        NS_ERROR("Bad AudioBufferSourceNodeEngine double parameter.");
@ -132,22 +136,52 @@ public:
    mBuffer = aBuffer;
  }

-  SpeexResamplerState* Resampler(AudioNodeStream* aStream, uint32_t aChannels)
+  bool BegunResampling()
  {
-    if (aChannels != mChannels && mResampler) {
+    return mBeginProcessing == -TRACK_TICKS_MAX;
+  }
+
+  void UpdateResampler(int32_t aOutRate, uint32_t aChannels)
+  {
+    if (mResampler &&
+        (aChannels != mChannels ||
+         // If the resampler has begun, then it will have moved
+         // mBufferPosition to after the samples it has read, but it hasn't
+         // output its buffered samples.  Keep using the resampler, even if
+         // the rates now match, so that this latent segment is output.
+         (aOutRate == mBufferSampleRate && !BegunResampling()))) {
      speex_resampler_destroy(mResampler);
      mResampler = nullptr;
+      mBeginProcessing = mStart;
+    }
+
+    if (aOutRate == mBufferSampleRate && !mResampler) {
+      return;
    }

    if (!mResampler) {
      mChannels = aChannels;
-      mResampler = speex_resampler_init(mChannels, mBufferSampleRate,
-                                        ComputeFinalOutSampleRate(aStream->SampleRate()),
+      mResampler = speex_resampler_init(mChannels, mBufferSampleRate, aOutRate,
                                        SPEEX_RESAMPLER_QUALITY_DEFAULT,
                                        nullptr);
-      speex_resampler_skip_zeros(mResampler);
+    } else {
+      uint32_t currentOutSampleRate, currentInSampleRate;
+      speex_resampler_get_rate(mResampler, &currentInSampleRate,
+                               &currentOutSampleRate);
+      if (currentOutSampleRate == static_cast<uint32_t>(aOutRate)) {
+        return;
+      }
+      speex_resampler_set_rate(mResampler, currentInSampleRate, aOutRate);
+    }
+
+    if (!BegunResampling()) {
+      // Low pass filter effects from the resampler mean that samples before
+      // the start time are influenced by resampling the buffer.  The input
+      // latency indicates half the filter width.
+      int64_t inputLatency = speex_resampler_get_input_latency(mResampler);
+      // Intentionally rounding down.  There is no effect beyond the filter.
+      mBeginProcessing = mStart - inputLatency * aOutRate / mBufferSampleRate;
    }
-    return mResampler;
  }

  // Borrow a full buffer of size WEBAUDIO_BLOCK_SIZE from the source buffer
@ -187,24 +221,43 @@ public:
  void CopyFromInputBufferWithResampling(AudioNodeStream* aStream,
                                         AudioChunk* aOutput,
                                         uint32_t aChannels,
-                                         uint32_t aOffsetWithinBlock,
-                                         uint32_t& aFramesWritten,
+                                         uint32_t* aOffsetWithinBlock,
+                                         TrackTicks* aCurrentPosition,
                                         int32_t aBufferMax) {
    // TODO: adjust for mStop (see bug 913854 comment 9).
-    uint32_t availableInOutputBuffer = WEBAUDIO_BLOCK_SIZE - aOffsetWithinBlock;
-    SpeexResamplerState* resampler = Resampler(aStream, aChannels);
+    uint32_t availableInOutputBuffer =
+      WEBAUDIO_BLOCK_SIZE - *aOffsetWithinBlock;
+    SpeexResamplerState* resampler = mResampler;
    MOZ_ASSERT(aChannels > 0);

    if (mBufferPosition < aBufferMax) {
      uint32_t availableInInputBuffer = aBufferMax - mBufferPosition;
+      uint32_t ratioNum, ratioDen;
+      speex_resampler_get_ratio(resampler, &ratioNum, &ratioDen);
      // Limit the number of input samples copied and possibly
      // format-converted for resampling by estimating how many will be used.
-      // This may be a little small when filling the resampler with initial
-      // data, but we'll get called again and it will work out.
-      uint32_t num, den;
-      speex_resampler_get_ratio(resampler, &num, &den);
-      uint32_t inputLimit = std::min(availableInInputBuffer,
-                                     availableInOutputBuffer * num / den + 10);
+      // This may be a little small if still filling the resampler with
+      // initial data, but we'll get called again and it will work out.
+      uint32_t inputLimit = availableInOutputBuffer * ratioNum / ratioDen + 10;
+      if (!BegunResampling()) {
+        // First time the resampler is used.
+        uint32_t inputLatency = speex_resampler_get_input_latency(resampler);
+        inputLimit += inputLatency;
+        // If starting after mStart, then play from the beginning of the
+        // buffer, but correct for input latency.  If starting before mStart,
+        // then align the resampler so that the time corresponding to the
+        // first input sample is mStart.
+        uint32_t skipFracNum = inputLatency * ratioDen;
+        if (*aCurrentPosition < mStart) {
+          skipFracNum -= (mStart - *aCurrentPosition) * ratioNum;
+          MOZ_ASSERT(skipFracNum < INT32_MAX, "mBeginProcessing is wrong?");
+        }
+        speex_resampler_set_skip_frac_num(resampler, skipFracNum);
+
+        mBeginProcessing = -TRACK_TICKS_MAX;
+      }
+      inputLimit = std::min(inputLimit, availableInInputBuffer);
+
      for (uint32_t i = 0; true; ) {
        uint32_t inSamples = inputLimit;
        const float* inputData = mBuffer->GetData(i) + mBufferPosition;
@ -212,7 +265,7 @@ public:
        uint32_t outSamples = availableInOutputBuffer;
        float* outputData =
          static_cast<float*>(const_cast<void*>(aOutput->mChannelData[i])) +
-          aOffsetWithinBlock;
+          *aOffsetWithinBlock;

        WebAudioUtils::SpeexResamplerProcess(resampler, i,
                                             inputData, &inSamples,
@ -220,20 +273,14 @@ public:
        if (++i == aChannels) {
          mBufferPosition += inSamples;
          MOZ_ASSERT(mBufferPosition <= mBufferEnd || mLoop);
-          aFramesWritten = outSamples;
+          *aOffsetWithinBlock += outSamples;
+          *aCurrentPosition += outSamples;
          if (inSamples == availableInInputBuffer && !mLoop) {
-            // If the available output space were unbounded then the input
-            // latency would always be the correct amount of extra input to
-            // provide in order to advance the output position to align with
-            // the final point in the buffer.  However, when the output space
-            // becomes full, the resampler may read all available input
-            // without writing out the corresponding output.  Add one more
-            // input sample, so that we know that enough output has been
-            // written when the last input sample has been read.  This may
-            // often write more than necessary but the extra samples will be
-            // based on (mostly) zero input.
+            // We'll feed in enough zeros to empty out the resampler's memory.
+            // This handles the output latency as well as capturing the low
+            // pass effects of the resample filter.
            mRemainingResamplerTail =
-              speex_resampler_get_input_latency(resampler) + 1;
+              2 * speex_resampler_get_input_latency(resampler) - 1;
          }
          return;
        }
@ -244,7 +291,7 @@ public:
        uint32_t outSamples = availableInOutputBuffer;
        float* outputData =
          static_cast<float*>(const_cast<void*>(aOutput->mChannelData[i])) +
-          aOffsetWithinBlock;
+          *aOffsetWithinBlock;

        // AudioDataValue* for aIn selects the function that does not try to
        // copy and format-convert input data.
@ -254,7 +301,8 @@ public:
        if (++i == aChannels) {
          mRemainingResamplerTail -= inSamples;
          MOZ_ASSERT(mRemainingResamplerTail >= 0);
-          aFramesWritten = outSamples;
+          *aOffsetWithinBlock += outSamples;
+          *aCurrentPosition += outSamples;
          break;
        }
      }
@ -312,7 +360,7 @@ public:
      std::min(std::min<TrackTicks>(WEBAUDIO_BLOCK_SIZE - *aOffsetWithinBlock,
                                    aBufferMax - mBufferPosition),
               mStop - *aCurrentPosition);
-    if (numFrames == WEBAUDIO_BLOCK_SIZE && !ShouldResample(aStream->SampleRate())) {
+    if (numFrames == WEBAUDIO_BLOCK_SIZE && !mResampler) {
      MOZ_ASSERT(mBufferPosition < aBufferMax);
      BorrowFromInputBuffer(aOutput, aChannels);
      *aOffsetWithinBlock += numFrames;
@ -322,68 +370,43 @@ public:
      if (*aOffsetWithinBlock == 0) {
        AllocateAudioBlock(aChannels, aOutput);
      }
-      if (!ShouldResample(aStream->SampleRate())) {
+      if (!mResampler) {
        MOZ_ASSERT(mBufferPosition < aBufferMax);
        CopyFromInputBuffer(aOutput, aChannels, *aOffsetWithinBlock, numFrames);
        *aOffsetWithinBlock += numFrames;
        *aCurrentPosition += numFrames;
        mBufferPosition += numFrames;
      } else {
-        uint32_t framesWritten;
-        CopyFromInputBufferWithResampling(aStream, aOutput, aChannels, *aOffsetWithinBlock, framesWritten, aBufferMax);
-        *aOffsetWithinBlock += framesWritten;
-        *aCurrentPosition += framesWritten;
+        CopyFromInputBufferWithResampling(aStream, aOutput, aChannels, aOffsetWithinBlock, aCurrentPosition, aBufferMax);
      }
    }
  }

-  uint32_t ComputeFinalOutSampleRate(TrackRate aStreamSampleRate)
+  int32_t ComputeFinalOutSampleRate(float aPlaybackRate)
  {
-    if (mPlaybackRate <= 0 || mPlaybackRate != mPlaybackRate) {
-      mPlaybackRate = 1.0f;
-    }
-    if (mDopplerShift <= 0 || mDopplerShift != mDopplerShift) {
-      mDopplerShift = 1.0f;
-    }
-    return WebAudioUtils::TruncateFloatToInt<uint32_t>(aStreamSampleRate /
-                                                       (mPlaybackRate * mDopplerShift));
-  }
-
-  bool ShouldResample(TrackRate aStreamSampleRate) const
-  {
-    // There is latency in the resampler.  If there is already a resampler,
-    // then it will have moved mBufferPosition to after the samples it has
-    // read, but it hasn't output its buffered samples.  Keep using the
-    // resampler, even if the rates now match, so that this latent segment is
-    // output.
-    return mResampler ||
-      (mPlaybackRate * mDopplerShift * mBufferSampleRate != aStreamSampleRate);
-  }
-
-  void UpdateSampleRateIfNeeded(AudioNodeStream* aStream, uint32_t aChannels)
-  {
-    if (mPlaybackRateTimeline.HasSimpleValue()) {
-      mPlaybackRate = mPlaybackRateTimeline.GetValue();
-    } else {
-      mPlaybackRate = mPlaybackRateTimeline.GetValueAtTime(aStream->GetCurrentPosition());
-    }
-
    // Make sure the playback rate and the doppler shift are something
    // our resampler can work with.
-    if (ComputeFinalOutSampleRate(aStream->SampleRate()) == 0) {
-      mPlaybackRate = 1.0;
-      mDopplerShift = 1.0;
+    int32_t rate = WebAudioUtils::
+      TruncateFloatToInt<int32_t>(mSource->SampleRate() /
+                                  (aPlaybackRate * mDopplerShift));
+    return rate ? rate : mBufferSampleRate;
+  }
+
+  void UpdateSampleRateIfNeeded(uint32_t aChannels)
+  {
+    float playbackRate;
+
+    if (mPlaybackRateTimeline.HasSimpleValue()) {
+      playbackRate = mPlaybackRateTimeline.GetValue();
+    } else {
+      playbackRate = mPlaybackRateTimeline.GetValueAtTime(mSource->GetCurrentPosition());
+    }
+    if (playbackRate <= 0 || playbackRate != playbackRate) {
+      playbackRate = 1.0f;
    }

-    if (mResampler) {
-      SpeexResamplerState* resampler = Resampler(aStream, aChannels);
-      uint32_t currentOutSampleRate, currentInSampleRate;
-      speex_resampler_get_rate(resampler, &currentInSampleRate, &currentOutSampleRate);
-      uint32_t finalSampleRate = ComputeFinalOutSampleRate(aStream->SampleRate());
-      if (currentOutSampleRate != finalSampleRate) {
-        speex_resampler_set_rate(resampler, currentInSampleRate, finalSampleRate);
-      }
-    }
+    int32_t outRate = ComputeFinalOutSampleRate(playbackRate);
+    UpdateResampler(outRate, aChannels);
  }

  virtual void ProduceAudioBlock(AudioNodeStream* aStream,
@ -405,7 +428,7 @@ public:
    // WebKit treats the playbackRate as a k-rate parameter in their code,
    // despite the spec saying that it should be an a-rate parameter. We treat
    // it as k-rate. Spec bug: https://www.w3.org/Bugs/Public/show_bug.cgi?id=21592
-    UpdateSampleRateIfNeeded(aStream, channels);
+    UpdateSampleRateIfNeeded(channels);

    uint32_t written = 0;
    TrackTicks streamPosition = aStream->GetCurrentPosition();
@ -415,8 +438,9 @@ public:
        FillWithZeroes(aOutput, channels, &written, &streamPosition, TRACK_TICKS_MAX);
        continue;
      }
-      if (streamPosition < mStart) {
-        FillWithZeroes(aOutput, channels, &written, &streamPosition, mStart);
+      if (streamPosition < mBeginProcessing) {
+        FillWithZeroes(aOutput, channels, &written, &streamPosition,
+                       mBeginProcessing);
        continue;
      }
      if (mLoop) {
@ -445,6 +469,11 @@ public:
  }

  TrackTicks mStart;
+  // Low pass filter effects from the resampler mean that samples before the
+  // start time are influenced by resampling the buffer.  mBeginProcessing
+  // includes the extent of this filter.  The special value of -TRACK_TICKS_MAX
+  // indicates that the resampler has begun processing.
+  TrackTicks mBeginProcessing;
  TrackTicks mStop;
  nsRefPtr<ThreadSharedFloatArrayBufferList> mBuffer;
  SpeexResamplerState* mResampler;
@ -457,7 +486,6 @@ public:
  int32_t mBufferSampleRate;
  int32_t mBufferPosition;
  uint32_t mChannels;
-  float mPlaybackRate;
  float mDopplerShift;
  AudioNodeStream* mDestination;
  AudioNodeStream* mSource;