Bug 1879873 - Scale the inverse FFT result using tx instead of pre- or post-processing the data. r=karlt

This skips a copy at the expense of having to do a `const_cast`.

Differential Revision: https://phabricator.services.mozilla.com/D202434
2024-11-24 05:11:16 +00:00 · 2024-02-28 12:50:27 +00:00 · 2024-02-28 12:50:27 +00:00 · a2ddae256f
commit a2ddae256f
parent ac309d19de
5 changed files with 25 additions and 25 deletions
--- a/dom/media/webaudio/FFTBlock.cpp
+++ b/dom/media/webaudio/FFTBlock.cpp
@@ -50,17 +50,15 @@ static double fdlibm_carg(const Complex& z) {
 FFTBlock* FFTBlock::CreateInterpolatedBlock(const FFTBlock& block0,
                                            const FFTBlock& block1,
                                            double interp) {
-  FFTBlock* newBlock = new FFTBlock(block0.FFTSize());
+  uint32_t fftSize = block0.FFTSize();
+  FFTBlock* newBlock = new FFTBlock(fftSize, 1.0f / AssertedCast<float>(fftSize));

  newBlock->InterpolateFrequencyComponents(block0, block1, interp);

  // In the time-domain, the 2nd half of the response must be zero, to avoid
  // circular convolution aliasing...
-  int fftSize = newBlock->FFTSize();
  AlignedTArray<float> buffer(fftSize);
-  newBlock->GetInverseWithoutScaling(buffer.Elements());
-  AudioBufferInPlaceScale(buffer.Elements(),
-                          1.0f / AssertedCast<float>(fftSize), fftSize / 2);
+  newBlock->GetInverse(buffer.Elements());
  PodZero(buffer.Elements() + fftSize / 2, fftSize / 2);

  // Put back into frequency domain.
--- a/dom/media/webaudio/FFTBlock.h
+++ b/dom/media/webaudio/FFTBlock.h
@@ -32,7 +32,8 @@ class FFTBlock final {
      FFVPXRuntimeLinker::GetFFTFuncs(&sFFTFuncs);
    }
  }
-  explicit FFTBlock(uint32_t aFFTSize) {
+  explicit FFTBlock(uint32_t aFFTSize, float aInverseScaling = 1.0f)
+      : mInverseScaling(aInverseScaling) {
    MOZ_COUNT_CTOR(FFTBlock);
    SetFFTSize(aFFTSize);
  }
@@ -53,24 +54,16 @@ class FFTBlock final {
      return;
    }

-    PodCopy(mOutputBuffer.Elements()->f, aData, mFFTSize);
-    // In place transform
-    mFn(mTxCtx, mOutputBuffer.Elements()->f, mOutputBuffer.Elements()->f,
+    mFn(mTxCtx, mOutputBuffer.Elements()->f, const_cast<float*>(aData),
        2 * sizeof(float));
 #ifdef DEBUG
    mInversePerformed = false;
 #endif
  }
-  // Inverse-transform internal data and store the resulting FFTSize()
-  // points in aDataOut.
-  void GetInverse(float* aDataOut) {
-    GetInverseWithoutScaling(aDataOut);
-    AudioBufferInPlaceScale(aDataOut, 1.0f / mFFTSize, mFFTSize);
-  }
  // Inverse-transform internal frequency data and store the resulting
  // FFTSize() points in |aDataOut|.  If frequency data has not already been
  // scaled, then the output will need scaling by 1/FFTSize().
-  void GetInverseWithoutScaling(float* aDataOut) {
+  void GetInverse(float* aDataOut) {
    if (!EnsureIFFT()) {
      std::fill_n(aDataOut, mFFTSize, 0.0f);
      return;
@@ -109,8 +102,8 @@ class FFTBlock final {
    MOZ_ASSERT(dataSize <= FFTSize());
    AlignedTArray<float> paddedData;
    paddedData.SetLength(FFTSize());
-    AudioBufferCopyWithScale(aData, 1.0f / FFTSize(), paddedData.Elements(),
-                             dataSize);
+    AudioBufferCopyWithScale(aData, 1.0f / AssertedCast<float>(FFTSize()),
+                             paddedData.Elements(), dataSize);
    PodZero(paddedData.Elements() + dataSize, mFFTSize - dataSize);
    PerformFFT(paddedData.Elements());
  }
@@ -128,12 +121,18 @@ class FFTBlock final {
  double ExtractAverageGroupDelay();

  uint32_t FFTSize() const { return mFFTSize; }
-  float RealData(uint32_t aIndex) const { return mOutputBuffer[aIndex].r; }
+  float RealData(uint32_t aIndex) const {
+    MOZ_ASSERT(!mInversePerformed);
+    return mOutputBuffer[aIndex].r;
+  }
  float& RealData(uint32_t aIndex) {
    MOZ_ASSERT(!mInversePerformed);
    return mOutputBuffer[aIndex].r;
  }
-  float ImagData(uint32_t aIndex) const { return mOutputBuffer[aIndex].i; }
+  float ImagData(uint32_t aIndex) const {
+    MOZ_ASSERT(!mInversePerformed);
+    return mOutputBuffer[aIndex].i;
+  }
  float& ImagData(uint32_t aIndex) {
    MOZ_ASSERT(!mInversePerformed);
    return mOutputBuffer[aIndex].i;
@@ -165,6 +164,7 @@ class FFTBlock final {
 private:
  bool EnsureFFT() {
    if (!mTxCtx) {
+      // Forward transform is always unscaled for our purpose.
      float scale = 1.0f;
      int rv = sFFTFuncs.init(&mTxCtx, &mFn, AV_TX_FLOAT_RDFT, 0 /* forward */,
                              AssertedCast<int>(mFFTSize), &scale, 0);
@@ -175,10 +175,9 @@ class FFTBlock final {
  }
  bool EnsureIFFT() {
    if (!mITxCtx) {
-      float scale = 0.5f;
      int rv =
          sFFTFuncs.init(&mITxCtx, &mIFn, AV_TX_FLOAT_RDFT, 1 /* inverse */,
-                         AssertedCast<int>(mFFTSize), &scale, 0);
+                         AssertedCast<int>(mFFTSize), &mInverseScaling, 0);
      MOZ_ASSERT(!rv, "av_tx_init: invalid parameters (inverse)");
      return !rv;
    }
@@ -208,6 +207,9 @@ class FFTBlock final {
  av_tx_fn mIFn{};
  AlignedTArray<ComplexU> mOutputBuffer;
  uint32_t mFFTSize{};
+  // A scaling that is performed when doing an inverse transform. The forward
+  // transform is always unscaled.
+  float mInverseScaling;
 #ifdef DEBUG
  bool mInversePerformed = false;
 #endif
--- a/dom/media/webaudio/blink/FFTConvolver.cpp
+++ b/dom/media/webaudio/blink/FFTConvolver.cpp
@@ -85,7 +85,7 @@ const float* FFTConvolver::process(FFTBlock* fftKernel, const float* sourceP) {
    // The input buffer is now filled (get frequency-domain version)
    m_frame.PerformFFT(m_inputBuffer.Elements());
    m_frame.Multiply(*fftKernel);
-    m_frame.GetInverseWithoutScaling(m_outputBuffer.Elements());
+    m_frame.GetInverse(m_outputBuffer.Elements());

    // Overlap-add 1st half from previous time
    AudioBufferAddWithScale(m_lastOverlapBuffer.Elements(), 1.0f,
--- a/dom/media/webaudio/blink/HRTFKernel.cpp
+++ b/dom/media/webaudio/blink/HRTFKernel.cpp
@@ -38,7 +38,7 @@ static float extractAverageGroupDelay(float* impulseP, size_t length) {
  // Check for power-of-2.
  MOZ_ASSERT(length && (length & (length - 1)) == 0);

-  FFTBlock estimationFrame(length);
+  FFTBlock estimationFrame(length, 1.f / length);
  estimationFrame.PerformFFT(impulseP);

  float frameDelay =
--- a/dom/media/webaudio/blink/PeriodicWave.cpp
+++ b/dom/media/webaudio/blink/PeriodicWave.cpp
@@ -266,7 +266,7 @@ void PeriodicWave::createBandLimitedTables(float fundamentalFrequency,

  // Apply an inverse FFT to generate the time-domain table data.
  float* data = m_bandLimitedTables[rangeIndex]->Elements();
-  frame.GetInverseWithoutScaling(data);
+  frame.GetInverse(data);

  // For the first range (which has the highest power), calculate
  // its peak value then compute normalization scale.