From a2ddae256f025dfdc13bb942cecbc73d4c066ba9 Mon Sep 17 00:00:00 2001
From: Paul Adenot <paul@paul.cx>
Date: Wed, 28 Feb 2024 12:50:27 +0000
Subject: [PATCH] Bug 1879873 - Scale the inverse FFT result using tx instead
 of pre- or post-processing the data. r=karlt

This skips copying the input into the output buffer before the forward
transform, at the expense of having to `const_cast` the input pointer.

Differential Revision: https://phabricator.services.mozilla.com/D202434
---
 dom/media/webaudio/FFTBlock.cpp           |  8 ++---
 dom/media/webaudio/FFTBlock.h             | 36 ++++++++++++-----------
 dom/media/webaudio/blink/FFTConvolver.cpp |  2 +-
 dom/media/webaudio/blink/HRTFKernel.cpp   |  2 +-
 dom/media/webaudio/blink/PeriodicWave.cpp |  2 +-
 5 files changed, 25 insertions(+), 25 deletions(-)
diff --git a/dom/media/webaudio/FFTBlock.cpp b/dom/media/webaudio/FFTBlock.cpp
index 1af663808272..79fb934a0095 100644
--- a/dom/media/webaudio/FFTBlock.cpp
+++ b/dom/media/webaudio/FFTBlock.cpp
@@ -50,17 +50,15 @@ static double fdlibm_carg(const Complex& z) {
 FFTBlock* FFTBlock::CreateInterpolatedBlock(const FFTBlock& block0,
                                             const FFTBlock& block1,
                                             double interp) {
-  FFTBlock* newBlock = new FFTBlock(block0.FFTSize());
+  uint32_t fftSize = block0.FFTSize();
+  FFTBlock* newBlock = new FFTBlock(fftSize, 1.0f / AssertedCast<float>(fftSize));
 
   newBlock->InterpolateFrequencyComponents(block0, block1, interp);
 
   // In the time-domain, the 2nd half of the response must be zero, to avoid
   // circular convolution aliasing...
-  int fftSize = newBlock->FFTSize();
   AlignedTArray<float> buffer(fftSize);
-  newBlock->GetInverseWithoutScaling(buffer.Elements());
-  AudioBufferInPlaceScale(buffer.Elements(),
-                          1.0f / AssertedCast<float>(fftSize), fftSize / 2);
+  newBlock->GetInverse(buffer.Elements());
   PodZero(buffer.Elements() + fftSize / 2, fftSize / 2);
 
   // Put back into frequency domain.
diff --git a/dom/media/webaudio/FFTBlock.h b/dom/media/webaudio/FFTBlock.h
index b2a8f615589d..840f50e160e9 100644
--- a/dom/media/webaudio/FFTBlock.h
+++ b/dom/media/webaudio/FFTBlock.h
@@ -32,7 +32,8 @@ class FFTBlock final {
       FFVPXRuntimeLinker::GetFFTFuncs(&sFFTFuncs);
     }
   }
-  explicit FFTBlock(uint32_t aFFTSize) {
+  explicit FFTBlock(uint32_t aFFTSize, float aInverseScaling = 1.0f)
+      : mInverseScaling(aInverseScaling) {
     MOZ_COUNT_CTOR(FFTBlock);
     SetFFTSize(aFFTSize);
   }
@@ -53,24 +54,16 @@ class FFTBlock final {
       return;
     }
 
-    PodCopy(mOutputBuffer.Elements()->f, aData, mFFTSize);
-    // In place transform
-    mFn(mTxCtx, mOutputBuffer.Elements()->f, mOutputBuffer.Elements()->f,
+    mFn(mTxCtx, mOutputBuffer.Elements()->f, const_cast<float*>(aData),
         2 * sizeof(float));
 #ifdef DEBUG
     mInversePerformed = false;
 #endif
   }
-  // Inverse-transform internal data and store the resulting FFTSize()
-  // points in aDataOut.
-  void GetInverse(float* aDataOut) {
-    GetInverseWithoutScaling(aDataOut);
-    AudioBufferInPlaceScale(aDataOut, 1.0f / mFFTSize, mFFTSize);
-  }
   // Inverse-transform internal frequency data and store the resulting
   // FFTSize() points in |aDataOut|.  If frequency data has not already been
   // scaled, then the output will need scaling by 1/FFTSize().
-  void GetInverseWithoutScaling(float* aDataOut) {
+  void GetInverse(float* aDataOut) {
     if (!EnsureIFFT()) {
       std::fill_n(aDataOut, mFFTSize, 0.0f);
       return;
@@ -109,8 +102,8 @@ class FFTBlock final {
     MOZ_ASSERT(dataSize <= FFTSize());
     AlignedTArray<float> paddedData;
     paddedData.SetLength(FFTSize());
-    AudioBufferCopyWithScale(aData, 1.0f / FFTSize(), paddedData.Elements(),
-                             dataSize);
+    AudioBufferCopyWithScale(aData, 1.0f / AssertedCast<float>(FFTSize()),
+                             paddedData.Elements(), dataSize);
     PodZero(paddedData.Elements() + dataSize, mFFTSize - dataSize);
     PerformFFT(paddedData.Elements());
   }
@@ -128,12 +121,18 @@ class FFTBlock final {
   double ExtractAverageGroupDelay();
 
   uint32_t FFTSize() const { return mFFTSize; }
-  float RealData(uint32_t aIndex) const { return mOutputBuffer[aIndex].r; }
+  float RealData(uint32_t aIndex) const {
+    MOZ_ASSERT(!mInversePerformed);
+    return mOutputBuffer[aIndex].r;
+  }
   float& RealData(uint32_t aIndex) {
     MOZ_ASSERT(!mInversePerformed);
     return mOutputBuffer[aIndex].r;
   }
-  float ImagData(uint32_t aIndex) const { return mOutputBuffer[aIndex].i; }
+  float ImagData(uint32_t aIndex) const {
+    MOZ_ASSERT(!mInversePerformed);
+    return mOutputBuffer[aIndex].i;
+  }
   float& ImagData(uint32_t aIndex) {
     MOZ_ASSERT(!mInversePerformed);
     return mOutputBuffer[aIndex].i;
@@ -165,6 +164,7 @@ class FFTBlock final {
  private:
   bool EnsureFFT() {
     if (!mTxCtx) {
+      // Forward transform is always unscaled for our purpose.
       float scale = 1.0f;
       int rv = sFFTFuncs.init(&mTxCtx, &mFn, AV_TX_FLOAT_RDFT, 0 /* forward */,
                               AssertedCast<int>(mFFTSize), &scale, 0);
@@ -175,10 +175,9 @@ class FFTBlock final {
   }
   bool EnsureIFFT() {
     if (!mITxCtx) {
-      float scale = 0.5f;
       int rv =
           sFFTFuncs.init(&mITxCtx, &mIFn, AV_TX_FLOAT_RDFT, 1 /* inverse */,
-                         AssertedCast<int>(mFFTSize), &scale, 0);
+                         AssertedCast<int>(mFFTSize), &mInverseScaling, 0);
       MOZ_ASSERT(!rv, "av_tx_init: invalid parameters (inverse)");
       return !rv;
     }
@@ -208,6 +207,9 @@ class FFTBlock final {
   av_tx_fn mIFn{};
   AlignedTArray<ComplexU> mOutputBuffer;
   uint32_t mFFTSize{};
+  // A scaling that is performed when doing an inverse transform. The forward
+  // transform is always unscaled.
+  float mInverseScaling;
 #ifdef DEBUG
   bool mInversePerformed = false;
 #endif
diff --git a/dom/media/webaudio/blink/FFTConvolver.cpp b/dom/media/webaudio/blink/FFTConvolver.cpp
index 2ade9031cea1..f9b456a0d480 100644
--- a/dom/media/webaudio/blink/FFTConvolver.cpp
+++ b/dom/media/webaudio/blink/FFTConvolver.cpp
@@ -85,7 +85,7 @@ const float* FFTConvolver::process(FFTBlock* fftKernel, const float* sourceP) {
     // The input buffer is now filled (get frequency-domain version)
     m_frame.PerformFFT(m_inputBuffer.Elements());
     m_frame.Multiply(*fftKernel);
-    m_frame.GetInverseWithoutScaling(m_outputBuffer.Elements());
+    m_frame.GetInverse(m_outputBuffer.Elements());
 
     // Overlap-add 1st half from previous time
     AudioBufferAddWithScale(m_lastOverlapBuffer.Elements(), 1.0f,
diff --git a/dom/media/webaudio/blink/HRTFKernel.cpp b/dom/media/webaudio/blink/HRTFKernel.cpp
index ecaa846a6692..96a53609f24e 100644
--- a/dom/media/webaudio/blink/HRTFKernel.cpp
+++ b/dom/media/webaudio/blink/HRTFKernel.cpp
@@ -38,7 +38,7 @@ static float extractAverageGroupDelay(float* impulseP, size_t length) {
   // Check for power-of-2.
   MOZ_ASSERT(length && (length & (length - 1)) == 0);
 
-  FFTBlock estimationFrame(length);
+  FFTBlock estimationFrame(length, 1.f / length);
   estimationFrame.PerformFFT(impulseP);
 
   float frameDelay =
diff --git a/dom/media/webaudio/blink/PeriodicWave.cpp b/dom/media/webaudio/blink/PeriodicWave.cpp
index 6b1d173008e3..4ed882992851 100644
--- a/dom/media/webaudio/blink/PeriodicWave.cpp
+++ b/dom/media/webaudio/blink/PeriodicWave.cpp
@@ -266,7 +266,7 @@ void PeriodicWave::createBandLimitedTables(float fundamentalFrequency,
 
   // Apply an inverse FFT to generate the time-domain table data.
   float* data = m_bandLimitedTables[rangeIndex]->Elements();
-  frame.GetInverseWithoutScaling(data);
+  frame.GetInverse(data);
 
   // For the first range (which has the highest power), calculate
   // its peak value then compute normalization scale.