From a2ddae256f025dfdc13bb942cecbc73d4c066ba9 Mon Sep 17 00:00:00 2001 From: Paul Adenot Date: Wed, 28 Feb 2024 12:50:27 +0000 Subject: [PATCH] Bug 1879873 - Scale the inverse FFT result using tx instead of pre- or post-processing the data. r=karlt This skips a copy at the expense of having to do a `const_cast`. Differential Revision: https://phabricator.services.mozilla.com/D202434 --- dom/media/webaudio/FFTBlock.cpp | 8 ++--- dom/media/webaudio/FFTBlock.h | 36 ++++++++++++----------- dom/media/webaudio/blink/FFTConvolver.cpp | 2 +- dom/media/webaudio/blink/HRTFKernel.cpp | 2 +- dom/media/webaudio/blink/PeriodicWave.cpp | 2 +- 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/dom/media/webaudio/FFTBlock.cpp b/dom/media/webaudio/FFTBlock.cpp index 1af663808272..79fb934a0095 100644 --- a/dom/media/webaudio/FFTBlock.cpp +++ b/dom/media/webaudio/FFTBlock.cpp @@ -50,17 +50,15 @@ static double fdlibm_carg(const Complex& z) { FFTBlock* FFTBlock::CreateInterpolatedBlock(const FFTBlock& block0, const FFTBlock& block1, double interp) { - FFTBlock* newBlock = new FFTBlock(block0.FFTSize()); + uint32_t fftSize = block0.FFTSize(); + FFTBlock* newBlock = new FFTBlock(fftSize, 1.0f / AssertedCast(fftSize)); newBlock->InterpolateFrequencyComponents(block0, block1, interp); // In the time-domain, the 2nd half of the response must be zero, to avoid // circular convolution aliasing... - int fftSize = newBlock->FFTSize(); AlignedTArray buffer(fftSize); - newBlock->GetInverseWithoutScaling(buffer.Elements()); - AudioBufferInPlaceScale(buffer.Elements(), - 1.0f / AssertedCast(fftSize), fftSize / 2); + newBlock->GetInverse(buffer.Elements()); PodZero(buffer.Elements() + fftSize / 2, fftSize / 2); // Put back into frequency domain. diff --git a/dom/media/webaudio/FFTBlock.h b/dom/media/webaudio/FFTBlock.h index b2a8f615589d..840f50e160e9 100644 --- a/dom/media/webaudio/FFTBlock.h +++ b/dom/media/webaudio/FFTBlock.h @@ -32,7 +32,8 @@ class FFTBlock final { FFVPXRuntimeLinker::GetFFTFuncs(&sFFTFuncs); } } - explicit FFTBlock(uint32_t aFFTSize) { + explicit FFTBlock(uint32_t aFFTSize, float aInverseScaling = 1.0f) + : mInverseScaling(aInverseScaling) { MOZ_COUNT_CTOR(FFTBlock); SetFFTSize(aFFTSize); } @@ -53,24 +54,16 @@ class FFTBlock final { return; } - PodCopy(mOutputBuffer.Elements()->f, aData, mFFTSize); - // In place transform - mFn(mTxCtx, mOutputBuffer.Elements()->f, mOutputBuffer.Elements()->f, + mFn(mTxCtx, mOutputBuffer.Elements()->f, const_cast(aData), 2 * sizeof(float)); #ifdef DEBUG mInversePerformed = false; #endif } - // Inverse-transform internal data and store the resulting FFTSize() - // points in aDataOut. - void GetInverse(float* aDataOut) { - GetInverseWithoutScaling(aDataOut); - AudioBufferInPlaceScale(aDataOut, 1.0f / mFFTSize, mFFTSize); - } // Inverse-transform internal frequency data and store the resulting // FFTSize() points in |aDataOut|. If frequency data has not already been // scaled, then the output will need scaling by 1/FFTSize(). - void GetInverseWithoutScaling(float* aDataOut) { + void GetInverse(float* aDataOut) { if (!EnsureIFFT()) { std::fill_n(aDataOut, mFFTSize, 0.0f); return; @@ -109,8 +102,8 @@ class FFTBlock final { MOZ_ASSERT(dataSize <= FFTSize()); AlignedTArray paddedData; paddedData.SetLength(FFTSize()); - AudioBufferCopyWithScale(aData, 1.0f / FFTSize(), paddedData.Elements(), - dataSize); + AudioBufferCopyWithScale(aData, 1.0f / AssertedCast(FFTSize()), + paddedData.Elements(), dataSize); PodZero(paddedData.Elements() + dataSize, mFFTSize - dataSize); PerformFFT(paddedData.Elements()); } @@ -128,12 +121,18 @@ class FFTBlock final { double ExtractAverageGroupDelay(); uint32_t FFTSize() const { return mFFTSize; } - float RealData(uint32_t aIndex) const { return mOutputBuffer[aIndex].r; } + float RealData(uint32_t aIndex) const { + MOZ_ASSERT(!mInversePerformed); + return mOutputBuffer[aIndex].r; + } float& RealData(uint32_t aIndex) { MOZ_ASSERT(!mInversePerformed); return mOutputBuffer[aIndex].r; } - float ImagData(uint32_t aIndex) const { return mOutputBuffer[aIndex].i; } + float ImagData(uint32_t aIndex) const { + MOZ_ASSERT(!mInversePerformed); + return mOutputBuffer[aIndex].i; + } float& ImagData(uint32_t aIndex) { MOZ_ASSERT(!mInversePerformed); return mOutputBuffer[aIndex].i; @@ -165,6 +164,7 @@ class FFTBlock final { private: bool EnsureFFT() { if (!mTxCtx) { + // Forward transform is always unscaled for our purpose. float scale = 1.0f; int rv = sFFTFuncs.init(&mTxCtx, &mFn, AV_TX_FLOAT_RDFT, 0 /* forward */, AssertedCast(mFFTSize), &scale, 0); @@ -175,10 +175,9 @@ class FFTBlock final { } bool EnsureIFFT() { if (!mITxCtx) { - float scale = 0.5f; int rv = sFFTFuncs.init(&mITxCtx, &mIFn, AV_TX_FLOAT_RDFT, 1 /* inverse */, - AssertedCast(mFFTSize), &scale, 0); + AssertedCast(mFFTSize), &mInverseScaling, 0); MOZ_ASSERT(!rv, "av_tx_init: invalid parameters (inverse)"); return !rv; } @@ -208,6 +207,9 @@ class FFTBlock final { av_tx_fn mIFn{}; AlignedTArray mOutputBuffer; uint32_t mFFTSize{}; + // A scaling that is performed when doing an inverse transform. The forward + // transform is always unscaled. + float mInverseScaling; #ifdef DEBUG bool mInversePerformed = false; #endif diff --git a/dom/media/webaudio/blink/FFTConvolver.cpp b/dom/media/webaudio/blink/FFTConvolver.cpp index 2ade9031cea1..f9b456a0d480 100644 --- a/dom/media/webaudio/blink/FFTConvolver.cpp +++ b/dom/media/webaudio/blink/FFTConvolver.cpp @@ -85,7 +85,7 @@ const float* FFTConvolver::process(FFTBlock* fftKernel, const float* sourceP) { // The input buffer is now filled (get frequency-domain version) m_frame.PerformFFT(m_inputBuffer.Elements()); m_frame.Multiply(*fftKernel); - m_frame.GetInverseWithoutScaling(m_outputBuffer.Elements()); + m_frame.GetInverse(m_outputBuffer.Elements()); // Overlap-add 1st half from previous time AudioBufferAddWithScale(m_lastOverlapBuffer.Elements(), 1.0f, diff --git a/dom/media/webaudio/blink/HRTFKernel.cpp b/dom/media/webaudio/blink/HRTFKernel.cpp index ecaa846a6692..96a53609f24e 100644 --- a/dom/media/webaudio/blink/HRTFKernel.cpp +++ b/dom/media/webaudio/blink/HRTFKernel.cpp @@ -38,7 +38,7 @@ static float extractAverageGroupDelay(float* impulseP, size_t length) { // Check for power-of-2. MOZ_ASSERT(length && (length & (length - 1)) == 0); - FFTBlock estimationFrame(length); + FFTBlock estimationFrame(length, 1.f / length); estimationFrame.PerformFFT(impulseP); float frameDelay = diff --git a/dom/media/webaudio/blink/PeriodicWave.cpp b/dom/media/webaudio/blink/PeriodicWave.cpp index 6b1d173008e3..4ed882992851 100644 --- a/dom/media/webaudio/blink/PeriodicWave.cpp +++ b/dom/media/webaudio/blink/PeriodicWave.cpp @@ -266,7 +266,7 @@ void PeriodicWave::createBandLimitedTables(float fundamentalFrequency, // Apply an inverse FFT to generate the time-domain table data. float* data = m_bandLimitedTables[rangeIndex]->Elements(); - frame.GetInverseWithoutScaling(data); + frame.GetInverse(data); // For the first range (which has the highest power), calculate // its peak value then compute normalization scale.