b=815643 Add Blink's frequency interpolation and group delay methods from FFTFrame to FFTBlock r=ehsan

--HG--
extra : rebase_source : 1e3393c85f1828cbcce49af1e3160fc3535d39b8
This commit is contained in:
Karl Tomlinson 2013-08-08 21:38:24 +12:00
parent 0cf01a15bc
commit 7ec61fbe98
4 changed files with 245 additions and 312 deletions

View File

@ -0,0 +1,227 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* vim:set ts=4 sw=4 sts=4 et cindent: */
/*
* Copyright (C) 2010 Google Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
* its contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "FFTBlock.h"
#include "AudioNodeEngine.h"
#include <complex>
namespace mozilla {
typedef std::complex<double> Complex;
FFTBlock* FFTBlock::CreateInterpolatedBlock(const FFTBlock& block0, const FFTBlock& block1, double interp)
{
FFTBlock* newBlock = new FFTBlock(block0.FFTSize());
newBlock->InterpolateFrequencyComponents(block0, block1, interp);
// In the time-domain, the 2nd half of the response must be zero, to avoid circular convolution aliasing...
int fftSize = newBlock->FFTSize();
nsTArray<float> buffer;
buffer.SetLength(fftSize);
newBlock->PerformInverseFFT(buffer.Elements());
PodZero(buffer.Elements() + fftSize / 2, fftSize / 2);
// Put back into frequency domain.
newBlock->PerformFFT(buffer.Elements());
return newBlock;
}
void FFTBlock::InterpolateFrequencyComponents(const FFTBlock& block0, const FFTBlock& block1, double interp)
{
// FIXME : with some work, this method could be optimized
kiss_fft_cpx* dft = mOutputBuffer.Elements();
const kiss_fft_cpx* dft1 = block0.mOutputBuffer.Elements();
const kiss_fft_cpx* dft2 = block1.mOutputBuffer.Elements();
MOZ_ASSERT(mFFTSize == block0.FFTSize());
MOZ_ASSERT(mFFTSize == block1.FFTSize());
double s1base = (1.0 - interp);
double s2base = interp;
double phaseAccum = 0.0;
double lastPhase1 = 0.0;
double lastPhase2 = 0.0;
int n = mFFTSize / 2;
dft[0].r = static_cast<float>(s1base * dft1[0].r + s2base * dft2[0].r);
dft[n].r = static_cast<float>(s1base * dft1[n].r + s2base * dft2[n].r);
for (int i = 1; i < n; ++i) {
Complex c1(dft1[i].r, dft1[i].i);
Complex c2(dft2[i].r, dft2[i].i);
double mag1 = abs(c1);
double mag2 = abs(c2);
// Interpolate magnitudes in decibels
double mag1db = 20.0 * log10(mag1);
double mag2db = 20.0 * log10(mag2);
double s1 = s1base;
double s2 = s2base;
double magdbdiff = mag1db - mag2db;
// Empirical tweak to retain higher-frequency zeroes
double threshold = (i > 16) ? 5.0 : 2.0;
if (magdbdiff < -threshold && mag1db < 0.0) {
s1 = pow(s1, 0.75);
s2 = 1.0 - s1;
} else if (magdbdiff > threshold && mag2db < 0.0) {
s2 = pow(s2, 0.75);
s1 = 1.0 - s2;
}
// Average magnitude by decibels instead of linearly
double magdb = s1 * mag1db + s2 * mag2db;
double mag = pow(10.0, 0.05 * magdb);
// Now, deal with phase
double phase1 = arg(c1);
double phase2 = arg(c2);
double deltaPhase1 = phase1 - lastPhase1;
double deltaPhase2 = phase2 - lastPhase2;
lastPhase1 = phase1;
lastPhase2 = phase2;
// Unwrap phase deltas
if (deltaPhase1 > M_PI)
deltaPhase1 -= 2.0 * M_PI;
if (deltaPhase1 < -M_PI)
deltaPhase1 += 2.0 * M_PI;
if (deltaPhase2 > M_PI)
deltaPhase2 -= 2.0 * M_PI;
if (deltaPhase2 < -M_PI)
deltaPhase2 += 2.0 * M_PI;
// Blend group-delays
double deltaPhaseBlend;
if (deltaPhase1 - deltaPhase2 > M_PI)
deltaPhaseBlend = s1 * deltaPhase1 + s2 * (2.0 * M_PI + deltaPhase2);
else if (deltaPhase2 - deltaPhase1 > M_PI)
deltaPhaseBlend = s1 * (2.0 * M_PI + deltaPhase1) + s2 * deltaPhase2;
else
deltaPhaseBlend = s1 * deltaPhase1 + s2 * deltaPhase2;
phaseAccum += deltaPhaseBlend;
// Unwrap
if (phaseAccum > M_PI)
phaseAccum -= 2.0 * M_PI;
if (phaseAccum < -M_PI)
phaseAccum += 2.0 * M_PI;
dft[i].r = static_cast<float>(mag * cos(phaseAccum));
dft[i].i = static_cast<float>(mag * sin(phaseAccum));
}
}
double FFTBlock::ExtractAverageGroupDelay()
{
kiss_fft_cpx* dft = mOutputBuffer.Elements();
double aveSum = 0.0;
double weightSum = 0.0;
double lastPhase = 0.0;
int halfSize = FFTSize() / 2;
const double kSamplePhaseDelay = (2.0 * M_PI) / double(FFTSize());
// Calculate weighted average group delay
for (int i = 1; i < halfSize; i++) {
Complex c(dft[i].r, dft[i].i);
double mag = abs(c);
double phase = arg(c);
double deltaPhase = phase - lastPhase;
lastPhase = phase;
// Unwrap
if (deltaPhase < -M_PI)
deltaPhase += 2.0 * M_PI;
if (deltaPhase > M_PI)
deltaPhase -= 2.0 * M_PI;
aveSum += mag * deltaPhase;
weightSum += mag;
}
// Note how we invert the phase delta wrt frequency since this is how group delay is defined
double ave = aveSum / weightSum;
double aveSampleDelay = -ave / kSamplePhaseDelay;
// Leave 20 sample headroom (for leading edge of impulse)
if (aveSampleDelay > 20.0)
aveSampleDelay -= 20.0;
// Remove average group delay (minus 20 samples for headroom)
AddConstantGroupDelay(-aveSampleDelay);
// Remove DC offset
dft[0].r = 0.0f;
return aveSampleDelay;
}
void FFTBlock::AddConstantGroupDelay(double sampleFrameDelay)
{
int halfSize = FFTSize() / 2;
kiss_fft_cpx* dft = mOutputBuffer.Elements();
const double kSamplePhaseDelay = (2.0 * M_PI) / double(FFTSize());
double phaseAdj = -sampleFrameDelay * kSamplePhaseDelay;
// Add constant group delay
for (int i = 1; i < halfSize; i++) {
Complex c(dft[i].r, dft[i].i);
double mag = abs(c);
double phase = arg(c);
phase += i * phaseAdj;
dft[i].r = static_cast<float>(mag * cos(phase));
dft[i].i = static_cast<float>(mag * sin(phase));
}
}
} // namespace mozilla

View File

@ -23,14 +23,22 @@ public:
, mIFFT(nullptr)
, mFFTSize(aFFTSize)
{
MOZ_COUNT_CTOR(FFTBlock);
mOutputBuffer.SetLength(aFFTSize / 2 + 1);
PodZero(mOutputBuffer.Elements(), aFFTSize / 2 + 1);
}
~FFTBlock()
{
MOZ_COUNT_DTOR(FFTBlock);
Clear();
}
// Return a new FFTBlock with frequency components interpolated between
// |block0| and |block1| with |interp| between 0.0 and 1.0.
static FFTBlock*
CreateInterpolatedBlock(const FFTBlock& block0,
const FFTBlock& block1, double interp);
void PerformFFT(const float* aData)
{
EnsureFFT();
@ -70,6 +78,9 @@ public:
Clear();
}
// Return the average group delay and removes this from the frequency data.
double ExtractAverageGroupDelay();
uint32_t FFTSize() const
{
return mFFTSize;
@ -84,6 +95,9 @@ public:
}
private:
FFTBlock(const FFTBlock& other) MOZ_DELETE;
void operator=(const FFTBlock& other) MOZ_DELETE;
void EnsureFFT()
{
if (!mFFT) {
@ -102,8 +116,10 @@ private:
free(mIFFT);
mFFT = mIFFT = nullptr;
}
void AddConstantGroupDelay(double sampleFrameDelay);
void InterpolateFrequencyComponents(const FFTBlock& block0,
const FFTBlock& block1, double interp);
private:
kiss_fftr_cfg mFFT, mIFFT;
nsTArray<kiss_fft_cpx> mOutputBuffer;
uint32_t mFFTSize;

View File

@ -1,311 +0,0 @@
/*
* Copyright (C) 2010 Google Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
* its contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#if ENABLE(WEB_AUDIO)
#include "core/platform/audio/FFTFrame.h"
#ifndef NDEBUG
#include <stdio.h>
#endif
#include "core/platform/Logging.h"
#include "core/platform/PlatformMemoryInstrumentation.h"
#include <wtf/Complex.h>
#include <wtf/MathExtras.h>
#include <wtf/MemoryObjectInfo.h>
#include <wtf/OwnPtr.h>
#if !USE_ACCELERATE_FFT && USE(WEBAUDIO_FFMPEG)
void reportMemoryUsage(const RDFTContext* const&, WTF::MemoryObjectInfo*);
#endif // USE(WEBAUDIO_FFMPEG)
namespace WebCore {
void FFTFrame::doPaddedFFT(const float* data, size_t dataSize)
{
// Zero-pad the impulse response
AudioFloatArray paddedResponse(fftSize()); // zero-initialized
paddedResponse.copyToRange(data, 0, dataSize);
// Get the frequency-domain version of padded response
doFFT(paddedResponse.data());
}
PassOwnPtr<FFTFrame> FFTFrame::createInterpolatedFrame(const FFTFrame& frame1, const FFTFrame& frame2, double x)
{
OwnPtr<FFTFrame> newFrame = adoptPtr(new FFTFrame(frame1.fftSize()));
newFrame->interpolateFrequencyComponents(frame1, frame2, x);
// In the time-domain, the 2nd half of the response must be zero, to avoid circular convolution aliasing...
int fftSize = newFrame->fftSize();
AudioFloatArray buffer(fftSize);
newFrame->doInverseFFT(buffer.data());
buffer.zeroRange(fftSize / 2, fftSize);
// Put back into frequency domain.
newFrame->doFFT(buffer.data());
return newFrame.release();
}
void FFTFrame::interpolateFrequencyComponents(const FFTFrame& frame1, const FFTFrame& frame2, double interp)
{
// FIXME : with some work, this method could be optimized
float* realP = realData();
float* imagP = imagData();
const float* realP1 = frame1.realData();
const float* imagP1 = frame1.imagData();
const float* realP2 = frame2.realData();
const float* imagP2 = frame2.imagData();
m_FFTSize = frame1.fftSize();
m_log2FFTSize = frame1.log2FFTSize();
double s1base = (1.0 - interp);
double s2base = interp;
double phaseAccum = 0.0;
double lastPhase1 = 0.0;
double lastPhase2 = 0.0;
realP[0] = static_cast<float>(s1base * realP1[0] + s2base * realP2[0]);
imagP[0] = static_cast<float>(s1base * imagP1[0] + s2base * imagP2[0]);
int n = m_FFTSize / 2;
for (int i = 1; i < n; ++i) {
Complex c1(realP1[i], imagP1[i]);
Complex c2(realP2[i], imagP2[i]);
double mag1 = abs(c1);
double mag2 = abs(c2);
// Interpolate magnitudes in decibels
double mag1db = 20.0 * log10(mag1);
double mag2db = 20.0 * log10(mag2);
double s1 = s1base;
double s2 = s2base;
double magdbdiff = mag1db - mag2db;
// Empirical tweak to retain higher-frequency zeroes
double threshold = (i > 16) ? 5.0 : 2.0;
if (magdbdiff < -threshold && mag1db < 0.0) {
s1 = pow(s1, 0.75);
s2 = 1.0 - s1;
} else if (magdbdiff > threshold && mag2db < 0.0) {
s2 = pow(s2, 0.75);
s1 = 1.0 - s2;
}
// Average magnitude by decibels instead of linearly
double magdb = s1 * mag1db + s2 * mag2db;
double mag = pow(10.0, 0.05 * magdb);
// Now, deal with phase
double phase1 = arg(c1);
double phase2 = arg(c2);
double deltaPhase1 = phase1 - lastPhase1;
double deltaPhase2 = phase2 - lastPhase2;
lastPhase1 = phase1;
lastPhase2 = phase2;
// Unwrap phase deltas
if (deltaPhase1 > piDouble)
deltaPhase1 -= 2.0 * piDouble;
if (deltaPhase1 < -piDouble)
deltaPhase1 += 2.0 * piDouble;
if (deltaPhase2 > piDouble)
deltaPhase2 -= 2.0 * piDouble;
if (deltaPhase2 < -piDouble)
deltaPhase2 += 2.0 * piDouble;
// Blend group-delays
double deltaPhaseBlend;
if (deltaPhase1 - deltaPhase2 > piDouble)
deltaPhaseBlend = s1 * deltaPhase1 + s2 * (2.0 * piDouble + deltaPhase2);
else if (deltaPhase2 - deltaPhase1 > piDouble)
deltaPhaseBlend = s1 * (2.0 * piDouble + deltaPhase1) + s2 * deltaPhase2;
else
deltaPhaseBlend = s1 * deltaPhase1 + s2 * deltaPhase2;
phaseAccum += deltaPhaseBlend;
// Unwrap
if (phaseAccum > piDouble)
phaseAccum -= 2.0 * piDouble;
if (phaseAccum < -piDouble)
phaseAccum += 2.0 * piDouble;
Complex c = complexFromMagnitudePhase(mag, phaseAccum);
realP[i] = static_cast<float>(c.real());
imagP[i] = static_cast<float>(c.imag());
}
}
double FFTFrame::extractAverageGroupDelay()
{
float* realP = realData();
float* imagP = imagData();
double aveSum = 0.0;
double weightSum = 0.0;
double lastPhase = 0.0;
int halfSize = fftSize() / 2;
const double kSamplePhaseDelay = (2.0 * piDouble) / double(fftSize());
// Calculate weighted average group delay
for (int i = 1; i < halfSize; i++) {
Complex c(realP[i], imagP[i]);
double mag = abs(c);
double phase = arg(c);
double deltaPhase = phase - lastPhase;
lastPhase = phase;
// Unwrap
if (deltaPhase < -piDouble)
deltaPhase += 2.0 * piDouble;
if (deltaPhase > piDouble)
deltaPhase -= 2.0 * piDouble;
aveSum += mag * deltaPhase;
weightSum += mag;
}
// Note how we invert the phase delta wrt frequency since this is how group delay is defined
double ave = aveSum / weightSum;
double aveSampleDelay = -ave / kSamplePhaseDelay;
// Leave 20 sample headroom (for leading edge of impulse)
if (aveSampleDelay > 20.0)
aveSampleDelay -= 20.0;
// Remove average group delay (minus 20 samples for headroom)
addConstantGroupDelay(-aveSampleDelay);
// Remove DC offset
realP[0] = 0.0f;
return aveSampleDelay;
}
void FFTFrame::addConstantGroupDelay(double sampleFrameDelay)
{
int halfSize = fftSize() / 2;
float* realP = realData();
float* imagP = imagData();
const double kSamplePhaseDelay = (2.0 * piDouble) / double(fftSize());
double phaseAdj = -sampleFrameDelay * kSamplePhaseDelay;
// Add constant group delay
for (int i = 1; i < halfSize; i++) {
Complex c(realP[i], imagP[i]);
double mag = abs(c);
double phase = arg(c);
phase += i * phaseAdj;
Complex c2 = complexFromMagnitudePhase(mag, phase);
realP[i] = static_cast<float>(c2.real());
imagP[i] = static_cast<float>(c2.imag());
}
}
void FFTFrame::reportMemoryUsage(MemoryObjectInfo* memoryObjectInfo) const
{
MemoryClassInfo info(memoryObjectInfo, this, PlatformMemoryTypes::AudioSharedData);
#if USE_ACCELERATE_FFT
info.addMember(m_frame, "frame");
info.addMember(m_realData, "realData");
info.addMember(m_imagData, "imagData");
#else // !USE_ACCELERATE_FFT
#if USE(WEBAUDIO_FFMPEG)
info.addMember(m_forwardContext, "forwardContext");
info.addMember(m_inverseContext, "inverseContext");
info.addMember(m_complexData, "complexData");
info.addMember(m_realData, "realData");
info.addMember(m_imagData, "imagData");
#endif // USE(WEBAUDIO_FFMPEG)
#if USE(WEBAUDIO_IPP)
int size = 0;
ippsDFTGetBufSize_R_32f(m_DFTSpec, &size);
info.addRawBuffer(m_buffer, size * sizeof(Ipp8u), "buffer");
ippsDFTGetSize_R_32f(m_FFTSize, IPP_FFT_NODIV_BY_ANY, ippAlgHintFast, &size, 0, 0);
info.addRawBuffer(m_DFTSpec, size, "DFTSpec");
info.addMember(m_complexData, "complexData");
info.addMember(m_realData, "realData");
info.addMember(m_imagData, "imagData");
#endif // USE(WEBAUDIO_IPP)
#endif // !USE_ACCELERATE_FFT
}
#ifndef NDEBUG
void FFTFrame::print()
{
FFTFrame& frame = *this;
float* realP = frame.realData();
float* imagP = frame.imagData();
LOG(WebAudio, "**** \n");
LOG(WebAudio, "DC = %f : nyquist = %f\n", realP[0], imagP[0]);
int n = m_FFTSize / 2;
for (int i = 1; i < n; i++) {
double mag = sqrt(realP[i] * realP[i] + imagP[i] * imagP[i]);
double phase = atan2(realP[i], imagP[i]);
LOG(WebAudio, "[%d] (%f %f)\n", i, mag, phase);
}
LOG(WebAudio, "****\n");
}
#endif // NDEBUG
} // namespace WebCore
#endif // ENABLE(WEB_AUDIO)

View File

@ -66,6 +66,7 @@ CPP_SOURCES += [
'DelayNode.cpp',
'DynamicsCompressorNode.cpp',
'EnableWebAudioCheck.cpp',
'FFTBlock.cpp',
'GainNode.cpp',
'MediaBufferDecoder.cpp',
'MediaElementAudioSourceNode.cpp',