Bug 886886: replace fixed-ratio capture resampler in webrtc with speex resampler r=derf,jmspeex

This commit is contained in:
Randell Jesup 2013-07-21 03:47:24 -04:00
parent 3af66c5a46
commit 03b686d2df
9 changed files with 268 additions and 1171 deletions

View File

@ -17,5 +17,5 @@
#
# Modifying this file will now automatically clobber the buildbot machines \o/
#
Bug 895670 - Remove SetupScreen and associated assets
Android-only resource clobber (just rm objdir/mobile/android/base/res)
Bug 886886 - replace fixed-ratio capture resampler in webrtc with speex resampler
Requires a clobber due to modification of a .gypi file without a .gyp or configure.in change

View File

@ -17,98 +17,48 @@
#define WEBRTC_RESAMPLER_RESAMPLER_H_
#include "typedefs.h"
#include "speex/speex_resampler.h"
namespace webrtc
{
// TODO(andrew): the implementation depends on the exact values of this enum.
// It should be rewritten in a less fragile way.
#define FIXED_RATE_RESAMPLER 0x10
enum ResamplerType
{
// 4 MSB = Number of channels
// 4 LSB = Synchronous or asynchronous
kResamplerSynchronous = 0x10,
kResamplerAsynchronous = 0x11,
kResamplerSynchronousStereo = 0x20,
kResamplerAsynchronousStereo = 0x21,
kResamplerInvalid = 0xff
};
// TODO(andrew): doesn't need to be part of the interface.
enum ResamplerMode
{
kResamplerMode1To1,
kResamplerMode1To2,
kResamplerMode1To3,
kResamplerMode1To4,
kResamplerMode1To6,
kResamplerMode1To12,
kResamplerMode2To3,
kResamplerMode2To11,
kResamplerMode4To11,
kResamplerMode8To11,
kResamplerMode11To16,
kResamplerMode11To32,
kResamplerMode2To1,
kResamplerMode3To1,
kResamplerMode4To1,
kResamplerMode6To1,
kResamplerMode12To1,
kResamplerMode3To2,
kResamplerMode11To2,
kResamplerMode11To4,
kResamplerMode11To8
kResamplerSynchronous = 0x00,
kResamplerSynchronousStereo = 0x01,
kResamplerFixedSynchronous = 0x00 | FIXED_RATE_RESAMPLER,
kResamplerFixedSynchronousStereo = 0x01 | FIXED_RATE_RESAMPLER,
};
class Resampler
{
public:
Resampler();
// TODO(andrew): use an init function instead.
Resampler(int inFreq, int outFreq, ResamplerType type);
Resampler(int in_freq, int out_freq, ResamplerType type);
~Resampler();
// Reset all states
int Reset(int inFreq, int outFreq, ResamplerType type);
int Reset(int in_freq, int out_freq, ResamplerType type);
// Reset all states if any parameter has changed
int ResetIfNeeded(int inFreq, int outFreq, ResamplerType type);
int ResetIfNeeded(int in_freq, int out_freq, ResamplerType type);
// Synchronous resampling, all output samples are written to samplesOut
int Push(const int16_t* samplesIn, int lengthIn, int16_t* samplesOut,
int maxLen, int &outLen);
// Asynchronous resampling, input
int Insert(int16_t* samplesIn, int lengthIn);
// Asynchronous resampling output, remaining samples are buffered
int Pull(int16_t* samplesOut, int desiredLen, int &outLen);
int Push(const int16_t* samples_in, int length_in,
int16_t* samples_out, int max_len, int &out_len);
private:
// Generic pointers since we don't know what states we'll need
void* state1_;
void* state2_;
void* state3_;
bool IsFixedRate() { return !!(type_ & FIXED_RATE_RESAMPLER); }
// Storage if needed
int16_t* in_buffer_;
int16_t* out_buffer_;
int in_buffer_size_;
int out_buffer_size_;
int in_buffer_size_max_;
int out_buffer_size_max_;
SpeexResamplerState* state_;
// State
int my_in_frequency_khz_;
int my_out_frequency_khz_;
ResamplerMode my_mode_;
ResamplerType my_type_;
// Extra instance for stereo
Resampler* slave_left_;
Resampler* slave_right_;
int in_freq_;
int out_freq_;
int channels_;
ResamplerType type_;
};
} // namespace webrtc

File diff suppressed because it is too large Load Diff

View File

@ -17,10 +17,24 @@
'include_dirs': [
'include',
],
'conditions': [
['build_with_mozilla==1', {
'include_dirs': [
'$(DEPTH)/dist/include',
],
}],
],
'direct_dependent_settings': {
'include_dirs': [
'include',
],
'conditions': [
['build_with_mozilla==1', {
'include_dirs': [
'$(DEPTH)/dist/include',
],
}],
],
},
'sources': [
'include/resampler.h',

View File

@ -8,6 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <math.h>
#include "gtest/gtest.h"
#include "common_audio/resampler/include/resampler.h"
@ -18,10 +20,7 @@ namespace webrtc {
namespace {
const ResamplerType kTypes[] = {
kResamplerSynchronous,
kResamplerAsynchronous,
kResamplerSynchronousStereo,
kResamplerAsynchronousStereo
// kResamplerInvalid excluded
};
const size_t kTypesSize = sizeof(kTypes) / sizeof(*kTypes);
@ -31,7 +30,7 @@ const int kRates[] = {
8000,
16000,
32000,
44000,
44100,
48000,
kMaxRate
};
@ -39,26 +38,19 @@ const size_t kRatesSize = sizeof(kRates) / sizeof(*kRates);
const int kMaxChannels = 2;
const size_t kDataSize = static_cast<size_t> (kMaxChannels * kMaxRate / 100);
// TODO(andrew): should we be supporting these combinations?
// Tells the tests whether Reset() is expected to succeed for a rate pair.
// The only rejected pairs are conversions between 44000 Hz and either
// 48000 Hz or 96000 Hz, in either direction.
bool ValidRates(int in_rate, int out_rate) {
  const bool in_is_48k_or_96k = (in_rate == 48000) || (in_rate == 96000);
  const bool out_is_48k_or_96k = (out_rate == 48000) || (out_rate == 96000);

  const bool rejected_upward = (in_rate == 44000) && out_is_48k_or_96k;
  const bool rejected_downward = (out_rate == 44000) && in_is_48k_or_96k;

  return !(rejected_upward || rejected_downward);
}
// Test fixture shared by the resampler tests. It owns the Resampler under
// test and the fixed-size sample buffers the tests read from and write to.
class ResamplerTest : public testing::Test {
protected:
ResamplerTest();
virtual void SetUp();
virtual void TearDown();
// Resamples one generated ramp frame with |channels| channels from
// |src_sample_rate_hz| to |dst_sample_rate_hz| through rs_ and checks the
// output length and the SNR against a reference ramp.
void RunResampleTest(int channels,
int src_sample_rate_hz,
int dst_sample_rate_hz);
// Resampler instance exercised by the tests.
Resampler rs_;
// Input samples handed to Push().
int16_t data_in_[kDataSize];
// Output samples written by Push().
int16_t data_out_[kDataSize];
// Expected output that RunResampleTest compares data_out_ against.
int16_t data_reference_[kDataSize];
};
// The constructor body is empty: no member is explicitly initialized here.
ResamplerTest::ResamplerTest() {}
@ -83,34 +75,119 @@ TEST_F(ResamplerTest, Reset) {
ss << "Input rate: " << kRates[i] << ", output rate: " << kRates[j]
<< ", type: " << kTypes[k];
SCOPED_TRACE(ss.str());
if (ValidRates(kRates[i], kRates[j]))
EXPECT_EQ(0, rs_.Reset(kRates[i], kRates[j], kTypes[k]));
else
EXPECT_EQ(-1, rs_.Reset(kRates[i], kRates[j], kTypes[k]));
EXPECT_EQ(0, rs_.Reset(kRates[i], kRates[j], kTypes[k]));
}
}
}
}
// TODO(tlegrand): Replace code inside the two tests below with a function
// with number of channels and ResamplerType as input.
TEST_F(ResamplerTest, Synchronous) {
for (size_t i = 0; i < kRatesSize; ++i) {
for (size_t j = 0; j < kRatesSize; ++j) {
std::ostringstream ss;
ss << "Input rate: " << kRates[i] << ", output rate: " << kRates[j];
SCOPED_TRACE(ss.str());
// Writes a ramp into the first sample_rate_hz / 100 entries of |buffer|:
// sample k gets the value |data| * k. The product is formed in floating point
// and only then converted to int16_t, so a non-integer |data| costs a
// per-sample rounding error instead of an error that accumulates.
void SetMonoFrame(int16_t* buffer, float data, int sample_rate_hz) {
  const int samples_in_frame = sample_rate_hz / 100;
  int index = 0;
  while (index < samples_in_frame) {
    buffer[index] = static_cast<int16_t>(data * index);
    ++index;
  }
}
if (ValidRates(kRates[i], kRates[j])) {
int in_length = kRates[i] / 100;
int out_length = 0;
EXPECT_EQ(0, rs_.Reset(kRates[i], kRates[j], kResamplerSynchronous));
EXPECT_EQ(0, rs_.Push(data_in_, in_length, data_out_, kDataSize,
out_length));
EXPECT_EQ(kRates[j] / 100, out_length);
} else {
EXPECT_EQ(-1, rs_.Reset(kRates[i], kRates[j], kResamplerSynchronous));
}
// Writes an interleaved stereo ramp into |buffer|. For each of the
// sample_rate_hz / 100 sample pairs, the even slot (left channel) receives
// |left| * k and the odd slot (right channel) receives |right| * k, where k is
// the pair index.
void SetStereoFrame(int16_t* buffer, float left, float right,
                    int sample_rate_hz) {
  const int pairs_in_frame = sample_rate_hz / 100;
  int16_t* cursor = buffer;
  for (int pair = 0; pair < pairs_in_frame; ++pair, cursor += 2) {
    cursor[0] = static_cast<int16_t>(left * pair);
    cursor[1] = static_cast<int16_t>(right * pair);
  }
}
// Computes the best SNR (in dB) based on the error between |reference| and
// |test|. To compensate for the resampling delay, every delay in
// [0, max_delay) is tried, comparing reference[i] with test[i + delay]; the
// highest SNR found is printed together with its delay and returned.
//
// |sample_rate_hz| / 100 * |channels| interleaved samples are examined in
// each buffer. A zero-error alignment is reported as 100 dB. The result never
// goes below 0: that is the starting value, and it is also what is returned
// when |max_delay| <= 0 or when every candidate SNR is non-positive.
float ComputeSNR(const int16_t* reference, const int16_t* test,
                 int sample_rate_hz, int channels, int max_delay) {
  float best_snr = 0;
  int best_delay = 0;
  int samples_per_channel = sample_rate_hz / 100;
  for (int delay = 0; delay < max_delay; delay++) {
    float mse = 0;
    float variance = 0;
    for (int i = 0; i < samples_per_channel * channels - delay; i++) {
      // The difference of two int16_t values can reach +/-65535, whose square
      // does not fit in a 32-bit int. Square it in floating point so a badly
      // wrong signal cannot overflow and be mistaken for a perfect one.
      const float error = static_cast<float>(reference[i] - test[i + delay]);
      const float ref_sample = reference[i];
      mse += error * error;
      variance += ref_sample * ref_sample;
    }
    float snr = 100;  // We assign 100 dB to the zero-error case.
    if (mse > 0)
      snr = 10 * log10(variance / mse);
    if (snr > best_snr) {
      best_snr = snr;
      best_delay = delay;
    }
  }
  printf("SNR=%.1f dB at delay=%d\n", best_snr, best_delay);
  return best_snr;
}
// Resamples one generated frame from |src_sample_rate_hz| to
// |dst_sample_rate_hz| with |channels| channels (1 selects the mono path, any
// other value the stereo path) and checks the result.
//
// The input is a linear ramp per channel. The reference is the same ramp with
// its slope scaled by src/dst rate, i.e. what an ideal resampler would output.
// The test expects Reset() and Push() to return 0, the output length to be
// channels * dst_sample_rate_hz / 100, and the best SNR against the reference
// to exceed 40 dB.
//
// The fixture's rs_ is used; Reset() is called on it at the start of every
// run, so no separate Resampler instance is created here.
void ResamplerTest::RunResampleTest(int channels,
                                    int src_sample_rate_hz,
                                    int dst_sample_rate_hz) {
  const int16_t kSrcLeft = 60;  // Shouldn't overflow for any used sample rate.
  const int16_t kSrcRight = 30;
  const float kResamplingFactor = (1.0 * src_sample_rate_hz) /
      dst_sample_rate_hz;
  const float kDstLeft = kResamplingFactor * kSrcLeft;
  const float kDstRight = kResamplingFactor * kSrcRight;

  // Fill the input ramp, clear the output and build the expected ramp.
  if (channels == 1) {
    SetMonoFrame(data_in_, kSrcLeft, src_sample_rate_hz);
    SetMonoFrame(data_out_, 0, dst_sample_rate_hz);
    SetMonoFrame(data_reference_, kDstLeft, dst_sample_rate_hz);
  } else {
    SetStereoFrame(data_in_, kSrcLeft, kSrcRight, src_sample_rate_hz);
    SetStereoFrame(data_out_, 0, 0, dst_sample_rate_hz);
    SetStereoFrame(data_reference_, kDstLeft, kDstRight, dst_sample_rate_hz);
  }

  // The speex resampler has a known delay dependent on quality and rates,
  // which we approximate here. Multiplying by two gives us a crude maximum
  // for any resampling, as the old resampler typically (but not always)
  // has lower delay. The actual delay is calculated internally based on the
  // filter length in the QualityMap.
  static const int kInputKernelDelaySamples = 16*3;
  const int max_delay = std::min(1.0f, 1/kResamplingFactor) *
                        kInputKernelDelaySamples * channels * 2;
  printf("(%d, %d Hz) -> (%d, %d Hz) ",  // SNR reported on the same line later.
      channels, src_sample_rate_hz, channels, dst_sample_rate_hz);

  int in_length = channels * src_sample_rate_hz / 100;
  int out_length = 0;
  EXPECT_EQ(0, rs_.Reset(src_sample_rate_hz, dst_sample_rate_hz,
                         (channels == 1 ?
                          kResamplerSynchronous :
                          kResamplerSynchronousStereo)));
  EXPECT_EQ(0, rs_.Push(data_in_, in_length, data_out_, kDataSize,
                        out_length));
  EXPECT_EQ(channels * dst_sample_rate_hz / 100, out_length);

  EXPECT_GT(ComputeSNR(data_reference_, data_out_, dst_sample_rate_hz,
                       channels, max_delay), 40.0f);
}
// Mono resampling check: runs RunResampleTest for every ordered pair of the
// sample rates listed below, including equal input and output rates.
TEST_F(ResamplerTest, Synchronous) {
  // We don't attempt to be exhaustive here, but just get good coverage. Some
  // combinations of rates will not be resampled, and some give an odd
  // resampling factor which makes it more difficult to evaluate.
  const int kSampleRates[] = {16000, 32000, 44100, 48000};
  const int kSampleRatesSize = sizeof(kSampleRates) / sizeof(*kSampleRates);
  // Number of channels is 1, mono mode.
  const int kChannels = 1;

  for (int src_index = 0; src_index < kSampleRatesSize; ++src_index) {
    const int src_rate_hz = kSampleRates[src_index];
    for (int dst_index = 0; dst_index < kSampleRatesSize; ++dst_index) {
      const int dst_rate_hz = kSampleRates[dst_index];
      RunResampleTest(kChannels, src_rate_hz, dst_rate_hz);
    }
  }
}
@ -118,24 +195,14 @@ TEST_F(ResamplerTest, Synchronous) {
TEST_F(ResamplerTest, SynchronousStereo) {
// Number of channels is 2, stereo mode.
const int kChannels = 2;
for (size_t i = 0; i < kRatesSize; ++i) {
for (size_t j = 0; j < kRatesSize; ++j) {
std::ostringstream ss;
ss << "Input rate: " << kRates[i] << ", output rate: " << kRates[j];
SCOPED_TRACE(ss.str());
if (ValidRates(kRates[i], kRates[j])) {
int in_length = kChannels * kRates[i] / 100;
int out_length = 0;
EXPECT_EQ(0, rs_.Reset(kRates[i], kRates[j],
kResamplerSynchronousStereo));
EXPECT_EQ(0, rs_.Push(data_in_, in_length, data_out_, kDataSize,
out_length));
EXPECT_EQ(kChannels * kRates[j] / 100, out_length);
} else {
EXPECT_EQ(-1, rs_.Reset(kRates[i], kRates[j],
kResamplerSynchronousStereo));
}
// We don't attempt to be exhaustive here, but just get good coverage. Some
// combinations of rates will not be resampled, and some give an odd
// resampling factor which makes it more difficult to evaluate.
const int kSampleRates[] = {16000, 32000, 44100, 48000};
const int kSampleRatesSize = sizeof(kSampleRates) / sizeof(*kSampleRates);
for (int src_rate = 0; src_rate < kSampleRatesSize; src_rate++) {
for (int dst_rate = 0; dst_rate < kSampleRatesSize; dst_rate++) {
RunResampleTest(kChannels, kSampleRates[src_rate], kSampleRates[dst_rate]);
}
}
}

View File

@ -47,8 +47,8 @@ int16_t ACMResampler::Resample10Msec(const int16_t* in_audio,
int32_t ret;
ResamplerType type;
type = (num_audio_channels == 1) ? kResamplerSynchronous :
kResamplerSynchronousStereo;
type = (num_audio_channels == 1) ? kResamplerFixedSynchronous :
kResamplerFixedSynchronousStereo;
ret = resampler_.ResetIfNeeded(in_freq_hz, out_freq_hz, type);
if (ret < 0) {

View File

@ -35,7 +35,7 @@ int RemixAndResample(const AudioFrame& src_frame,
}
const ResamplerType resampler_type = audio_ptr_num_channels == 1 ?
kResamplerSynchronous : kResamplerSynchronousStereo;
kResamplerFixedSynchronous : kResamplerFixedSynchronousStereo;
if (resampler->ResetIfNeeded(src_frame.sample_rate_hz_,
dst_frame->sample_rate_hz_,
resampler_type) == -1) {

View File

@ -80,13 +80,14 @@ void VerifyParams(const AudioFrame& ref_frame, const AudioFrame& test_frame) {
}
// Computes the best SNR based on the error between |ref_frame| and
// |test_frame|. It allows for up to a 30 sample delay between the signals to
// |test_frame|. It allows for a sample delay between the signals to
// compensate for the resampling delay.
float ComputeSNR(const AudioFrame& ref_frame, const AudioFrame& test_frame) {
float ComputeSNR(const AudioFrame& ref_frame, const AudioFrame& test_frame,
int max_delay) {
VerifyParams(ref_frame, test_frame);
float best_snr = 0;
int best_delay = 0;
for (int delay = 0; delay < 30; delay++) {
for (int delay = 0; delay < max_delay; delay++) {
float mse = 0;
float variance = 0;
for (int i = 0; i < ref_frame.samples_per_channel_ *
@ -147,18 +148,23 @@ void OutputMixerTest::RunResampleTest(int src_channels,
SetStereoFrame(&golden_frame_, kDstLeft, kDstRight, dst_sample_rate_hz);
}
// The speex resampler has a known delay dependent on quality and rates,
// which we approximate here. Multiplying by two gives us a crude maximum
// for any resampling, as the old resampler typically (but not always)
// has lower delay. The actual delay is calculated internally based on the
// filter length in the QualityMap.
static const int kInputKernelDelaySamples = 16*3;
const int max_delay = std::min(1.0f, 1/kResamplingFactor) *
kInputKernelDelaySamples * dst_channels * 2;
printf("(%d, %d Hz) -> (%d, %d Hz) ", // SNR reported on the same line later.
src_channels, src_sample_rate_hz, dst_channels, dst_sample_rate_hz);
EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler, &dst_frame_));
EXPECT_GT(ComputeSNR(golden_frame_, dst_frame_), 40.0f);
}
TEST_F(OutputMixerTest, RemixAndResampleFailsWithBadSampleRate) {
SetMonoFrame(&dst_frame_, 10, 44100);
EXPECT_EQ(-1, RemixAndResample(src_frame_, &resampler_, &dst_frame_));
VerifyFramesAreEqual(src_frame_, dst_frame_);
EXPECT_GT(ComputeSNR(golden_frame_, dst_frame_, max_delay), 40.0f);
}
// These two tests assume memcpy() (no delay and no filtering) for input
// freq == output freq && same channels. RemixAndResample uses 'Fixed'
// resamplers to enable this behavior
TEST_F(OutputMixerTest, RemixAndResampleCopyFrameSucceeds) {
// Stereo -> stereo.
SetStereoFrame(&src_frame_, 10, 10);
@ -193,7 +199,7 @@ TEST_F(OutputMixerTest, RemixAndResampleSucceeds) {
// We don't attempt to be exhaustive here, but just get good coverage. Some
// combinations of rates will not be resampled, and some give an odd
// resampling factor which makes it more difficult to evaluate.
const int kSampleRates[] = {16000, 32000, 48000};
const int kSampleRates[] = {16000, 32000, 44100, 48000};
const int kSampleRatesSize = sizeof(kSampleRates) / sizeof(*kSampleRates);
const int kChannels[] = {1, 2};
const int kChannelsSize = sizeof(kChannels) / sizeof(*kChannels);

View File

@ -1152,6 +1152,8 @@ bool TransmitMixer::IsRecordingMic()
}
// TODO(andrew): use RemixAndResample for this.
// Note that if drift compensation is done here, a buffering stage will be
// needed and this will need to switch to non-fixed resamplers.
int TransmitMixer::GenerateAudioFrame(const int16_t audio[],
int samples_per_channel,
int num_channels,
@ -1179,7 +1181,7 @@ int TransmitMixer::GenerateAudioFrame(const int16_t audio[],
}
ResamplerType resampler_type = (num_channels == 1) ?
kResamplerSynchronous : kResamplerSynchronousStereo;
kResamplerFixedSynchronous : kResamplerFixedSynchronousStereo;
if (_audioResampler.ResetIfNeeded(sample_rate_hz,
destination_rate,