Bug 1649817 - Verify audio buffers in drift correction tests, r=padenot

In the AudioDriftCorrection gtests, the audio in the buffers is simulated by a sine tone of a specific amplitude and frequency. This patch implements a way to automatically verify that the expected signal is received in the output.

Differential Revision: https://phabricator.services.mozilla.com/D81988
Alex Chronopoulos 2020-07-15 14:02:41 +00:00
parent 53bed70ef3
commit 70eee99400
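
The verification flow the patch sets up looks roughly like the sketch below. This is a simplified illustration, not the tests' exact rates, frame counts, or loop bounds; AudioToneGenerator and AudioToneVerifier are the helpers added by this patch, and RequestFrames is the AudioDriftCorrection entry point the tests exercise.

// Minimal sketch of the verification flow (illustrative values only).
AudioDriftCorrection ad(48000, 44100);
AudioToneGenerator<float> tone(48000);
AudioToneVerifier<float> outVerifier(44100);
for (int n = 0; n < 100; ++n) {
  AudioChunk chunk = CreateAudioChunk<float>(480, 1, AUDIO_FORMAT_FLOAT32);
  tone.Write(chunk);  // fill the input with the known sine tone
  AudioSegment inSegment;
  inSegment.AppendAndConsumeChunk(&chunk);
  AudioSegment outSegment = ad.RequestFrames(inSegment, 441);
  outVerifier.AppendData(outSegment);  // accumulate statistics on the output
}
EXPECT_EQ(outVerifier.EstimatedFreq(), 100);       // the tone came through
EXPECT_EQ(outVerifier.CountDiscontinuities(), 0);  // with no glitches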


@@ -199,13 +199,159 @@ TEST(TestClockDrift, SmallBufferedFrames)
EXPECT_FLOAT_EQ(c.GetCorrection(), 1.1);
}
template <typename T>
class AudioToneGenerator {
public:
static_assert(std::is_same<T, int16_t>::value ||
std::is_same<T, float>::value);
explicit AudioToneGenerator(int32_t aRate) : mRate(aRate) {
MOZ_ASSERT(mRate > 0);
}
void Write(AudioChunk& aChunk) {
float time = mTime;
for (uint32_t i = 0; i < aChunk.ChannelCount(); ++i) {
mTime = time; // reset the time for each channel
T* buffer = aChunk.ChannelDataForWrite<T>(i);
Write(buffer, aChunk.GetDuration());
}
}
void Write(T* aBuffer, int32_t aFrames) {
for (int i = 0; i < aFrames; ++i) {
double value = Amplitude() * sin(2 * M_PI * mFrequency * mTime + mPhase);
aBuffer[i] = static_cast<T>(value);
mTime += mDeltaTime;
}
}
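// Upper bound on the step between two consecutive samples of the tone: the
// sine changes fastest at its zero crossing, i.e. during the first mDeltaTime.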
T MaxMagnitudeDifference() {
return static_cast<T>(Amplitude() *
sin(2 * M_PI * mFrequency * mDeltaTime + mPhase));
}
const int32_t mRate;
const int32_t mFrequency = 100;
const float mPhase = 0.0;
const float mDeltaTime = 1.0f / mRate;
static T Amplitude() {
if (std::is_same<T, int16_t>::value) {
return 16384; // ~int16_t max / 2
}
return 0.5f; // 1.0f / 2
}
private:
float mTime = 0.0;
};
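// Example (hypothetical values): at 44100 Hz, one period of the default
// 100 Hz tone is 441 frames, so
//   AudioToneGenerator<float> gen(44100);
//   AudioChunk chunk = CreateAudioChunk<float>(441, 1, AUDIO_FORMAT_FLOAT32);
//   gen.Write(chunk);
// fills the chunk with exactly one full sine cycle at 0.5 amplitude.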
template <typename T>
class AudioToneVerifier {
public:
explicit AudioToneVerifier(int32_t aRate)
: mRate(aRate), mExpectedTone(aRate) {
MOZ_ASSERT(mRate > 0);
}
// Only the first (mono) channel of each chunk is verified.
void AppendData(const AudioSegment& aSegment) {
for (AudioSegment::ConstChunkIterator iter(aSegment); !iter.IsEnded();
iter.Next()) {
const AudioChunk& c = *iter;
CheckBuffer(c.ChannelData<T>()[0], c.GetDuration());
}
}
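// Estimate the frequency from the mean period between downward zero
// crossings. For example, a 100 Hz tone sampled at 48000 Hz crosses downward
// every 480 frames, so 48000 / 480 recovers 100 Hz.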
int32_t EstimatedFreq() const {
if (mTotalFramesSoFar == PreSilenceSamples() || mZeroCrossCount <= 1) {
return 0;
}
MOZ_ASSERT(mZeroCrossCount > 1);
return mRate / (mSumPeriodInSamples / (mZeroCrossCount - 1));
}
int64_t PreSilenceSamples() const {
// The first sample of the sinewave is zero.
MOZ_ASSERT(mPreSilenceSamples >= 1);
return mPreSilenceSamples - 1;
}
int32_t CountDiscontinuities() const {
// Every discontinuity is counted twice: once on the jump away from the tone
// and once on the jump back to it.
return mDiscontinuitiesCount / 2;
}
private:
void CheckBuffer(const T* aBuffer, int32_t aSamples) {
for (int i = 0; i < aSamples; ++i) {
++mTotalFramesSoFar;
// Avoid pre-silence
if (!CountPreSilence(aBuffer[i])) {
CountZeroCrossing(aBuffer[i]);
CountDiscontinuities(aBuffer[i]);
}
mPrevious = aBuffer[i];
}
}
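// A sample counts as pre-silence only while every frame seen so far has been
// silent; the first non-zero sample ends the pre-silence phase permanently.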
bool CountPreSilence(T aCurrentSample) {
if (IsZero(aCurrentSample) && mPreSilenceSamples == mTotalFramesSoFar - 1) {
++mPreSilenceSamples;
return true;
}
return false;
}
// Count zero crossings in the positive-to-negative direction only.
void CountZeroCrossing(T aCurrentSample) {
if (mPrevious > 0 && aCurrentSample <= 0) {
if (mZeroCrossCount++) {
MOZ_ASSERT(mZeroCrossCount > 1);
mSumPeriodInSamples += mTotalFramesSoFar - mLastZeroCrossPosition;
}
mLastZeroCrossPosition = mTotalFramesSoFar;
}
}
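// Flag a discontinuity when the magnitude jump between two consecutive
// samples exceeds twice the largest step the expected tone can produce.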
void CountDiscontinuities(T aCurrentSample) {
mDiscontinuitiesCount += fabs(fabs(aCurrentSample) - fabs(mPrevious)) >
2 * mExpectedTone.MaxMagnitudeDifference();
}
bool IsZero(float a) { return fabs(a) < 1e-8; }
bool IsZero(short a) { return a == 0; }
private:
const int32_t mRate;
AudioToneGenerator<T> mExpectedTone;
int32_t mZeroCrossCount = 0;
int64_t mLastZeroCrossPosition = 0;
int64_t mSumPeriodInSamples = 0;
int64_t mTotalFramesSoFar = 0;
int64_t mPreSilenceSamples = 0;
int32_t mDiscontinuitiesCount = 0;
// Needed to connect the current buffer with the previous one.
T mPrevious = {};
};
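// Example (hypothetical values, mirroring how the tests below use it):
//   AudioToneVerifier<float> verifier(48000);
//   verifier.AppendData(someSegment);  // may be called repeatedly
//   EXPECT_EQ(verifier.EstimatedFreq(), 100);
//   EXPECT_EQ(verifier.CountDiscontinuities(), 0);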
// Print the mono channel of a segment.
void printAudioSegment(const AudioSegment& segment) {
for (AudioSegment::ConstChunkIterator iter(segment); !iter.IsEnded();
iter.Next()) {
const AudioChunk& c = *iter;
for (int i = 0; i < c.GetDuration(); ++i) {
if (c.mBufferFormat == AUDIO_FORMAT_FLOAT32) {
printf("%f\n", c.ChannelData<float>()[0][i]);
} else {
printf("%d\n", c.ChannelData<int16_t>()[0][i]);
}
}
}
}
@@ -219,11 +365,9 @@ void testAudioCorrection(int32_t aSourceRate, int32_t aTargetRate) {
const int32_t sampleRateReceiver = aTargetRate;
AudioDriftCorrection ad(sampleRateTransmitter, sampleRateReceiver);
AudioToneGenerator<AudioDataValue> tone(sampleRateTransmitter);
AudioToneVerifier<AudioDataValue> inToneVerifier(sampleRateTransmitter);
AudioToneVerifier<AudioDataValue> outToneVerifier(sampleRateReceiver);
int32_t sourceFrames;
const int32_t targetFrames = sampleRateReceiver / 100;
@@ -239,16 +383,12 @@ void testAudioCorrection(int32_t aSourceRate, int32_t aTargetRate) {
for (int n = 0; n < 250; ++n) {
// Create the input (sine tone)
AudioChunk chunk = CreateAudioChunk<AudioDataValue>(sourceFrames, 1,
AUDIO_OUTPUT_FORMAT);
tone.Write(chunk);
AudioSegment inSegment;
inSegment.AppendAndConsumeChunk(&chunk);
inToneVerifier.AppendData(inSegment);
// Print the input for debugging
// printAudioSegment(inSegment);
@@ -257,8 +397,18 @@ void testAudioCorrection(int32_t aSourceRate, int32_t aTargetRate) {
EXPECT_EQ(outSegment.GetDuration(), targetFrames);
// Print the output for debugging
// printAudioSegment(outSegment);
outToneVerifier.AppendData(outSegment);
}
}
EXPECT_EQ(inToneVerifier.EstimatedFreq(), tone.mFrequency);
EXPECT_EQ(inToneVerifier.PreSilenceSamples(), 0);
EXPECT_EQ(inToneVerifier.CountDiscontinuities(), 0);
EXPECT_EQ(outToneVerifier.EstimatedFreq(), tone.mFrequency);
// The expected pre-silence is 50ms plus the resampler latency. At the sample
// rates used here this comes to roughly more than 2000 frames (50ms alone is
// already 2205 frames at 44100 Hz).
EXPECT_GT(outToneVerifier.PreSilenceSamples(), 2000);
EXPECT_EQ(outToneVerifier.CountDiscontinuities(), 0);
}
TEST(TestAudioDriftCorrection, Basic)
@@ -273,11 +423,9 @@ void testMonoToStereoInput(int aSourceRate, int aTargetRate) {
const int32_t sampleRateReceiver = aTargetRate;
AudioDriftCorrection ad(sampleRateTransmitter, sampleRateReceiver);
AudioToneGenerator<AudioDataValue> tone(sampleRateTransmitter);
AudioToneVerifier<AudioDataValue> inToneVerify(sampleRateTransmitter);
AudioToneVerifier<AudioDataValue> outToneVerify(sampleRateReceiver);
int32_t sourceFrames;
const int32_t targetFrames = sampleRateReceiver / 100;
@@ -293,25 +441,18 @@ void testMonoToStereoInput(int aSourceRate, int aTargetRate) {
for (int n = 0; n < 250; ++n) {
// Create the input (sine tone)
AudioChunk chunk = CreateAudioChunk<AudioDataValue>(sourceFrames / 2, 1,
AUDIO_OUTPUT_FORMAT);
tone.Write(chunk);
AudioChunk chunk2 = CreateAudioChunk<AudioDataValue>(sourceFrames / 2, 2,
AUDIO_OUTPUT_FORMAT);
tone.Write(chunk2);
AudioSegment inSegment;
inSegment.AppendAndConsumeChunk(&chunk);
inSegment.AppendAndConsumeChunk(&chunk2);
inToneVerify.AppendData(inSegment);
// Print the input for debugging
// printAudioSegment(inSegment);
@@ -320,8 +461,21 @@ void testMonoToStereoInput(int aSourceRate, int aTargetRate) {
EXPECT_EQ(outSegment.GetDuration(), targetFrames);
// Print the output for debugging
// printAudioSegment(outSegment);
outToneVerify.AppendData(outSegment);
}
}
EXPECT_EQ(inToneVerify.EstimatedFreq(), tone.mFrequency);
EXPECT_EQ(inToneVerify.PreSilenceSamples(), 0);
EXPECT_EQ(inToneVerify.CountDiscontinuities(), 0);
EXPECT_GT(outToneVerify.CountDiscontinuities(), 0)
<< "Expect discontinuities";
EXPECT_NE(outToneVerify.EstimatedFreq(), tone.mFrequency)
<< "Estimation is not accurate due to discontinuities";
// The expected pre-silence is 50ms plus the resampler latency. However, due
// to the discontinuities, pre-silence is expected only in the first
// iteration, which is roughly a little more than 400 frames for the chosen
// sample rates.
EXPECT_GT(outToneVerify.PreSilenceSamples(), 400);
}
TEST(TestAudioDriftCorrection, MonoToStereoInput)