/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "AudioSegment.h"

#include <algorithm>
#include <string.h>

#include "AudioStream.h"
#include "AudioChannelFormat.h"
#include "Latency.h"
#include "speex/speex_resampler.h"

namespace mozilla {

/**
 * Interleaves planar audio into a single output buffer.
 *
 * For every frame in [0, aLength) and every channel in [0, aChannels), the
 * source sample is converted to float, multiplied by aVolume, converted to
 * DestT and appended to aOutput. aOutput must therefore have room for
 * aLength*aChannels samples, and aSourceChannels must hold aChannels pointers
 * to at least aLength samples each.
 */
template <class SrcT, class DestT>
static void
InterleaveAndConvertBuffer(const SrcT** aSourceChannels,
                           int32_t aLength, float aVolume,
                           int32_t aChannels,
                           DestT* aOutput)
{
  DestT* output = aOutput;
  for (int32_t i = 0; i < aLength; ++i) {
    for (int32_t channel = 0; channel < aChannels; ++channel) {
      float v = AudioSampleToFloat(aSourceChannels[channel][i])*aVolume;
      *output = FloatToAudioSample<DestT>(v);
      ++output;
    }
  }
}

/**
 * Format-dispatching front end for the templated InterleaveAndConvertBuffer.
 *
 * aSourceChannels is reinterpreted as an array of float or int16_t channel
 * pointers according to aSourceFormat. Formats other than
 * AUDIO_FORMAT_FLOAT32 and AUDIO_FORMAT_S16 leave aOutput untouched.
 */
void
InterleaveAndConvertBuffer(const void** aSourceChannels,
                           AudioSampleFormat aSourceFormat,
                           int32_t aLength, float aVolume,
                           int32_t aChannels,
                           AudioDataValue* aOutput)
{
  switch (aSourceFormat) {
  case AUDIO_FORMAT_FLOAT32:
    InterleaveAndConvertBuffer(reinterpret_cast<const float**>(aSourceChannels),
                               aLength,
                               aVolume,
                               aChannels,
                               aOutput);
    break;
  case AUDIO_FORMAT_S16:
    InterleaveAndConvertBuffer(reinterpret_cast<const int16_t**>(aSourceChannels),
                               aLength,
                               aVolume,
                               aChannels,
                               aOutput);
    break;
  }
}

/**
 * Scales the volume of every chunk in this segment by aVolume. Only the
 * per-chunk mVolume factor is updated; sample data is not touched.
 */
void
AudioSegment::ApplyVolume(float aVolume)
{
  for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
    ci->mVolume *= aVolume;
  }
}

// Maximum number of frames handed to the output stream per write.
static const int AUDIO_PROCESSING_FRAMES = 640; /* > 10ms of 48KHz audio */
// Zero-filled channel used as the source for channels added by up-mixing. It
// is sized for AUDIO_PROCESSING_FRAMES samples of the largest sample format.
static const uint8_t gZeroChannel[MAX_AUDIO_SAMPLE_SIZE*AUDIO_PROCESSING_FRAMES] = {0};

/**
 * Mixes aChannelData down to aOutputChannels channels and writes the result,
 * interleaved and scaled by aVolume, to aOutput.
 *
 * @param aChannelData    One pointer per input channel, each referencing at
 *                        least aDuration samples of format aSourceFormat.
 * @param aSourceFormat   AUDIO_FORMAT_FLOAT32 or AUDIO_FORMAT_S16. S16 input
 *                        is converted to float before mixing.
 * @param aDuration       Number of frames to process.
 * @param aVolume         Gain applied while interleaving.
 * @param aOutputChannels Number of channels to produce.
 * @param aOutput         Receives aDuration*aOutputChannels samples.
 *
 * If the input does not have more channels than requested, no down-mix is
 * performed: the existing channels are passed through and any missing
 * channels are output as silence.
 */
void
DownmixAndInterleave(const nsTArray<const void*>& aChannelData,
                     AudioSampleFormat aSourceFormat, int32_t aDuration,
                     float aVolume, uint32_t aOutputChannels,
                     AudioDataValue* aOutput)
{
  nsAutoTArray<const void*,GUESS_AUDIO_CHANNELS> channelData;
  nsAutoTArray<float,AUDIO_PROCESSING_FRAMES*GUESS_AUDIO_CHANNELS> downmixConversionBuffer;
  nsAutoTArray<float,AUDIO_PROCESSING_FRAMES*GUESS_AUDIO_CHANNELS> downmixOutputBuffer;

  // Build a float view of every input channel; the mixing below only
  // operates on float data.
  channelData.SetLength(aChannelData.Length());
  if (aSourceFormat != AUDIO_FORMAT_FLOAT32) {
    NS_ASSERTION(aSourceFormat == AUDIO_FORMAT_S16, "unknown format");
    downmixConversionBuffer.SetLength(aDuration*aChannelData.Length());
    for (uint32_t i = 0; i < aChannelData.Length(); ++i) {
      float* conversionBuf = downmixConversionBuffer.Elements() + (i*aDuration);
      const int16_t* sourceBuf = static_cast<const int16_t*>(aChannelData[i]);
      for (uint32_t j = 0; j < (uint32_t)aDuration; ++j) {
        conversionBuf[j] = AudioSampleToFloat(sourceBuf[j]);
      }
      channelData[i] = conversionBuf;
    }
  } else {
    for (uint32_t i = 0; i < aChannelData.Length(); ++i) {
      channelData[i] = aChannelData[i];
    }
  }

  // Carve one planar float buffer per output channel out of
  // downmixOutputBuffer.
  downmixOutputBuffer.SetLength(aDuration*aOutputChannels);
  nsAutoTArray<float*,GUESS_AUDIO_CHANNELS> outputChannelBuffers;
  nsAutoTArray<const void*,GUESS_AUDIO_CHANNELS> outputChannelData;
  outputChannelBuffers.SetLength(aOutputChannels);
  outputChannelData.SetLength(aOutputChannels);
  for (uint32_t i = 0; i < (uint32_t)aOutputChannels; ++i) {
    outputChannelData[i] = outputChannelBuffers[i] =
      downmixOutputBuffer.Elements() + aDuration*i;
  }

  if (channelData.Length() > aOutputChannels) {
    AudioChannelsDownMix(channelData, outputChannelBuffers.Elements(),
                         aOutputChannels, aDuration);
  } else {
    // Nothing to down-mix, so downmixOutputBuffer has not been written.
    // Interleave straight from the input channels instead of from that
    // buffer, and emit silence for any channel the input does not provide.
    for (uint32_t i = 0; i < aOutputChannels; ++i) {
      if (i < channelData.Length()) {
        outputChannelData[i] = channelData[i];
      } else {
        std::fill_n(outputChannelBuffers[i], aDuration, 0.0f);
      }
    }
  }

  InterleaveAndConvertBuffer(outputChannelData.Elements(), AUDIO_FORMAT_FLOAT32,
                             aDuration, aVolume, aOutputChannels, aOutput);
}

/**
 * Resamples every chunk of this segment using aResampler, from the
 * resampler's configured input rate to its output rate. Does nothing when
 * the segment has no chunks.
 */
void
AudioSegment::ResampleChunks(SpeexResamplerState* aResampler)
{
  uint32_t inRate, outRate;

  if (mChunks.IsEmpty()) {
    return;
  }

  speex_resampler_get_rate(aResampler, &inRate, &outRate);

  // The sample format of the first chunk selects the Resample instantiation.
  // NOTE(review): this presumes all chunks share that format -- confirm.
  switch (mChunks[0].mBufferFormat) {
    case AUDIO_FORMAT_FLOAT32:
      Resample<float>(aResampler, inRate, outRate);
      break;
    case AUDIO_FORMAT_S16:
      Resample<int16_t>(aResampler, inRate, outRate);
      break;
    default:
      MOZ_ASSERT(false);
      break;
  }
}

/**
 * Writes the contents of this segment to aOutput and then starts the stream.
 *
 * Each chunk is written in slices of at most AUDIO_PROCESSING_FRAMES frames.
 * Every slice is up-mixed or down-mixed to the stream's channel count,
 * scaled by the chunk volume and interleaved before being written. For
 * chunks carrying a timestamp, the time elapsed since that timestamp is
 * logged under aID once per slice.
 *
 * @param aID     Identifier passed through to the latency logger.
 * @param aOutput Stream that receives the audio.
 */
void
AudioSegment::WriteTo(uint64_t aID, AudioStream* aOutput)
{
  uint32_t outputChannels = aOutput->GetChannels();
  nsAutoTArray<AudioDataValue,AUDIO_PROCESSING_FRAMES*GUESS_AUDIO_CHANNELS> buf;
  nsAutoTArray<const void*,GUESS_AUDIO_CHANNELS> channelData;

  for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
    AudioChunk& c = *ci;
    TrackTicks offset = 0;
    while (offset < c.mDuration) {
      TrackTicks durationTicks =
        std::min<TrackTicks>(c.mDuration - offset, AUDIO_PROCESSING_FRAMES);
      // The sizes and offsets below are narrowed to 32 bits; bail out rather
      // than truncate.
      if (uint64_t(outputChannels)*durationTicks > INT32_MAX || offset > INT32_MAX) {
        NS_ERROR("Buffer overflow");
        return;
      }

      uint32_t duration = uint32_t(durationTicks);

      // If we have written data in the past, or we have real (non-silent) data
      // to write, we can proceed. Otherwise, it means we just started the
      // AudioStream, and we don't have real data to write to it (just silence).
      // To avoid overbuffering in the AudioStream, we simply drop the silence,
      // here. The stream will underrun and output silence anyways.
      if (c.mBuffer || aOutput->GetWritten()) {
        buf.SetLength(outputChannels*duration);
        if (c.mBuffer) {
          channelData.SetLength(c.mChannelData.Length());
          for (uint32_t i = 0; i < channelData.Length(); ++i) {
            channelData[i] =
              AddAudioSampleOffset(c.mChannelData[i], c.mBufferFormat, int32_t(offset));
          }

          if (channelData.Length() < outputChannels) {
            // Up-mix. Note that this might actually make channelData have more
            // than outputChannels temporarily.
            AudioChannelsUpMix(&channelData, outputChannels, gZeroChannel);
          }

          if (channelData.Length() > outputChannels) {
            // Down-mix.
            DownmixAndInterleave(channelData, c.mBufferFormat, duration,
                                 c.mVolume, outputChannels, buf.Elements());
          } else {
            InterleaveAndConvertBuffer(channelData.Elements(), c.mBufferFormat,
                                       duration, c.mVolume,
                                       outputChannels,
                                       buf.Elements());
          }
        } else {
          // Assumes that a bit pattern of zeroes == 0.0f
          memset(buf.Elements(), 0, buf.Length()*sizeof(AudioDataValue));
        }
        aOutput->Write(buf.Elements(), int32_t(duration), &(c.mTimeStamp));
      }
      if (!c.mTimeStamp.IsNull()) {
        TimeStamp now = TimeStamp::Now();
        // It would be more efficient to convert c.mTimeStamp to milliseconds
        // when the chunk is created and pass that value here.
        LogTime(AsyncLatencyLogger::AudioMediaStreamTrack, aID,
                (now - c.mTimeStamp).ToMilliseconds(), c.mTimeStamp);
      }
      offset += duration;
    }
  }
  aOutput->Start();
}

}