Bug 877662 - Align audio buffer allocations to 16 byte boundaries r=padenot

To be able to use SSE2 routines, we need audio buffers to be allocated
on 16 byte boundaries.

MozReview-Commit-ID: 2mjxMWqysFd

--HG--
extra : rebase_source : 8bd7d48b767b7bcfa5874061586b9b41c26a18ae
This commit is contained in:
Dan Minor 2016-04-13 15:31:50 -04:00
parent 7bd057f84a
commit 8c7cbbbf79
11 changed files with 73 additions and 39 deletions

View File

@ -5,6 +5,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "AudioBlock.h"
#include "AlignmentUtils.h"
namespace mozilla {
@ -17,9 +18,7 @@ namespace mozilla {
* buffer can reuse and modify its contents next iteration if other references
* are all downstream temporary references held by AudioBlock.
*
* This only guarantees 4-byte alignment of the data. For alignment we simply
* assume that the memory from malloc is at least 4-byte aligned and that
* AudioBlockBuffer's size is divisible by 4.
* We guarantee 16 byte alignment of the channel data.
*/
class AudioBlockBuffer final : public ThreadSharedObject {
public:
@ -28,7 +27,9 @@ public:
float* ChannelData(uint32_t aChannel)
{
return reinterpret_cast<float*>(this + 1) + aChannel * WEBAUDIO_BLOCK_SIZE;
float* base = reinterpret_cast<float*>(((uintptr_t)(this + 1) + 15) & ~0x0F);
ASSERT_ALIGNED16(base);
return base + aChannel * WEBAUDIO_BLOCK_SIZE;
}
static already_AddRefed<AudioBlockBuffer> Create(uint32_t aChannelCount)
@ -37,9 +38,11 @@ public:
size *= aChannelCount;
size *= sizeof(float);
size += sizeof(AudioBlockBuffer);
size += 15; //padding for alignment
if (!size.isValid()) {
MOZ_CRASH();
}
void* m = moz_xmalloc(size.value());
RefPtr<AudioBlockBuffer> p = new (m) AudioBlockBuffer();
NS_ASSERTION((reinterpret_cast<char*>(p.get() + 1) - reinterpret_cast<char*>(p.get())) % 4 == 0,
@ -150,8 +153,6 @@ AudioBlock::AllocateChannels(uint32_t aChannelCount)
}
}
// XXX for SIMD purposes we should do something here to make sure the
// channel buffers are 16-byte aligned.
RefPtr<AudioBlockBuffer> buffer = AudioBlockBuffer::Create(aChannelCount);
mChannelData.SetLength(aChannelCount);
for (uint32_t i = 0; i < aChannelCount; ++i) {

View File

@ -5,6 +5,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "AudioDestinationNode.h"
#include "AlignmentUtils.h"
#include "AudioContext.h"
#include "mozilla/dom/AudioDestinationNodeBinding.h"
#include "mozilla/dom/ScriptSettings.h"
@ -87,7 +88,7 @@ public:
PodZero(outputData, duration);
} else {
const float* inputBuffer = static_cast<const float*>(aInput.mChannelData[i]);
if (duration == WEBAUDIO_BLOCK_SIZE) {
if (duration == WEBAUDIO_BLOCK_SIZE && IS_ALIGNED16(inputBuffer)) {
// Use the optimized version of the copy with scale operation
AudioBlockCopyChannelWithScale(inputBuffer, aInput.mVolume,
outputData);

View File

@ -3,6 +3,8 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "AlignedTArray.h"
#include "AlignmentUtils.h"
#include "AudioNodeEngine.h"
#include "AudioNodeExternalInputStream.h"
#include "AudioChannelFormat.h"
@ -90,9 +92,20 @@ static void ConvertSegmentToAudioBlock(AudioSegment* aSegment,
NS_ASSERTION(!ci.IsEnded(), "Should be at least one chunk!");
if (ci->GetDuration() == WEBAUDIO_BLOCK_SIZE &&
(ci->IsNull() || ci->mBufferFormat == AUDIO_FORMAT_FLOAT32)) {
bool aligned = true;
for (size_t i = 0; i < ci->mChannelData.Length(); ++i) {
if (!IS_ALIGNED16(ci->mChannelData[i])) {
aligned = false;
break;
}
}
// Return this chunk directly to avoid copying data.
*aBlock = *ci;
return;
if (aligned) {
*aBlock = *ci;
return;
}
}
}
@ -192,7 +205,10 @@ AudioNodeExternalInputStream::ProcessInput(GraphTime aFrom, GraphTime aTo,
uint32_t accumulateIndex = 0;
if (inputChannels) {
AutoTArray<float,GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE> downmixBuffer;
// TODO: See Bug 1261168. Ideally we would use an aligned version of
// AutoTArray (of size GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE) here.
AlignedTArray<float,16> downmixBuffer;
downmixBuffer.SetLength(GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE);
for (uint32_t i = 0; i < audioSegments.Length(); ++i) {
AudioBlock tmpChunk;
ConvertSegmentToAudioBlock(&audioSegments[i], &tmpChunk, inputChannels);

View File

@ -453,8 +453,10 @@ AudioNodeStream::ObtainInputBlock(AudioBlock& aTmpChunk,
}
aTmpChunk.AllocateChannels(outputChannelCount);
// The static storage here should be 1KB, so it's fine
AutoTArray<float, GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE> downmixBuffer;
// TODO: See Bug 1261168. Ideally we would use an aligned version of
// AutoTArray (of size GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE) here.
AlignedTArray<float, 16> downmixBuffer;
downmixBuffer.SetLength(GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE);
for (uint32_t i = 0; i < inputChunkCount; ++i) {
AccumulateInputChunk(i, *inputChunks[i], &aTmpChunk, &downmixBuffer);
@ -465,7 +467,7 @@ void
AudioNodeStream::AccumulateInputChunk(uint32_t aInputIndex,
const AudioBlock& aChunk,
AudioBlock* aBlock,
nsTArray<float>* aDownmixBuffer)
AlignedTArray<float, 16>* aDownmixBuffer)
{
AutoTArray<const float*,GUESS_AUDIO_CHANNELS> channels;
UpMixDownMixChunk(&aChunk, aBlock->ChannelCount(), channels, *aDownmixBuffer);
@ -491,7 +493,7 @@ void
AudioNodeStream::UpMixDownMixChunk(const AudioBlock* aChunk,
uint32_t aOutputChannelCount,
nsTArray<const float*>& aOutputChannels,
nsTArray<float>& aDownmixBuffer)
AlignedTArray<float, 16>& aDownmixBuffer)
{
for (uint32_t i = 0; i < aChunk->ChannelCount(); i++) {
aOutputChannels.AppendElement(static_cast<const float*>(aChunk->mChannelData[i]));

View File

@ -8,6 +8,7 @@
#include "MediaStreamGraph.h"
#include "mozilla/dom/AudioNodeBinding.h"
#include "AlignedTArray.h"
#include "AudioBlock.h"
namespace mozilla {
@ -190,10 +191,10 @@ protected:
void FinishOutput();
void AccumulateInputChunk(uint32_t aInputIndex, const AudioBlock& aChunk,
AudioBlock* aBlock,
nsTArray<float>* aDownmixBuffer);
AlignedTArray<float, 16>* aDownmixBuffer);
void UpMixDownMixChunk(const AudioBlock* aChunk, uint32_t aOutputChannelCount,
nsTArray<const float*>& aOutputChannels,
nsTArray<float>& aDownmixBuffer);
AlignedTArray<float, 16>& aDownmixBuffer);
uint32_t ComputedNumberOfChannels(uint32_t aInputChannelCount);
void ObtainInputBlock(AudioBlock& aTmpChunk, uint32_t aPortIndex);

View File

@ -5,6 +5,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "BiquadFilterNode.h"
#include "AlignmentUtils.h"
#include "AudioNodeEngine.h"
#include "AudioNodeStream.h"
#include "AudioDestinationNode.h"
@ -137,7 +138,9 @@ public:
AudioBlock* aOutput,
bool* aFinished) override
{
float inputBuffer[WEBAUDIO_BLOCK_SIZE];
float inputBuffer[WEBAUDIO_BLOCK_SIZE + 4];
float* alignedInputBuffer = ALIGNED16(inputBuffer);
ASSERT_ALIGNED16(alignedInputBuffer);
if (aInput.IsNull()) {
bool hasTail = false;
@ -191,12 +194,12 @@ public:
for (uint32_t i = 0; i < numberOfChannels; ++i) {
const float* input;
if (aInput.IsNull()) {
input = inputBuffer;
input = alignedInputBuffer;
} else {
input = static_cast<const float*>(aInput.mChannelData[i]);
if (aInput.mVolume != 1.0) {
AudioBlockCopyChannelWithScale(input, aInput.mVolume, inputBuffer);
input = inputBuffer;
AudioBlockCopyChannelWithScale(input, aInput.mVolume, alignedInputBuffer);
input = alignedInputBuffer;
}
}
SetParamsOnBiquad(mBiquads[i], aStream->SampleRate(), mType, freq, q, gain, detune);

View File

@ -6,6 +6,7 @@
#include "ConvolverNode.h"
#include "mozilla/dom/ConvolverNodeBinding.h"
#include "AlignmentUtils.h"
#include "AudioNodeEngine.h"
#include "AudioNodeStream.h"
#include "blink/Reverb.h"
@ -261,11 +262,13 @@ ConvolverNode::SetBuffer(JSContext* aCx, AudioBuffer* aBuffer, ErrorResult& aRv)
length = WEBAUDIO_BLOCK_SIZE;
RefPtr<ThreadSharedFloatArrayBufferList> paddedBuffer =
new ThreadSharedFloatArrayBufferList(data->GetChannels());
float* channelData = (float*) malloc(sizeof(float) * length * data->GetChannels());
void* channelData = malloc(sizeof(float) * length * data->GetChannels() + 15);
float* alignedChannelData = ALIGNED16(channelData);
ASSERT_ALIGNED16(alignedChannelData);
for (uint32_t i = 0; i < data->GetChannels(); ++i) {
PodCopy(channelData + length * i, data->GetData(i), mBuffer->Length());
PodZero(channelData + length * i + mBuffer->Length(), WEBAUDIO_BLOCK_SIZE - mBuffer->Length());
paddedBuffer->SetData(i, (i == 0) ? channelData : nullptr, free, channelData);
PodCopy(alignedChannelData + length * i, data->GetData(i), mBuffer->Length());
PodZero(alignedChannelData + length * i + mBuffer->Length(), WEBAUDIO_BLOCK_SIZE - mBuffer->Length());
paddedBuffer->SetData(i, (i == 0) ? channelData : nullptr, free, alignedChannelData);
}
data = paddedBuffer;
}

View File

@ -6,6 +6,7 @@
#include "GainNode.h"
#include "mozilla/dom/GainNodeBinding.h"
#include "AlignmentUtils.h"
#include "AudioNodeEngine.h"
#include "AudioNodeStream.h"
#include "AudioDestinationNode.h"
@ -79,18 +80,20 @@ public:
// Compute the gain values for the duration of the input AudioChunk
StreamTime tick = mDestination->GraphTimeToStreamTime(aFrom);
float computedGain[WEBAUDIO_BLOCK_SIZE];
mGain.GetValuesAtTime(tick, computedGain, WEBAUDIO_BLOCK_SIZE);
float computedGain[WEBAUDIO_BLOCK_SIZE + 4];
float* alignedComputedGain = ALIGNED16(computedGain);
ASSERT_ALIGNED16(alignedComputedGain);
mGain.GetValuesAtTime(tick, alignedComputedGain, WEBAUDIO_BLOCK_SIZE);
for (size_t counter = 0; counter < WEBAUDIO_BLOCK_SIZE; ++counter) {
computedGain[counter] *= aInput.mVolume;
alignedComputedGain[counter] *= aInput.mVolume;
}
// Apply the gain to the output buffer
for (size_t channel = 0; channel < aOutput->ChannelCount(); ++channel) {
const float* inputBuffer = static_cast<const float*> (aInput.mChannelData[channel]);
float* buffer = aOutput->ChannelFloatsForWrite(channel);
AudioBlockCopyChannelWithScale(inputBuffer, computedGain, buffer);
AudioBlockCopyChannelWithScale(inputBuffer, alignedComputedGain, buffer);
}
}
}

View File

@ -9,6 +9,7 @@
#include "AudioNodeEngine.h"
#include "AudioNodeStream.h"
#include "AudioDestinationNode.h"
#include "AlignmentUtils.h"
#include "WebAudioUtils.h"
#include "PanningUtils.h"
#include "AudioParamTimeline.h"
@ -137,24 +138,26 @@ public:
panning <= 0);
}
} else {
float computedGain[2][WEBAUDIO_BLOCK_SIZE];
float computedGain[2*WEBAUDIO_BLOCK_SIZE + 4];
bool onLeft[WEBAUDIO_BLOCK_SIZE];
float values[WEBAUDIO_BLOCK_SIZE];
StreamTime tick = mDestination->GraphTimeToStreamTime(aFrom);
mPan.GetValuesAtTime(tick, values, WEBAUDIO_BLOCK_SIZE);
float* alignedComputedGain = ALIGNED16(computedGain);
ASSERT_ALIGNED16(alignedComputedGain);
for (size_t counter = 0; counter < WEBAUDIO_BLOCK_SIZE; ++counter) {
float left, right;
GetGainValuesForPanning(values[counter], monoToStereo, left, right);
computedGain[0][counter] = left * aInput.mVolume;
computedGain[1][counter] = right * aInput.mVolume;
alignedComputedGain[counter] = left * aInput.mVolume;
alignedComputedGain[WEBAUDIO_BLOCK_SIZE + counter] = right * aInput.mVolume;
onLeft[counter] = values[counter] <= 0;
}
// Apply the gain to the output buffer
ApplyStereoPanning(aInput, aOutput, computedGain[0], computedGain[1], onLeft);
ApplyStereoPanning(aInput, aOutput, alignedComputedGain, &alignedComputedGain[WEBAUDIO_BLOCK_SIZE], onLeft);
}
}

View File

@ -6,6 +6,7 @@
#include "WaveShaperNode.h"
#include "mozilla/dom/WaveShaperNodeBinding.h"
#include "AlignmentUtils.h"
#include "AudioNode.h"
#include "AudioNodeEngine.h"
#include "AudioNodeStream.h"
@ -231,13 +232,15 @@ public:
aOutput->AllocateChannels(channelCount);
for (uint32_t i = 0; i < channelCount; ++i) {
const float* inputSamples;
float scaledInput[WEBAUDIO_BLOCK_SIZE];
float scaledInput[WEBAUDIO_BLOCK_SIZE + 4];
float* alignedScaledInput = ALIGNED16(scaledInput);
ASSERT_ALIGNED16(alignedScaledInput);
if (aInput.mVolume != 1.0f) {
AudioBlockCopyChannelWithScale(
static_cast<const float*>(aInput.mChannelData[i]),
aInput.mVolume,
scaledInput);
inputSamples = scaledInput;
alignedScaledInput);
inputSamples = alignedScaledInput;
} else {
inputSamples = static_cast<const float*>(aInput.mChannelData[i]);
}

View File

@ -29,13 +29,11 @@
#ifndef ReverbAccumulationBuffer_h
#define ReverbAccumulationBuffer_h
#include "nsTArray.h"
#include "AlignedTArray.h"
#include "mozilla/MemoryReporting.h"
namespace WebCore {
typedef nsTArray<float> AudioFloatArray;
// ReverbAccumulationBuffer is a circular delay buffer with one client reading from it and multiple clients
// writing/accumulating to it at different delay offsets from the read position. The read operation will zero the memory
// just read from the buffer, so it will be ready for accumulation the next time around.
@ -65,7 +63,7 @@ public:
}
private:
AudioFloatArray m_buffer;
AlignedTArray<float, 16> m_buffer;
size_t m_readIndex;
size_t m_readTimeFrame; // for debugging (frame on continuous timeline)
};