Bug 877662 - Align audio buffer allocations to 16 byte boundaries r=padenot

To be able to use SSE2 routines, we need audio buffers to be allocated
on 16 byte boundaries.

MozReview-Commit-ID: 2mjxMWqysFd

--HG--
extra : rebase_source : 8bd7d48b767b7bcfa5874061586b9b41c26a18ae
This commit is contained in:
Dan Minor 2016-04-13 15:31:50 -04:00
parent 7bd057f84a
commit 8c7cbbbf79
11 changed files with 73 additions and 39 deletions

View File

@ -5,6 +5,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "AudioBlock.h"
#include "AlignmentUtils.h"
namespace mozilla {
@ -17,9 +18,7 @@ namespace mozilla {
* buffer can reuse and modify its contents next iteration if other references
* are all downstream temporary references held by AudioBlock.
*
* This only guarantees 4-byte alignment of the data. For alignment we simply
* assume that the memory from malloc is at least 4-byte aligned and that
* AudioBlockBuffer's size is divisible by 4.
* We guarantee 16 byte alignment of the channel data.
*/
class AudioBlockBuffer final : public ThreadSharedObject {
public:
@ -28,7 +27,9 @@ public:
float* ChannelData(uint32_t aChannel)
{
return reinterpret_cast<float*>(this + 1) + aChannel * WEBAUDIO_BLOCK_SIZE;
float* base = reinterpret_cast<float*>(((uintptr_t)(this + 1) + 15) & ~0x0F);
ASSERT_ALIGNED16(base);
return base + aChannel * WEBAUDIO_BLOCK_SIZE;
}
static already_AddRefed<AudioBlockBuffer> Create(uint32_t aChannelCount)
@ -37,9 +38,11 @@ public:
size *= aChannelCount;
size *= sizeof(float);
size += sizeof(AudioBlockBuffer);
size += 15; //padding for alignment
if (!size.isValid()) {
MOZ_CRASH();
}
void* m = moz_xmalloc(size.value());
RefPtr<AudioBlockBuffer> p = new (m) AudioBlockBuffer();
NS_ASSERTION((reinterpret_cast<char*>(p.get() + 1) - reinterpret_cast<char*>(p.get())) % 4 == 0,
@ -150,8 +153,6 @@ AudioBlock::AllocateChannels(uint32_t aChannelCount)
}
}
// XXX for SIMD purposes we should do something here to make sure the
// channel buffers are 16-byte aligned.
RefPtr<AudioBlockBuffer> buffer = AudioBlockBuffer::Create(aChannelCount);
mChannelData.SetLength(aChannelCount);
for (uint32_t i = 0; i < aChannelCount; ++i) {

View File

@ -5,6 +5,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "AudioDestinationNode.h"
#include "AlignmentUtils.h"
#include "AudioContext.h"
#include "mozilla/dom/AudioDestinationNodeBinding.h"
#include "mozilla/dom/ScriptSettings.h"
@ -87,7 +88,7 @@ public:
PodZero(outputData, duration);
} else {
const float* inputBuffer = static_cast<const float*>(aInput.mChannelData[i]);
if (duration == WEBAUDIO_BLOCK_SIZE) {
if (duration == WEBAUDIO_BLOCK_SIZE && IS_ALIGNED16(inputBuffer)) {
// Use the optimized version of the copy with scale operation
AudioBlockCopyChannelWithScale(inputBuffer, aInput.mVolume,
outputData);

View File

@ -3,6 +3,8 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "AlignedTArray.h"
#include "AlignmentUtils.h"
#include "AudioNodeEngine.h"
#include "AudioNodeExternalInputStream.h"
#include "AudioChannelFormat.h"
@ -90,9 +92,20 @@ static void ConvertSegmentToAudioBlock(AudioSegment* aSegment,
NS_ASSERTION(!ci.IsEnded(), "Should be at least one chunk!");
if (ci->GetDuration() == WEBAUDIO_BLOCK_SIZE &&
(ci->IsNull() || ci->mBufferFormat == AUDIO_FORMAT_FLOAT32)) {
bool aligned = true;
for (size_t i = 0; i < ci->mChannelData.Length(); ++i) {
if (!IS_ALIGNED16(ci->mChannelData[i])) {
aligned = false;
break;
}
}
// Return this chunk directly to avoid copying data.
*aBlock = *ci;
return;
if (aligned) {
*aBlock = *ci;
return;
}
}
}
@ -192,7 +205,10 @@ AudioNodeExternalInputStream::ProcessInput(GraphTime aFrom, GraphTime aTo,
uint32_t accumulateIndex = 0;
if (inputChannels) {
AutoTArray<float,GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE> downmixBuffer;
// TODO: See Bug 1261168. Ideally we would use an aligned version of
// AutoTArray (of size GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE) here.
AlignedTArray<float,16> downmixBuffer;
downmixBuffer.SetLength(GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE);
for (uint32_t i = 0; i < audioSegments.Length(); ++i) {
AudioBlock tmpChunk;
ConvertSegmentToAudioBlock(&audioSegments[i], &tmpChunk, inputChannels);

View File

@ -453,8 +453,10 @@ AudioNodeStream::ObtainInputBlock(AudioBlock& aTmpChunk,
}
aTmpChunk.AllocateChannels(outputChannelCount);
// The static storage here should be 1KB, so it's fine
AutoTArray<float, GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE> downmixBuffer;
// TODO: See Bug 1261168. Ideally we would use an aligned version of
// AutoTArray (of size GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE) here.
AlignedTArray<float, 16> downmixBuffer;
downmixBuffer.SetLength(GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE);
for (uint32_t i = 0; i < inputChunkCount; ++i) {
AccumulateInputChunk(i, *inputChunks[i], &aTmpChunk, &downmixBuffer);
@ -465,7 +467,7 @@ void
AudioNodeStream::AccumulateInputChunk(uint32_t aInputIndex,
const AudioBlock& aChunk,
AudioBlock* aBlock,
nsTArray<float>* aDownmixBuffer)
AlignedTArray<float, 16>* aDownmixBuffer)
{
AutoTArray<const float*,GUESS_AUDIO_CHANNELS> channels;
UpMixDownMixChunk(&aChunk, aBlock->ChannelCount(), channels, *aDownmixBuffer);
@ -491,7 +493,7 @@ void
AudioNodeStream::UpMixDownMixChunk(const AudioBlock* aChunk,
uint32_t aOutputChannelCount,
nsTArray<const float*>& aOutputChannels,
nsTArray<float>& aDownmixBuffer)
AlignedTArray<float, 16>& aDownmixBuffer)
{
for (uint32_t i = 0; i < aChunk->ChannelCount(); i++) {
aOutputChannels.AppendElement(static_cast<const float*>(aChunk->mChannelData[i]));

View File

@ -8,6 +8,7 @@
#include "MediaStreamGraph.h"
#include "mozilla/dom/AudioNodeBinding.h"
#include "AlignedTArray.h"
#include "AudioBlock.h"
namespace mozilla {
@ -190,10 +191,10 @@ protected:
void FinishOutput();
void AccumulateInputChunk(uint32_t aInputIndex, const AudioBlock& aChunk,
AudioBlock* aBlock,
nsTArray<float>* aDownmixBuffer);
AlignedTArray<float, 16>* aDownmixBuffer);
void UpMixDownMixChunk(const AudioBlock* aChunk, uint32_t aOutputChannelCount,
nsTArray<const float*>& aOutputChannels,
nsTArray<float>& aDownmixBuffer);
AlignedTArray<float, 16>& aDownmixBuffer);
uint32_t ComputedNumberOfChannels(uint32_t aInputChannelCount);
void ObtainInputBlock(AudioBlock& aTmpChunk, uint32_t aPortIndex);

View File

@ -5,6 +5,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "BiquadFilterNode.h"
#include "AlignmentUtils.h"
#include "AudioNodeEngine.h"
#include "AudioNodeStream.h"
#include "AudioDestinationNode.h"
@ -137,7 +138,9 @@ public:
AudioBlock* aOutput,
bool* aFinished) override
{
float inputBuffer[WEBAUDIO_BLOCK_SIZE];
float inputBuffer[WEBAUDIO_BLOCK_SIZE + 4];
float* alignedInputBuffer = ALIGNED16(inputBuffer);
ASSERT_ALIGNED16(alignedInputBuffer);
if (aInput.IsNull()) {
bool hasTail = false;
@ -191,12 +194,12 @@ public:
for (uint32_t i = 0; i < numberOfChannels; ++i) {
const float* input;
if (aInput.IsNull()) {
input = inputBuffer;
input = alignedInputBuffer;
} else {
input = static_cast<const float*>(aInput.mChannelData[i]);
if (aInput.mVolume != 1.0) {
AudioBlockCopyChannelWithScale(input, aInput.mVolume, inputBuffer);
input = inputBuffer;
AudioBlockCopyChannelWithScale(input, aInput.mVolume, alignedInputBuffer);
input = alignedInputBuffer;
}
}
SetParamsOnBiquad(mBiquads[i], aStream->SampleRate(), mType, freq, q, gain, detune);

View File

@ -6,6 +6,7 @@
#include "ConvolverNode.h"
#include "mozilla/dom/ConvolverNodeBinding.h"
#include "AlignmentUtils.h"
#include "AudioNodeEngine.h"
#include "AudioNodeStream.h"
#include "blink/Reverb.h"
@ -261,11 +262,13 @@ ConvolverNode::SetBuffer(JSContext* aCx, AudioBuffer* aBuffer, ErrorResult& aRv)
length = WEBAUDIO_BLOCK_SIZE;
RefPtr<ThreadSharedFloatArrayBufferList> paddedBuffer =
new ThreadSharedFloatArrayBufferList(data->GetChannels());
float* channelData = (float*) malloc(sizeof(float) * length * data->GetChannels());
void* channelData = malloc(sizeof(float) * length * data->GetChannels() + 15);
float* alignedChannelData = ALIGNED16(channelData);
ASSERT_ALIGNED16(alignedChannelData);
for (uint32_t i = 0; i < data->GetChannels(); ++i) {
PodCopy(channelData + length * i, data->GetData(i), mBuffer->Length());
PodZero(channelData + length * i + mBuffer->Length(), WEBAUDIO_BLOCK_SIZE - mBuffer->Length());
paddedBuffer->SetData(i, (i == 0) ? channelData : nullptr, free, channelData);
PodCopy(alignedChannelData + length * i, data->GetData(i), mBuffer->Length());
PodZero(alignedChannelData + length * i + mBuffer->Length(), WEBAUDIO_BLOCK_SIZE - mBuffer->Length());
paddedBuffer->SetData(i, (i == 0) ? channelData : nullptr, free, alignedChannelData);
}
data = paddedBuffer;
}

View File

@ -6,6 +6,7 @@
#include "GainNode.h"
#include "mozilla/dom/GainNodeBinding.h"
#include "AlignmentUtils.h"
#include "AudioNodeEngine.h"
#include "AudioNodeStream.h"
#include "AudioDestinationNode.h"
@ -79,18 +80,20 @@ public:
// Compute the gain values for the duration of the input AudioChunk
StreamTime tick = mDestination->GraphTimeToStreamTime(aFrom);
float computedGain[WEBAUDIO_BLOCK_SIZE];
mGain.GetValuesAtTime(tick, computedGain, WEBAUDIO_BLOCK_SIZE);
float computedGain[WEBAUDIO_BLOCK_SIZE + 4];
float* alignedComputedGain = ALIGNED16(computedGain);
ASSERT_ALIGNED16(alignedComputedGain);
mGain.GetValuesAtTime(tick, alignedComputedGain, WEBAUDIO_BLOCK_SIZE);
for (size_t counter = 0; counter < WEBAUDIO_BLOCK_SIZE; ++counter) {
computedGain[counter] *= aInput.mVolume;
alignedComputedGain[counter] *= aInput.mVolume;
}
// Apply the gain to the output buffer
for (size_t channel = 0; channel < aOutput->ChannelCount(); ++channel) {
const float* inputBuffer = static_cast<const float*> (aInput.mChannelData[channel]);
float* buffer = aOutput->ChannelFloatsForWrite(channel);
AudioBlockCopyChannelWithScale(inputBuffer, computedGain, buffer);
AudioBlockCopyChannelWithScale(inputBuffer, alignedComputedGain, buffer);
}
}
}

View File

@ -9,6 +9,7 @@
#include "AudioNodeEngine.h"
#include "AudioNodeStream.h"
#include "AudioDestinationNode.h"
#include "AlignmentUtils.h"
#include "WebAudioUtils.h"
#include "PanningUtils.h"
#include "AudioParamTimeline.h"
@ -137,24 +138,26 @@ public:
panning <= 0);
}
} else {
float computedGain[2][WEBAUDIO_BLOCK_SIZE];
float computedGain[2*WEBAUDIO_BLOCK_SIZE + 4];
bool onLeft[WEBAUDIO_BLOCK_SIZE];
float values[WEBAUDIO_BLOCK_SIZE];
StreamTime tick = mDestination->GraphTimeToStreamTime(aFrom);
mPan.GetValuesAtTime(tick, values, WEBAUDIO_BLOCK_SIZE);
float* alignedComputedGain = ALIGNED16(computedGain);
ASSERT_ALIGNED16(alignedComputedGain);
for (size_t counter = 0; counter < WEBAUDIO_BLOCK_SIZE; ++counter) {
float left, right;
GetGainValuesForPanning(values[counter], monoToStereo, left, right);
computedGain[0][counter] = left * aInput.mVolume;
computedGain[1][counter] = right * aInput.mVolume;
alignedComputedGain[counter] = left * aInput.mVolume;
alignedComputedGain[WEBAUDIO_BLOCK_SIZE + counter] = right * aInput.mVolume;
onLeft[counter] = values[counter] <= 0;
}
// Apply the gain to the output buffer
ApplyStereoPanning(aInput, aOutput, computedGain[0], computedGain[1], onLeft);
ApplyStereoPanning(aInput, aOutput, alignedComputedGain, &alignedComputedGain[WEBAUDIO_BLOCK_SIZE], onLeft);
}
}

View File

@ -6,6 +6,7 @@
#include "WaveShaperNode.h"
#include "mozilla/dom/WaveShaperNodeBinding.h"
#include "AlignmentUtils.h"
#include "AudioNode.h"
#include "AudioNodeEngine.h"
#include "AudioNodeStream.h"
@ -231,13 +232,15 @@ public:
aOutput->AllocateChannels(channelCount);
for (uint32_t i = 0; i < channelCount; ++i) {
const float* inputSamples;
float scaledInput[WEBAUDIO_BLOCK_SIZE];
float scaledInput[WEBAUDIO_BLOCK_SIZE + 4];
float* alignedScaledInput = ALIGNED16(scaledInput);
ASSERT_ALIGNED16(alignedScaledInput);
if (aInput.mVolume != 1.0f) {
AudioBlockCopyChannelWithScale(
static_cast<const float*>(aInput.mChannelData[i]),
aInput.mVolume,
scaledInput);
inputSamples = scaledInput;
alignedScaledInput);
inputSamples = alignedScaledInput;
} else {
inputSamples = static_cast<const float*>(aInput.mChannelData[i]);
}

View File

@ -29,13 +29,11 @@
#ifndef ReverbAccumulationBuffer_h
#define ReverbAccumulationBuffer_h
#include "nsTArray.h"
#include "AlignedTArray.h"
#include "mozilla/MemoryReporting.h"
namespace WebCore {
typedef nsTArray<float> AudioFloatArray;
// ReverbAccumulationBuffer is a circular delay buffer with one client reading from it and multiple clients
// writing/accumulating to it at different delay offsets from the read position. The read operation will zero the memory
// just read from the buffer, so it will be ready for accumulation the next time around.
@ -65,7 +63,7 @@ public:
}
private:
AudioFloatArray m_buffer;
AlignedTArray<float, 16> m_buffer;
size_t m_readIndex;
size_t m_readTimeFrame; // for debugging (frame on continuous timeline)
};