Bug 881587 - Use SSE2 version of AudioNodeEngine.cpp routines added in bug 815643. r=padenot

MozReview-Commit-ID: 3cfU3oTruAC --HG-- extra : rebase_source : 527061c1b0ff8b9905f6b91f5d29f61adbdfe2d2
2024-10-21 17:25:36 +00:00 · 2016-04-14 08:57:21 -04:00 · 2016-04-14 08:57:21 -04:00 · d54abe3ef7
commit d54abe3ef7
parent ed065c9fa9
2 changed files with 37 additions and 0 deletions
--- a/dom/media/webaudio/AudioNodeEngine.cpp
+++ b/dom/media/webaudio/AudioNodeEngine.cpp
@ -135,6 +135,14 @@ BufferComplexMultiply(const float* aInput,
                      float* aOutput,
                      uint32_t aSize)
 {
+
+#ifdef USE_SSE2
+  if (mozilla::supports_sse()) {
+    BufferComplexMultiply_SSE(aInput, aScale, aOutput, aSize);
+    return;
+  }
+#endif
+
  for (uint32_t i = 0; i < aSize * 2; i += 2) {
    float real1 = aInput[i];
    float imag1 = aInput[i + 1];
@ -313,6 +321,27 @@ float
 AudioBufferSumOfSquares(const float* aInput, uint32_t aLength)
 {
  float sum = 0.0f;
+
+#ifdef USE_SSE2
+  if (mozilla::supports_sse()) {
+    const float* alignedInput = ALIGNED16(aInput);
+    float vLength = (aLength >> 4) << 4;
+
+    // use scalar operations for any unaligned data at the beginning
+    while (aInput != alignedInput) {
+        sum += *aInput * *aInput;
+        ++aInput;
+    }
+
+    sum += AudioBufferSumOfSquares_SSE(alignedInput, vLength);
+
+    // adjust aInput and aLength to use scalar operations for any
+    // remaining values
+    aInput = alignedInput + 1;
+    aLength -= vLength;
+  }
+#endif
+
  while (aLength--) {
    sum += *aInput * *aInput;
    ++aInput;
--- a/dom/media/webaudio/AudioNodeEngineSSE2.cpp
+++ b/dom/media/webaudio/AudioNodeEngineSSE2.cpp
@ -223,6 +223,11 @@ void BufferComplexMultiply_SSE(const float* aInput,
         outreal0, outreal1, outreal2, outreal3,
         outimag0, outimag1, outimag2, outimag3;

+  ASSERT_ALIGNED16(aInput);
+  ASSERT_ALIGNED16(aScale);
+  ASSERT_ALIGNED16(aOutput);
+  ASSERT_MULTIPLE16(aSize);
+
  for (i = 0; i < aSize * 2; i += 16) {
    in0 = _mm_load_ps(&aInput[i]);
    in1 = _mm_load_ps(&aInput[i + 4]);
@ -273,6 +278,9 @@ AudioBufferSumOfSquares_SSE(const float* aInput, uint32_t aLength)
         acc0, acc1, acc2, acc3;
  float out[4];

+  ASSERT_ALIGNED16(aInput);
+  ASSERT_MULTIPLE16(aLength);
+
  acc0 = _mm_setzero_ps();
  acc1 = _mm_setzero_ps();
  acc2 = _mm_setzero_ps();