Bug 881587 - Use SSE2 version of AudioNodeEngine.cpp routines added in bug 815643. r=padenot

MozReview-Commit-ID: 3cfU3oTruAC

--HG--
extra : rebase_source : 527061c1b0ff8b9905f6b91f5d29f61adbdfe2d2
This commit is contained in:
Dan Minor 2016-04-14 08:57:21 -04:00
parent ed065c9fa9
commit d54abe3ef7
2 changed files with 37 additions and 0 deletions

View File

@ -135,6 +135,14 @@ BufferComplexMultiply(const float* aInput,
float* aOutput,
uint32_t aSize)
{
#ifdef USE_SSE2
// Runtime dispatch: when mozilla::supports_sse() reports support, the whole
// buffer is handed to BufferComplexMultiply_SSE and the scalar loop below is
// skipped.
// NOTE(review): BufferComplexMultiply_SSE applies ASSERT_ALIGNED16 to aInput,
// aScale and aOutput and ASSERT_MULTIPLE16 to aSize. Nothing here checks those
// conditions, so callers are presumably expected to guarantee them — confirm.
if (mozilla::supports_sse()) {
BufferComplexMultiply_SSE(aInput, aScale, aOutput, aSize);
return;
}
#endif
for (uint32_t i = 0; i < aSize * 2; i += 2) {
float real1 = aInput[i];
float imag1 = aInput[i + 1];
@ -313,6 +321,27 @@ float
AudioBufferSumOfSquares(const float* aInput, uint32_t aLength)
{
float sum = 0.0f;
#ifdef USE_SSE2
// Vectorized fast path, taken when mozilla::supports_sse() reports support.
// The input is processed in three stages:
//   1. a scalar prologue that advances aInput to the address ALIGNED16 yields,
//   2. one call to AudioBufferSumOfSquares_SSE over a whole number of
//      16-sample groups (that routine asserts ASSERT_ALIGNED16 on its pointer
//      and ASSERT_MULTIPLE16 on its length),
//   3. the scalar loop after this region, which sums whatever is left.
// aInput and aLength are kept in step throughout, so on exit they describe
// exactly the samples that have not been summed yet.
if (mozilla::supports_sse()) {
  // NOTE(review): assumes ALIGNED16 rounds up to an address reachable from
  // aInput in whole-float steps — confirm against the macro definition.
  const float* alignedInput = ALIGNED16(aInput);

  // Scalar prologue. Each consumed sample is removed from aLength, and the
  // loop is bounded by aLength so a short buffer that ends before the
  // aligned address is never read past its end.
  while (aInput != alignedInput && aLength > 0) {
    sum += *aInput * *aInput;
    ++aInput;
    --aLength;
  }

  // Number of remaining samples, rounded down to a multiple of 16. This is an
  // element count, so it stays an integer.
  const uint32_t vLength = (aLength >> 4) << 4;
  if (vLength > 0) {
    // vLength > 0 implies aLength > 0, so the prologue stopped because
    // aInput reached alignedInput; the pointer passed here is the aligned one.
    sum += AudioBufferSumOfSquares_SSE(aInput, vLength);

    // Step past everything the vector routine consumed, leaving only the
    // trailing samples for the scalar loop below.
    aInput += vLength;
    aLength -= vLength;
  }
}
#endif
while (aLength--) {
sum += *aInput * *aInput;
++aInput;

View File

@ -223,6 +223,11 @@ void BufferComplexMultiply_SSE(const float* aInput,
outreal0, outreal1, outreal2, outreal3,
outimag0, outimag1, outimag2, outimag3;
ASSERT_ALIGNED16(aInput);
ASSERT_ALIGNED16(aScale);
ASSERT_ALIGNED16(aOutput);
ASSERT_MULTIPLE16(aSize);
for (i = 0; i < aSize * 2; i += 16) {
in0 = _mm_load_ps(&aInput[i]);
in1 = _mm_load_ps(&aInput[i + 4]);
@ -273,6 +278,9 @@ AudioBufferSumOfSquares_SSE(const float* aInput, uint32_t aLength)
acc0, acc1, acc2, acc3;
float out[4];
ASSERT_ALIGNED16(aInput);
ASSERT_MULTIPLE16(aLength);
acc0 = _mm_setzero_ps();
acc1 = _mm_setzero_ps();
acc2 = _mm_setzero_ps();