mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-21 17:25:36 +00:00
Bug 881587 - Use SSE2 version of AudioNodeEngine.cpp routines added in bug 815643. r=padenot
MozReview-Commit-ID: 3cfU3oTruAC --HG-- extra : rebase_source : 527061c1b0ff8b9905f6b91f5d29f61adbdfe2d2
This commit is contained in:
parent
ed065c9fa9
commit
d54abe3ef7
@ -135,6 +135,14 @@ BufferComplexMultiply(const float* aInput,
|
||||
float* aOutput,
|
||||
uint32_t aSize)
|
||||
{
|
||||
|
||||
#ifdef USE_SSE2
|
||||
if (mozilla::supports_sse()) {
|
||||
BufferComplexMultiply_SSE(aInput, aScale, aOutput, aSize);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (uint32_t i = 0; i < aSize * 2; i += 2) {
|
||||
float real1 = aInput[i];
|
||||
float imag1 = aInput[i + 1];
|
||||
@ -313,6 +321,27 @@ float
|
||||
AudioBufferSumOfSquares(const float* aInput, uint32_t aLength)
|
||||
{
|
||||
float sum = 0.0f;
|
||||
|
||||
#ifdef USE_SSE2
|
||||
if (mozilla::supports_sse()) {
|
||||
const float* alignedInput = ALIGNED16(aInput);
|
||||
float vLength = (aLength >> 4) << 4;
|
||||
|
||||
// use scalar operations for any unaligned data at the beginning
|
||||
while (aInput != alignedInput) {
|
||||
sum += *aInput * *aInput;
|
||||
++aInput;
|
||||
}
|
||||
|
||||
sum += AudioBufferSumOfSquares_SSE(alignedInput, vLength);
|
||||
|
||||
// adjust aInput and aLength to use scalar operations for any
|
||||
// remaining values
|
||||
aInput = alignedInput + 1;
|
||||
aLength -= vLength;
|
||||
}
|
||||
#endif
|
||||
|
||||
while (aLength--) {
|
||||
sum += *aInput * *aInput;
|
||||
++aInput;
|
||||
|
@ -223,6 +223,11 @@ void BufferComplexMultiply_SSE(const float* aInput,
|
||||
outreal0, outreal1, outreal2, outreal3,
|
||||
outimag0, outimag1, outimag2, outimag3;
|
||||
|
||||
ASSERT_ALIGNED16(aInput);
|
||||
ASSERT_ALIGNED16(aScale);
|
||||
ASSERT_ALIGNED16(aOutput);
|
||||
ASSERT_MULTIPLE16(aSize);
|
||||
|
||||
for (i = 0; i < aSize * 2; i += 16) {
|
||||
in0 = _mm_load_ps(&aInput[i]);
|
||||
in1 = _mm_load_ps(&aInput[i + 4]);
|
||||
@ -273,6 +278,9 @@ AudioBufferSumOfSquares_SSE(const float* aInput, uint32_t aLength)
|
||||
acc0, acc1, acc2, acc3;
|
||||
float out[4];
|
||||
|
||||
ASSERT_ALIGNED16(aInput);
|
||||
ASSERT_MULTIPLE16(aLength);
|
||||
|
||||
acc0 = _mm_setzero_ps();
|
||||
acc1 = _mm_setzero_ps();
|
||||
acc2 = _mm_setzero_ps();
|
||||
|
Loading…
Reference in New Issue
Block a user