Merge pull request #82 from Themaister/neon_conv

Neon sample conversion
This commit is contained in:
Hans-Kristian Arntzen 2012-12-05 13:33:20 -08:00
commit b7e3bcc2c9
4 changed files with 125 additions and 6 deletions

View File

@ -20,6 +20,7 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
ifeq ($(HAVE_NEON),1)
LOCAL_CFLAGS += -DHAVE_NEON
LOCAL_SRC_FILES += ../../../audio/utils_neon.S.neon
endif
ifeq ($(HAVE_SINC),1)

View File

@ -15,9 +15,9 @@
#include "utils.h"
#if __SSE2__
#if defined(__SSE2__)
#include <emmintrin.h>
#elif __ALTIVEC__
#elif defined(__ALTIVEC__)
#include <altivec.h>
#endif
@ -39,7 +39,7 @@ void audio_convert_float_to_s16_C(int16_t *out,
}
}
#if __SSE2__
#if defined(__SSE2__)
void audio_convert_s16_to_float_SSE2(float *out,
const int16_t *in, size_t samples, float gain)
{
@ -84,7 +84,7 @@ void audio_convert_float_to_s16_SSE2(int16_t *out,
audio_convert_float_to_s16_C(out, in, samples - i);
}
#elif __ALTIVEC__
#elif defined(__ALTIVEC__)
void audio_convert_s16_to_float_altivec(float *out,
const int16_t *in, size_t samples, float gain)
{
@ -133,6 +133,28 @@ void audio_convert_float_to_s16_altivec(int16_t *out,
else
audio_convert_float_to_s16_C(out, in, samples);
}
#elif defined(HAVE_NEON)
/* NEON kernel implemented in utils_neon.S.
 * Converts samples from signed 16-bit to float, scaling by 2^-15.
 * It works on groups of 8 samples and must be given a non-zero
 * multiple of 8. */
void audio_convert_s16_float_asm(float *out, const int16_t *in, size_t samples);

/* Converts signed 16-bit samples to float using NEON.
 *
 * out     - destination buffer, room for `samples` floats.
 * in      - source buffer holding `samples` 16-bit samples.
 * samples - number of samples to convert (any value, including 0).
 * gain    - linear gain applied to every converted sample.
 *
 * The bulk of the buffer (largest multiple of 8 samples) goes through the
 * assembly kernel; the remaining 0-7 samples are handled by the C version. */
void audio_convert_s16_to_float_neon(float *out, const int16_t *in, size_t samples,
      float gain)
{
   size_t i;
   size_t aligned_samples = samples & ~(size_t)7;

   /* The kernel counts down in steps of 8 and only stops on exactly zero,
    * so it must never be entered with an empty block. */
   if (aligned_samples)
   {
      audio_convert_s16_float_asm(out, in, aligned_samples);

      /* The kernel has no gain input, so apply gain in a second pass.
       * Skipped in the common unity-gain case. */
      if (gain != 1.0f)
      {
         for (i = 0; i < aligned_samples; i++)
            out[i] *= gain;
      }
   }

   /* Tail (fewer than 8 samples) is converted in C with the same gain. */
   audio_convert_s16_to_float_C(out + aligned_samples, in + aligned_samples,
         samples - aligned_samples, gain);
}
/* NEON kernel implemented in utils_neon.S.
 * Converts samples from float to signed 16-bit, scaling by 2^15.
 * It works on groups of 8 samples and must be given a non-zero
 * multiple of 8. */
void audio_convert_float_s16_asm(int16_t *out, const float *in, size_t samples);

/* Converts float samples to signed 16-bit using NEON.
 *
 * out     - destination buffer, room for `samples` 16-bit samples.
 * in      - source buffer holding `samples` floats.
 * samples - number of samples to convert (any value, including 0).
 *
 * The bulk of the buffer (largest multiple of 8 samples) goes through the
 * assembly kernel; the remaining 0-7 samples are handled by the C version. */
void audio_convert_float_to_s16_neon(int16_t *out, const float *in, size_t samples)
{
   size_t aligned_samples = samples & ~(size_t)7;

   /* The kernel counts down in steps of 8 and only stops on exactly zero,
    * so it must never be entered with an empty block. */
   if (aligned_samples)
      audio_convert_float_s16_asm(out, in, aligned_samples);

   /* Tail (fewer than 8 samples) is converted in C. */
   audio_convert_float_to_s16_C(out + aligned_samples, in + aligned_samples,
         samples - aligned_samples);
}
#endif

View File

@ -19,7 +19,7 @@
#include <stdint.h>
#include <stddef.h>
#if __SSE2__
#if defined(__SSE2__)
#define audio_convert_s16_to_float audio_convert_s16_to_float_SSE2
#define audio_convert_float_to_s16 audio_convert_float_to_s16_SSE2
@ -29,7 +29,7 @@ void audio_convert_s16_to_float_SSE2(float *out,
void audio_convert_float_to_s16_SSE2(int16_t *out,
const float *in, size_t samples);
#elif __ALTIVEC__
#elif defined(__ALTIVEC__)
#define audio_convert_s16_to_float audio_convert_s16_to_float_altivec
#define audio_convert_float_to_s16 audio_convert_float_to_s16_altivec
@ -39,6 +39,16 @@ void audio_convert_s16_to_float_altivec(float *out,
void audio_convert_float_to_s16_altivec(int16_t *out,
const float *in, size_t samples);
#elif defined(HAVE_NEON)
#define audio_convert_s16_to_float audio_convert_s16_to_float_neon
#define audio_convert_float_to_s16 audio_convert_float_to_s16_neon
void audio_convert_s16_to_float_neon(float *out,
const int16_t *in, size_t samples, float gain);
void audio_convert_float_to_s16_neon(int16_t *out,
const float *in, size_t samples);
#else
#define audio_convert_s16_to_float audio_convert_s16_to_float_C
#define audio_convert_float_to_s16 audio_convert_float_to_s16_C

86
audio/utils_neon.S Normal file
View File

@ -0,0 +1,86 @@
/* RetroArch - A frontend for libretro.
* Copyright (C) 2010-2012 - Hans-Kristian Arntzen
*
* RetroArch is free software: you can redistribute it and/or modify it under the terms
* of the GNU General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with RetroArch.
* If not, see <http://www.gnu.org/licenses/>.
*/
.arm
.align 4
.global audio_convert_s16_float_asm
/*
 * void audio_convert_s16_float_asm(float *out, const int16_t *in, size_t samples)
 *
 * Converts signed 16-bit samples to float, scaling each one by 2^-15.
 *
 * In:    r0 = out, r1 = in, r2 = samples
 * Out:   nothing
 * Clobb: r0-r2, q0-q2, q8, flags
 *
 * Samples are processed in groups of 8. The count is rounded down to a
 * multiple of 8, so any remaining 0-7 samples are left for the caller.
 * Only q0-q2 and q8 are touched; these are caller-saved under the AAPCS,
 * so nothing has to be preserved on the stack.
 */
audio_convert_s16_float_asm:
   # Round the count down to whole groups of 8 and leave early when
   # no full group remains. The loop below only stops on exactly zero.
   bics r2, r2, #7
   bxeq lr

   # Build the constant 2^-15 in every lane of q8:
   # 0.25 squared three times gives 2^-16, and doubling it gives 2^-15.
   vmov.f32 q8, #0.25
   vmul.f32 q8, q8, q8
   vmul.f32 q8, q8, q8
   vmul.f32 q8, q8, q8
   vadd.f32 q8, q8, q8

1:
   # Load 8 input samples and advance the input pointer.
   vld1.s16 {q0}, [r1]!

   # Sign-extend to 32-bit integers.
   vmovl.s16 q1, d0
   vmovl.s16 q2, d1

   # Convert to float in place and scale.
   vcvt.f32.s32 q1, q1
   vcvt.f32.s32 q2, q2
   vmul.f32 q1, q1, q8
   vmul.f32 q2, q2, q8

   # Store 8 floats and advance the output pointer.
   vst1.f32 {q1-q2}, [r0]!

   # r2 is a non-zero multiple of 8 here, so it reaches exactly zero.
   subs r2, r2, #8
   bne 1b

   bx lr
.align 4
.global audio_convert_float_s16_asm
/*
 * void audio_convert_float_s16_asm(int16_t *out, const float *in, size_t samples)
 *
 * Converts float samples to signed 16-bit, scaling each one by 2^15 and
 * narrowing with saturation.
 *
 * In:    r0 = out, r1 = in, r2 = samples
 * Out:   nothing
 * Clobb: r0-r2, q0-q2, q8, q9, flags
 *
 * Samples are processed in groups of 8. The count is rounded down to a
 * multiple of 8, so any remaining 0-7 samples are left for the caller.
 * Only q0-q2, q8 and q9 are touched; these are caller-saved under the
 * AAPCS, so nothing has to be preserved on the stack.
 */
audio_convert_float_s16_asm:
   # Round the count down to whole groups of 8 and leave early when
   # no full group remains. The loop below only stops on exactly zero.
   bics r2, r2, #7
   bxeq lr

   # Build the constant 2^15 in every lane of q8:
   # 16 squared twice gives 2^16, and multiplying by 0.5 gives 2^15.
   vmov.f32 q8, #16.0
   vmov.f32 q9, #0.5
   vmul.f32 q8, q8, q8
   vmul.f32 q8, q8, q8
   vmul.f32 q8, q8, q9

1:
   # Load 8 input floats and advance the input pointer.
   vld1.f32 {q0-q1}, [r1]!

   # Scale, then convert to 32-bit integers in place.
   vmul.f32 q0, q0, q8
   vmul.f32 q1, q1, q8
   vcvt.s32.f32 q0, q0
   vcvt.s32.f32 q1, q1

   # Narrow to 16 bits with saturation; d4 and d5 together form q2.
   vqmovn.s32 d4, q0
   vqmovn.s32 d5, q1

   # Store 8 samples using a 16-bit element size, matching the
   # int16_t destination, and advance the output pointer.
   vst1.s16 {d4-d5}, [r0]!

   # r2 is a non-zero multiple of 8 here, so it reaches exactly zero.
   subs r2, r2, #8
   bne 1b

   bx lr