mirror of
https://github.com/CTCaer/RetroArch.git
synced 2025-01-21 02:04:40 +00:00
Merge pull request #82 from Themaister/neon_conv
Neon sample conversion
This commit is contained in:
commit
b7e3bcc2c9
@ -20,6 +20,7 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
|
||||
|
||||
ifeq ($(HAVE_NEON),1)
|
||||
LOCAL_CFLAGS += -DHAVE_NEON
|
||||
LOCAL_SRC_FILES += ../../../audio/utils_neon.S.neon
|
||||
endif
|
||||
|
||||
ifeq ($(HAVE_SINC),1)
|
||||
|
@ -15,9 +15,9 @@
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
#if __SSE2__
|
||||
#if defined(__SSE2__)
|
||||
#include <emmintrin.h>
|
||||
#elif __ALTIVEC__
|
||||
#elif defined(__ALTIVEC__)
|
||||
#include <altivec.h>
|
||||
#endif
|
||||
|
||||
@ -39,7 +39,7 @@ void audio_convert_float_to_s16_C(int16_t *out,
|
||||
}
|
||||
}
|
||||
|
||||
#if __SSE2__
|
||||
#if defined(__SSE2__)
|
||||
void audio_convert_s16_to_float_SSE2(float *out,
|
||||
const int16_t *in, size_t samples, float gain)
|
||||
{
|
||||
@ -84,7 +84,7 @@ void audio_convert_float_to_s16_SSE2(int16_t *out,
|
||||
|
||||
audio_convert_float_to_s16_C(out, in, samples - i);
|
||||
}
|
||||
#elif __ALTIVEC__
|
||||
#elif defined(__ALTIVEC__)
|
||||
void audio_convert_s16_to_float_altivec(float *out,
|
||||
const int16_t *in, size_t samples, float gain)
|
||||
{
|
||||
@ -133,6 +133,28 @@ void audio_convert_float_to_s16_altivec(int16_t *out,
|
||||
else
|
||||
audio_convert_float_to_s16_C(out, in, samples);
|
||||
}
|
||||
#elif defined(HAVE_NEON)
|
||||
// NEON kernel implemented in utils_neon.S. It consumes samples in groups of 8.
void audio_convert_s16_float_asm(float *out, const int16_t *in, size_t samples);

// Converts `samples` signed 16-bit samples from `in` to float in `out`,
// applying `gain`.
//
// The largest multiple-of-8 prefix is converted by the NEON kernel, the
// remaining 0..7 samples by the C fallback.
void audio_convert_s16_to_float_neon(float *out, const int16_t *in, size_t samples,
      float gain)
{
   size_t i;
   size_t aligned_samples = samples & ~(size_t)7;

   // The asm loop decrements by 8 and only exits on exactly zero, so it must
   // never be entered with a count of 0.
   if (aligned_samples)
   {
      audio_convert_s16_float_asm(out, in, aligned_samples);

      // The kernel has no gain parameter; scale its output here so the
      // vectorized part agrees with the tail below.
      // NOTE(review): assumes audio_convert_s16_to_float_C is linear in gain,
      // i.e. C(gain) == gain * C(1.0f) - confirm against its definition.
      if (gain != 1.0f)
      {
         for (i = 0; i < aligned_samples; i++)
            out[i] *= gain;
      }
   }

   // Could do all conversion in ASM, but keep it simple for now.
   audio_convert_s16_to_float_C(out + aligned_samples, in + aligned_samples,
         samples - aligned_samples, gain);
}
|
||||
|
||||
// NEON kernel implemented in utils_neon.S. It consumes samples in groups of 8.
void audio_convert_float_s16_asm(int16_t *out, const float *in, size_t samples);

// Converts `samples` float samples from `in` to signed 16-bit in `out`.
//
// The largest multiple-of-8 prefix is converted by the NEON kernel, the
// remaining 0..7 samples by the C fallback.
void audio_convert_float_to_s16_neon(int16_t *out, const float *in, size_t samples)
{
   size_t aligned_samples = samples & ~(size_t)7;

   // The asm loop decrements by 8 and only exits on exactly zero, so it must
   // never be entered with a count of 0 (i.e. when samples < 8).
   if (aligned_samples)
      audio_convert_float_s16_asm(out, in, aligned_samples);

   audio_convert_float_to_s16_C(out + aligned_samples, in + aligned_samples,
         samples - aligned_samples);
}
|
||||
#endif
|
||||
|
||||
|
@ -19,7 +19,7 @@
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#if __SSE2__
|
||||
#if defined(__SSE2__)
|
||||
#define audio_convert_s16_to_float audio_convert_s16_to_float_SSE2
|
||||
#define audio_convert_float_to_s16 audio_convert_float_to_s16_SSE2
|
||||
|
||||
@ -29,7 +29,7 @@ void audio_convert_s16_to_float_SSE2(float *out,
|
||||
void audio_convert_float_to_s16_SSE2(int16_t *out,
|
||||
const float *in, size_t samples);
|
||||
|
||||
#elif __ALTIVEC__
|
||||
#elif defined(__ALTIVEC__)
|
||||
#define audio_convert_s16_to_float audio_convert_s16_to_float_altivec
|
||||
#define audio_convert_float_to_s16 audio_convert_float_to_s16_altivec
|
||||
|
||||
@ -39,6 +39,16 @@ void audio_convert_s16_to_float_altivec(float *out,
|
||||
void audio_convert_float_to_s16_altivec(int16_t *out,
|
||||
const float *in, size_t samples);
|
||||
|
||||
#elif defined(HAVE_NEON)
|
||||
#define audio_convert_s16_to_float audio_convert_s16_to_float_neon
|
||||
#define audio_convert_float_to_s16 audio_convert_float_to_s16_neon
|
||||
|
||||
void audio_convert_s16_to_float_neon(float *out,
|
||||
const int16_t *in, size_t samples, float gain);
|
||||
|
||||
void audio_convert_float_to_s16_neon(int16_t *out,
|
||||
const float *in, size_t samples);
|
||||
|
||||
#else
|
||||
#define audio_convert_s16_to_float audio_convert_s16_to_float_C
|
||||
#define audio_convert_float_to_s16 audio_convert_float_to_s16_C
|
||||
|
86
audio/utils_neon.S
Normal file
86
audio/utils_neon.S
Normal file
@ -0,0 +1,86 @@
|
||||
/* RetroArch - A frontend for libretro.
|
||||
* Copyright (C) 2010-2012 - Hans-Kristian Arntzen
|
||||
*
|
||||
* RetroArch is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with RetroArch.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
.arm

.align 4
.global audio_convert_s16_float_asm
#ifdef __ELF__
@ Mark the symbol as a function so the linker can interwork with Thumb callers.
.type audio_convert_s16_float_asm, %function
#endif
@ ---------------------------------------------------------------------------
@ void audio_convert_s16_float_asm(float *out, const int16_t *in, size_t samples)
@
@ Converts signed 16-bit samples to float, scaling each by 2^-15.
@
@ In:      r0 = out, r1 = in, r2 = samples
@ Out:     none
@ Clobber: r0-r2, q0-q2, q8, flags
@
@ Samples are processed in groups of 8. The count is rounded down to a
@ multiple of 8 on entry and the routine returns immediately when nothing is
@ left, so the caller must convert any remaining tail itself.
@ Only caller-saved NEON registers (q0-q3, q8-q15) are used; q4-q7 are
@ callee-saved under the AAPCS and are left untouched.
@ ---------------------------------------------------------------------------
audio_convert_s16_float_asm:
        @ Build the constant 2^-15 in every lane of q8 without a memory load:
        @ ((0.25^2)^2)^2 = 2^-16, doubled = 2^-15.
        vmov.f32        q8, #0.25
        vmul.f32        q8, q8, q8
        vmul.f32        q8, q8, q8
        vmul.f32        q8, q8, q8
        vadd.f32        q8, q8, q8

        @ r2 = samples rounded down to a multiple of 8; bail out if that is 0.
        @ Without this the subs/bne loop below would never observe zero.
        bics            r2, r2, #7
        beq             2f

1:
        @ Load 8 x int16 and advance the input pointer.
        vld1.s16        {q0}, [r1]!

        @ Widen to 32-bit.
        vmovl.s16       q1, d0
        vmovl.s16       q2, d1

        @ Convert to float.
        vcvt.f32.s32    q1, q1
        vcvt.f32.s32    q2, q2

        @ Scale by 2^-15.
        vmul.f32        q1, q1, q8
        vmul.f32        q2, q2, q8

        @ Store 8 x float and advance the output pointer.
        vst1.f32        {q1-q2}, [r0]!

        @ r2 = samples remaining, always a multiple of 8 here.
        subs            r2, r2, #8
        bne             1b

2:
        bx              lr
|
||||
|
||||
.align 4
.global audio_convert_float_s16_asm
#ifdef __ELF__
@ Mark the symbol as a function so the linker can interwork with Thumb callers.
.type audio_convert_float_s16_asm, %function
#endif
@ ---------------------------------------------------------------------------
@ void audio_convert_float_s16_asm(int16_t *out, const float *in, size_t samples)
@
@ Converts float samples to signed 16-bit: each input is scaled by 2^15,
@ converted to a 32-bit integer and narrowed to 16 bits with saturation.
@
@ In:      r0 = out, r1 = in, r2 = samples
@ Out:     none
@ Clobber: r0-r2, q0-q2, q8, q9, flags
@
@ Samples are processed in groups of 8. The count is rounded down to a
@ multiple of 8 on entry and the routine returns immediately when nothing is
@ left, so the caller must convert any remaining tail itself.
@ Only caller-saved NEON registers (q0-q3, q8-q15) are used; q4-q7 are
@ callee-saved under the AAPCS and are left untouched.
@ ---------------------------------------------------------------------------
audio_convert_float_s16_asm:
        @ Build the constant 2^15 in every lane of q8 without a memory load:
        @ ((2^4)^2)^2 * 0.5 = 2^15
        vmov.f32        q8, #16.0
        vmov.f32        q9, #0.5
        vmul.f32        q8, q8, q8
        vmul.f32        q8, q8, q8
        vmul.f32        q8, q8, q9

        @ r2 = samples rounded down to a multiple of 8; bail out if that is 0.
        @ Without this the subs/bne loop below would never observe zero.
        bics            r2, r2, #7
        beq             2f

1:
        @ Load 8 x float and advance the input pointer.
        vld1.f32        {q0-q1}, [r1]!

        @ Scale by 2^15.
        vmul.f32        q0, q0, q8
        vmul.f32        q1, q1, q8

        @ Convert to 32-bit signed integers.
        vcvt.s32.f32    q0, q0
        vcvt.s32.f32    q1, q1

        @ Narrow to 16-bit with saturation.
        vqmovn.s32      d4, q0
        vqmovn.s32      d5, q1

        @ Store 8 x int16 (16-bit elements) and advance the output pointer.
        vst1.16         {d4-d5}, [r0]!

        @ r2 = samples remaining, always a multiple of 8 here.
        subs            r2, r2, #8
        bne             1b

2:
        bx              lr
|
||||
|
Loading…
x
Reference in New Issue
Block a user