From 2b04e1ece6def1bd836dee9c223f5c1be59c403e Mon Sep 17 00:00:00 2001 From: Themaister Date: Mon, 3 Dec 2012 00:22:07 +0100 Subject: [PATCH 1/7] Start adding NEON sinc. --- audio/sinc.c | 14 +++++++++++++ audio/sinc_neon.S | 52 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 audio/sinc_neon.S diff --git a/audio/sinc.c b/audio/sinc.c index 4041e8caed..e4ab461755 100644 --- a/audio/sinc.c +++ b/audio/sinc.c @@ -123,6 +123,8 @@ rarch_resampler_t *resampler_new(void) RARCH_LOG("Sinc resampler [AVX]\n"); #elif defined(__SSE__) RARCH_LOG("Sinc resampler [SSE]\n"); +#elif defined(HAVE_NEON) + RARCH_LOG("Sinc resampler [NEON]\n"); #else RARCH_LOG("Sinc resampler [C]\n"); #endif @@ -208,6 +210,18 @@ static void process_sinc(rarch_resampler_t *resamp, float *out_buffer) // movehl { X, R, X, L } == { X, R, X, R } _mm_store_ss(out_buffer + 1, _mm_movehl_ps(sum, sum)); } +#elif defined(HAVE_NEON) +void process_sinc_neon_asm(float *out, const float *left, const float *right, const float *coeff); +static void process_sinc(rarch_resampler_t *resamp, float *out_buffer) +{ + const float *buffer_l = resamp->buffer_l + resamp->ptr; + const float *buffer_r = resamp->buffer_r + resamp->ptr; + + unsigned phase = resamp->time >> SUBPHASE_BITS; + const float *phase_table = resamp->phase_table[phase]; + + process_sinc_neon_asm(out_buffer, buffer_l, buffer_r, phase_table); +} #else // Plain ol' C99 static void process_sinc(rarch_resampler_t *resamp, float *out_buffer) { diff --git a/audio/sinc_neon.S b/audio/sinc_neon.S new file mode 100644 index 0000000000..f6c78ce122 --- /dev/null +++ b/audio/sinc_neon.S @@ -0,0 +1,52 @@ +/* RetroArch - A frontend for libretro. 
+ * Copyright (C) 2010-2012 - Hans-Kristian Arntzen + * + * RetroArch is free software: you can redistribute it and/or modify it under the terms + * of the GNU General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with RetroArch. + * If not, see <http://www.gnu.org/licenses/>. + */ + +.arm +.align 4 +.global process_sinc_neon_asm +# void process_sinc_neon_asm(float *out, const float *left, const float *right, const float *coeff) +# Hardcoded to 16 taps. +process_sinc_neon_asm: + # Left + vld1.f32 {q0-q1}, [r1]! + vld1.f32 {q2-q3}, [r1]! + # Right (q12-q15, since q4-q7 are callee-saved in the AAPCS) + vld1.f32 {q12-q13}, [r2]! + vld1.f32 {q14-q15}, [r2]! + # Coeff + vld1.f32 {q8-q9}, [r3, :1024]! + vld1.f32 {q10-q11}, [r3, :1024]! 
+ + # Left + vmul.f32 q0, q0, q8 + vmul.f32 q1, q1, q9 + vmla.f32 q0, q2, q10 + vmla.f32 q1, q3, q11 + + # Right + vmul.f32 q12, q12, q8 + vmul.f32 q13, q13, q9 + vmla.f32 q12, q14, q10 + vmla.f32 q13, q15, q11 + + # Add everything together + vadd.f32 q0, q0, q1 + vadd.f32 q12, q12, q13 + vadd.f32 d0, d0, d1 + vadd.f32 d24, d24, d25 + vpadd.f32 d0, d0, d24 + vst1.f32 d0, [r0] + + bx lr From 1c9dec56a8b8a5b3c6ec173a33fd6e9daea56b11 Mon Sep 17 00:00:00 2001 From: twinaphex Date: Sun, 2 Dec 2012 23:50:32 +0100 Subject: [PATCH 2/7] (NEON/Android) Some fixes --- android/native/jni/Android.mk | 9 +++++---- audio/sinc_neon.S | 4 ++-- console/griffin/griffin.c | 4 ++++ gfx/scaler/filter.c | 4 ++-- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/android/native/jni/Android.mk b/android/native/jni/Android.mk index ca27487b94..a84b349509 100644 --- a/android/native/jni/Android.mk +++ b/android/native/jni/Android.mk @@ -1,6 +1,6 @@ RARCH_VERSION = "0.9.8-beta3" LOCAL_PATH := $(call my-dir) -PERF_TEST := 0 +PERF_TEST := 1 HAVE_OPENSL := 1 include $(CLEAR_VARS) @@ -15,8 +15,9 @@ LOCAL_CFLAGS += -DANDROID_X86 -DHAVE_SSSE3 endif ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) -#LOCAL_CFLAGS += -mfpu=neon -LOCAL_CFLAGS += -DANDROID_ARM_V7 +LOCAL_CFLAGS += -mfpu=neon +LOCAL_CFLAGS += -DANDROID_ARM_V7 -DHAVE_NEON +LOCAL_SRC_FILES += ../../../audio/sinc_neon.S.neon endif ifeq ($(TARGET_ARCH),mips) @@ -27,7 +28,7 @@ LOCAL_MODULE := retroarch-activity RARCH_PATH := ../../.. LIBXML_PATH := ../../../deps/libxml2 -LOCAL_SRC_FILES = $(RARCH_PATH)/console/griffin/griffin.c +LOCAL_SRC_FILES += $(RARCH_PATH)/console/griffin/griffin.c ifeq ($(PERF_TEST), 1) diff --git a/audio/sinc_neon.S b/audio/sinc_neon.S index f6c78ce122..2b67215214 100644 --- a/audio/sinc_neon.S +++ b/audio/sinc_neon.S @@ -26,8 +26,8 @@ process_sinc_neon_asm: vld1.f32 {q12-q13}, [r2]! vld1.f32 {q14-q15}, [r2]! # Coeff - vld1.f32 {q8-q9}, [r3, :1024]! - vld1.f32 {q10-q11}, [r3, :1024]! + vld1.f32 {q8-q9}, [r3, :128]! 
+ vld1.f32 {q10-q11}, [r3, :128]! # Left vmul.f32 q0, q0, q8 diff --git a/console/griffin/griffin.c b/console/griffin/griffin.c index cf397a897c..e5e7cdd580 100644 --- a/console/griffin/griffin.c +++ b/console/griffin/griffin.c @@ -247,7 +247,11 @@ FIFO BUFFER /*============================================================ AUDIO HERMITE ============================================================ */ +#ifdef HAVE_NEON +#include "../../audio/sinc.c" +#else #include "../../audio/hermite.c" +#endif /*============================================================ RSOUND diff --git a/gfx/scaler/filter.c b/gfx/scaler/filter.c index c4bb12f606..df469eca98 100644 --- a/gfx/scaler/filter.c +++ b/gfx/scaler/filter.c @@ -95,7 +95,7 @@ static bool gen_filter_bilinear(struct scaler_ctx *ctx) return true; } -static inline double sinc(double phase) +static inline double filter_sinc(double phase) { if (fabs(phase) < 0.0001) return 1.0; @@ -116,7 +116,7 @@ static void gen_filter_sinc_sub(struct scaler_filter *filter, int len, int pos, { double sinc_phase = M_PI * ((double)((sinc_size << 15) + (pos & 0xffff)) / 0x10000 - j); double lanczos_phase = sinc_phase / ((sinc_size >> 1)); - int16_t sinc_val = FILTER_UNITY * sinc(sinc_phase * phase_mul) * sinc(lanczos_phase) * phase_mul; + int16_t sinc_val = FILTER_UNITY * filter_sinc(sinc_phase * phase_mul) * filter_sinc(lanczos_phase) * phase_mul; //sinc_sum += sinc_val; filter->filter[i * sinc_size + j] = sinc_val; From 63cf9247f1f2c8481e4a0c2ad72c18df0608d0b4 Mon Sep 17 00:00:00 2001 From: twinaphex Date: Sun, 2 Dec 2012 23:53:08 +0100 Subject: [PATCH 3/7] (Hermite) Add log --- audio/hermite.c | 1 + 1 file changed, 1 insertion(+) diff --git a/audio/hermite.c b/audio/hermite.c index c96008b8a3..97fd75b125 100644 --- a/audio/hermite.c +++ b/audio/hermite.c @@ -52,6 +52,7 @@ static inline float hermite_kernel(float mu1, float a, float b, float c, float d rarch_resampler_t *resampler_new(void) { + RARCH_LOG("Hermite resampler [C]\n"); return 
(rarch_resampler_t*)calloc(1, sizeof(rarch_resampler_t)); } From 7403faa6083325d1c31debb2f031754346f8c576 Mon Sep 17 00:00:00 2001 From: twinaphex Date: Mon, 3 Dec 2012 00:20:21 +0100 Subject: [PATCH 4/7] HAVE_NEON define seems to give problems - WANT_NEON seems to work - so quick-fix hack for now --- android/native/jni/Android.mk | 4 ++-- audio/sinc.c | 4 ++-- console/griffin/griffin.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/android/native/jni/Android.mk b/android/native/jni/Android.mk index a84b349509..b32b8a6fbe 100644 --- a/android/native/jni/Android.mk +++ b/android/native/jni/Android.mk @@ -15,8 +15,8 @@ LOCAL_CFLAGS += -DANDROID_X86 -DHAVE_SSSE3 endif ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) -LOCAL_CFLAGS += -mfpu=neon -LOCAL_CFLAGS += -DANDROID_ARM_V7 -DHAVE_NEON +LOCAL_CFLAGS += -DANDROID_ARM_V7 +LOCAL_CFLAGS += -DWANT_NEON LOCAL_SRC_FILES += ../../../audio/sinc_neon.S.neon endif diff --git a/audio/sinc.c b/audio/sinc.c index e4ab461755..519254d116 100644 --- a/audio/sinc.c +++ b/audio/sinc.c @@ -123,7 +123,7 @@ rarch_resampler_t *resampler_new(void) RARCH_LOG("Sinc resampler [AVX]\n"); #elif defined(__SSE__) RARCH_LOG("Sinc resampler [SSE]\n"); -#elif defined(HAVE_NEON) +#elif defined(WANT_NEON) RARCH_LOG("Sinc resampler [NEON]\n"); #else RARCH_LOG("Sinc resampler [C]\n"); @@ -210,7 +210,7 @@ static void process_sinc(rarch_resampler_t *resamp, float *out_buffer) // movehl { X, R, X, L } == { X, R, X, R } _mm_store_ss(out_buffer + 1, _mm_movehl_ps(sum, sum)); } -#elif defined(HAVE_NEON) +#elif defined(WANT_NEON) void process_sinc_neon_asm(float *out, const float *left, const float *right, const float *coeff); static void process_sinc(rarch_resampler_t *resamp, float *out_buffer) { diff --git a/console/griffin/griffin.c b/console/griffin/griffin.c index e5e7cdd580..c355d5fa64 100644 --- a/console/griffin/griffin.c +++ b/console/griffin/griffin.c @@ -247,7 +247,7 @@ FIFO BUFFER 
/*============================================================ AUDIO HERMITE ============================================================ */ -#ifdef HAVE_NEON +#ifdef WANT_NEON #include "../../audio/sinc.c" #else #include "../../audio/hermite.c" From 5eaaaacaf6cd60e1ca0599e35d218650a2e54198 Mon Sep 17 00:00:00 2001 From: twinaphex Date: Mon, 3 Dec 2012 01:29:04 +0100 Subject: [PATCH 5/7] (Android) Make Neon compilation optional with a switch --- android/native/jni/Android.mk | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/android/native/jni/Android.mk b/android/native/jni/Android.mk index b32b8a6fbe..ac8f268e00 100644 --- a/android/native/jni/Android.mk +++ b/android/native/jni/Android.mk @@ -2,6 +2,7 @@ RARCH_VERSION = "0.9.8-beta3" LOCAL_PATH := $(call my-dir) PERF_TEST := 1 HAVE_OPENSL := 1 +WANT_NEON := 0 include $(CLEAR_VARS) @@ -15,10 +16,13 @@ LOCAL_CFLAGS += -DANDROID_X86 -DHAVE_SSSE3 endif ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) -LOCAL_CFLAGS += -DANDROID_ARM_V7 +ifeq ($(WANT_NEON),1) LOCAL_CFLAGS += -DWANT_NEON LOCAL_SRC_FILES += ../../../audio/sinc_neon.S.neon endif +LOCAL_CFLAGS += -DANDROID_ARM_V7 +endif + ifeq ($(TARGET_ARCH),mips) LOCAL_CFLAGS += -DANDROID_MIPS -D__mips__ -D__MIPSEL__ From 6a7e03aadb54bb5598664d205059e8fa850179f1 Mon Sep 17 00:00:00 2001 From: twinaphex Date: Mon, 3 Dec 2012 03:21:10 +0100 Subject: [PATCH 6/7] (Android) Fix sound - don't fix samplerate at 44Khz --- audio/opensl.c | 2 -- gfx/gl.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/audio/opensl.c b/audio/opensl.c index 8483e815d6..f672b99e89 100644 --- a/audio/opensl.c +++ b/audio/opensl.c @@ -109,8 +109,6 @@ static void *sl_init(const char *device, unsigned rate, unsigned latency) { (void)device; - g_settings.audio.out_rate = 44100; - SLDataFormat_PCM fmt_pcm = {0}; SLDataSource audio_src = {0}; SLDataSink audio_sink = {0}; diff --git a/gfx/gl.c b/gfx/gl.c index 2606410ea3..bccfd634fe 100644 --- a/gfx/gl.c +++ b/gfx/gl.c @@ -1175,7 
+1175,7 @@ static bool gl_frame(void *data, const void *frame, unsigned width, unsigned hei if (msg) gl_render_msg(gl, msg); -#if !defined(RARCH_CONSOLE) && !defined(ANDROID) +#if !defined(RARCH_CONSOLE) context_update_window_title_func(false); #endif From ddc9c94884f061b6e92782921483cea0a0b1ca39 Mon Sep 17 00:00:00 2001 From: twinaphex Date: Mon, 3 Dec 2012 04:58:44 +0100 Subject: [PATCH 7/7] (Android) Refactor Android.mk - better defines --- android/native/jni/Android.mk | 11 ++++++++++- console/griffin/griffin.c | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/android/native/jni/Android.mk b/android/native/jni/Android.mk index ac8f268e00..d318c9e179 100644 --- a/android/native/jni/Android.mk +++ b/android/native/jni/Android.mk @@ -2,7 +2,8 @@ RARCH_VERSION = "0.9.8-beta3" LOCAL_PATH := $(call my-dir) PERF_TEST := 1 HAVE_OPENSL := 1 -WANT_NEON := 0 +WANT_NEON := 1 +HAVE_SINC := 1 include $(CLEAR_VARS) @@ -16,10 +17,18 @@ LOCAL_CFLAGS += -DANDROID_X86 -DHAVE_SSSE3 endif ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) + ifeq ($(WANT_NEON),1) LOCAL_CFLAGS += -DWANT_NEON +endif + +ifeq ($(HAVE_SINC),1) +ifeq ($(WANT_NEON),1) LOCAL_SRC_FILES += ../../../audio/sinc_neon.S.neon endif +LOCAL_CFLAGS += -DHAVE_SINC +endif + LOCAL_CFLAGS += -DANDROID_ARM_V7 endif diff --git a/console/griffin/griffin.c b/console/griffin/griffin.c index c355d5fa64..d0bd986416 100644 --- a/console/griffin/griffin.c +++ b/console/griffin/griffin.c @@ -247,7 +247,7 @@ FIFO BUFFER /*============================================================ AUDIO HERMITE ============================================================ */ -#ifdef WANT_NEON +#ifdef HAVE_SINC #include "../../audio/sinc.c" #else #include "../../audio/hermite.c"