(libretro-common) Create libretro-common/conversion/float_to_s16

2025-01-18 08:52:41 +00:00 · 2016-05-16 11:11:02 +02:00 · 2016-05-16 11:11:02 +02:00 · 2422dfbbbe
commit 2422dfbbbe
parent d7b28d3b24
13 changed files with 388 additions and 270 deletions
--- a/Makefile.common
+++ b/Makefile.common
@ -369,9 +369,11 @@ ifeq ($(HAVE_NEON),1)
   DEFINES += -DSINC_LOWER_QUALITY
 endif

-OBJ += audio/audio_utils.o
+OBJ += audio/audio_utils.o \
+		 libretro-common/conversion/float_to_s16.o
 ifeq ($(HAVE_NEON),1)
-   OBJ += audio/audio_utils_neon.o
+   OBJ += audio/audio_utils_neon.o \
+			 libretro-common/conversion/float_to_s16_neon.o
 endif

 HW_CONTEXT_MENU_DRIVERS=$(HAVE_RGUI)
--- a/Makefile.openpandora
+++ b/Makefile.openpandora
@ -10,7 +10,7 @@ TARGET := retroarch-pandora
 LDDIRS = -L. -L$(PNDSDK)/usr/lib
 INCDIRS = -I. -I$(PNDSDK)/usr/include

-OBJ = griffin/griffin.o audio/resamplers/sinc_resampler_neon.o audio/audio_utils_neon.o
+OBJ = griffin/griffin.o audio/resamplers/sinc_resampler_neon.o audio/audio_utils_neon.o libretro-common/conversion/float_to_s16_neon.o
 LDFLAGS = -L$(PNDSDK)/usr/lib -Wl,-rpath,$(PNDSDK)/usr/lib

 LIBS = -lGLESv2 -lEGL -ldl -lm -lpthread -lrt -lasound
--- a/audio/audio_driver.c
+++ b/audio/audio_driver.c
@ -19,6 +19,7 @@
 #include <retro_assert.h>

 #include <lists/string_list.h>
+#include <conversion/float_to_s16.h>

 #include "audio_driver.h"
 #include "audio_resampler_driver.h"
@ -581,7 +582,7 @@ static bool audio_driver_flush(const int16_t *data, size_t samples)
   {
      performance_counter_init(&audio_convert_float, "audio_convert_float");
      performance_counter_start(&audio_convert_float);
-      audio_convert_float_to_s16(audio_driver_data.output_samples.conv_buf,
+      convert_float_to_s16(audio_driver_data.output_samples.conv_buf,
            (const float*)output_data, output_frames * 2);
      performance_counter_stop(&audio_convert_float);

--- a/audio/audio_utils.c
+++ b/audio/audio_utils.c
@ -21,6 +21,7 @@

 #include <boolean.h>
 #include <features/features_cpu.h>
+#include <conversion/float_to_s16.h>

 #include "audio_utils.h"

@ -49,28 +50,6 @@ void audio_convert_s16_to_float_C(float *out,
      out[i] = (float)in[i] * gain; 
 }

-/**
- * audio_convert_float_to_s16_C:
- * @out               : output buffer
- * @in                : input buffer
- * @samples           : size of samples to be converted
- *
- * Converts audio samples from floating point 
- * to signed integer 16-bit.
- *
- * C implementation callback function.
- **/
-void audio_convert_float_to_s16_C(int16_t *out,
-      const float *in, size_t samples)
-{
-   size_t i;
-   for (i = 0; i < samples; i++)
-   {
-      int32_t val = (int32_t)(in[i] * 0x8000);
-      out[i]      = (val > 0x7FFF) ? 0x7FFF :
-         (val < -0x8000 ? -0x8000 : (int16_t)val);
-   }
-}

 #if defined(__SSE2__)
 /**
@ -107,38 +86,6 @@ void audio_convert_s16_to_float_SSE2(float *out,
   audio_convert_s16_to_float_C(out, in, samples - i, gain);
 }

-/**
- * audio_convert_float_to_s16_SSE2:
- * @out               : output buffer
- * @in                : input buffer
- * @samples           : size of samples to be converted
- *
- * Converts audio samples from floating point 
- * to signed integer 16-bit.
- *
- * SSE2 implementation callback function.
- **/
-void audio_convert_float_to_s16_SSE2(int16_t *out,
-      const float *in, size_t samples)
-{
-   size_t i;
-   __m128 factor = _mm_set1_ps((float)0x8000);
-
-   for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8)
-   {
-      __m128 input_l = _mm_loadu_ps(in + 0);
-      __m128 input_r = _mm_loadu_ps(in + 4);
-      __m128 res_l   = _mm_mul_ps(input_l, factor);
-      __m128 res_r   = _mm_mul_ps(input_r, factor);
-      __m128i ints_l = _mm_cvtps_epi32(res_l);
-      __m128i ints_r = _mm_cvtps_epi32(res_r);
-      __m128i packed = _mm_packs_epi32(ints_l, ints_r);
-
-      _mm_storeu_si128((__m128i *)out, packed);
-   }
-
-   audio_convert_float_to_s16_C(out, in, samples - i);
-}
 #elif defined(__ALTIVEC__)
 /**
 * audio_convert_s16_to_float_altivec:
@ -182,40 +129,6 @@ void audio_convert_s16_to_float_altivec(float *out,
   audio_convert_s16_to_float_C(out, in, samples_in, gain);
 }

-/**
- * audio_convert_float_to_s16_altivec:
- * @out               : output buffer
- * @in                : input buffer
- * @samples           : size of samples to be converted
- *
- * Converts audio samples from floating point 
- * to signed integer 16-bit.
- *
- * AltiVec implementation callback function.
- **/
-void audio_convert_float_to_s16_altivec(int16_t *out,
-      const float *in, size_t samples)
-{
-   int samples_in = samples;
-
-   /* Unaligned loads/store is a bit expensive, 
-    * so we optimize for the good path (very likely). */
-   if (((uintptr_t)out & 15) + ((uintptr_t)in & 15) == 0)
-   {
-      size_t i;
-      for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8)
-      {
-         vector float       input0 = vec_ld( 0, in);
-         vector float       input1 = vec_ld(16, in);
-         vector signed int result0 = vec_cts(input0, 15);
-         vector signed int result1 = vec_cts(input1, 15);
-         vec_st(vec_packs(result0, result1), 0, out);
-      }
-
-      samples_in -= i;
-   }
-   audio_convert_float_to_s16_C(out, in, samples_in);
-}
 #elif defined(__ARM_NEON__) && !defined(VITA)
 /* Avoid potential hard-float/soft-float ABI issues. */
 void audio_convert_s16_float_asm(float *out, const int16_t *in,
@ -244,30 +157,6 @@ static void audio_convert_s16_to_float_neon(float *out,
   audio_convert_s16_to_float_C(out + aligned_samples, in + aligned_samples,
         samples - aligned_samples, gain);
 }
-
-void audio_convert_float_s16_asm(int16_t *out, const float *in, size_t samples);
-
-/**
- * audio_convert_float_to_s16_neon:
- * @out               : output buffer
- * @in                : input buffer
- * @samples           : size of samples to be converted
- *
- * Converts audio samples from floating point 
- * to signed integer 16-bit.
- *
- * ARM NEON implementation callback function.
- **/
-static void audio_convert_float_to_s16_neon(int16_t *out,
-      const float *in, size_t samples)
-{
-   size_t aligned_samples = samples & ~7;
-   if (aligned_samples)
-      audio_convert_float_s16_asm(out, in, aligned_samples);
-
-   audio_convert_float_to_s16_C(out + aligned_samples, in + aligned_samples,
-         samples - aligned_samples);
-}
 #elif defined(_MIPS_ARCH_ALLEGREX)

 /**
@ -340,58 +229,6 @@ void audio_convert_s16_to_float_ALLEGREX(float *out,
   for (; i < samples; i++)
      out[i] = (float)in[i] * gain;
 }
-
-/**
- * audio_convert_float_to_s16_ALLEGREX:
- * @out               : output buffer
- * @in                : input buffer
- * @samples           : size of samples to be converted
- *
- * Converts audio samples from floating point 
- * to signed integer 16-bit.
- *
- * MIPS ALLEGREX implementation callback function.
- **/
-void audio_convert_float_to_s16_ALLEGREX(int16_t *out,
-      const float *in, size_t samples)
-{
-   size_t i;
-
-#ifdef DEBUG
-   /* Make sure the buffers are 16 byte aligned, this should be 
-    * the default behaviour of malloc in the PSPSDK.
-    * Both buffers are allocated by RetroArch, so can assume alignment. */
-   retro_assert(((uintptr_t)in  & 0xf) == 0);
-   retro_assert(((uintptr_t)out & 0xf) == 0);
-#endif
-
-   for (i = 0; i + 8 <= samples; i += 8)
-   {
-      __asm__ (
-            ".set    push                 \n"
-            ".set    noreorder            \n"
-
-            "lv.q    c100,  0(%0)         \n"
-            "lv.q    c110,  16(%0)        \n"
-
-            "vf2in.q c100, c100, 31       \n"
-            "vf2in.q c110, c110, 31       \n"
-            "vi2s.q  c100, c100           \n"
-            "vi2s.q  c102, c110           \n"
-
-            "sv.q    c100,  0(%1)         \n"
-
-            ".set    pop                  \n"
-            :: "r"(in + i), "r"(out + i));
-   }
-
-   for (; i < samples; i++)
-   {
-      int32_t val = (int32_t)(in[i] * 0x8000);
-      out[i]      = (val > 0x7FFF) ? 0x7FFF :
-         (val < -0x8000 ? -0x8000 : (int16_t)val);
-   }
-}
 #endif

 #ifndef RARCH_INTERNAL
@ -431,6 +268,6 @@ void audio_convert_init_simd(void)
   audio_convert_s16_to_float_arm = (cpu & RETRO_SIMD_NEON) ?
      audio_convert_s16_to_float_neon : audio_convert_s16_to_float_C;
   audio_convert_float_to_s16_arm = (cpu & RETRO_SIMD_NEON) ?
-      audio_convert_float_to_s16_neon : audio_convert_float_to_s16_C;
+      convert_float_to_s16_neon : convert_float_to_s16_C;
 #endif
 }
--- a/audio/audio_utils.h
+++ b/audio/audio_utils.h
@ -29,7 +29,6 @@ extern "C" {

 #if defined(__SSE2__)
 #define audio_convert_s16_to_float audio_convert_s16_to_float_SSE2
-#define audio_convert_float_to_s16 audio_convert_float_to_s16_SSE2

 /**
 * audio_convert_s16_to_float_SSE2:
@ -46,23 +45,8 @@ extern "C" {
 void audio_convert_s16_to_float_SSE2(float *out,
      const int16_t *in, size_t samples, float gain);

-/**
- * audio_convert_float_to_s16_SSE2:
- * @out               : output buffer
- * @in                : input buffer
- * @samples           : size of samples to be converted
- *
- * Converts audio samples from floating point 
- * to signed integer 16-bit.
- *
- * SSE2 implementation callback function.
- **/
-void audio_convert_float_to_s16_SSE2(int16_t *out,
-      const float *in, size_t samples);
-
 #elif defined(__ALTIVEC__)
 #define audio_convert_s16_to_float audio_convert_s16_to_float_altivec
-#define audio_convert_float_to_s16 audio_convert_float_to_s16_altivec

 /**
 * audio_convert_s16_to_float_altivec:
@ -79,33 +63,14 @@ void audio_convert_float_to_s16_SSE2(int16_t *out,
 void audio_convert_s16_to_float_altivec(float *out,
      const int16_t *in, size_t samples, float gain);

-/**
- * audio_convert_float_to_s16_altivec:
- * @out               : output buffer
- * @in                : input buffer
- * @samples           : size of samples to be converted
- *
- * Converts audio samples from floating point 
- * to signed integer 16-bit.
- *
- * AltiVec implementation callback function.
- **/
-void audio_convert_float_to_s16_altivec(int16_t *out,
-      const float *in, size_t samples);
-
 #elif defined(__ARM_NEON__) && !defined(VITA)
 #define audio_convert_s16_to_float audio_convert_s16_to_float_arm
-#define audio_convert_float_to_s16 audio_convert_float_to_s16_arm

 void (*audio_convert_s16_to_float_arm)(float *out,
      const int16_t *in, size_t samples, float gain);

-void (*audio_convert_float_to_s16_arm)(int16_t *out,
-      const float *in, size_t samples);
-
 #elif defined(_MIPS_ARCH_ALLEGREX)
 #define audio_convert_s16_to_float audio_convert_s16_to_float_ALLEGREX
-#define audio_convert_float_to_s16 audio_convert_float_to_s16_ALLEGREX

 /**
 * audio_convert_s16_to_float_ALLEGREX:
@ -121,23 +86,8 @@ void (*audio_convert_float_to_s16_arm)(int16_t *out,
 **/
 void audio_convert_s16_to_float_ALLEGREX(float *out,
      const int16_t *in, size_t samples, float gain);
-
-/**
- * audio_convert_float_to_s16_ALLEGREX:
- * @out               : output buffer
- * @in                : input buffer
- * @samples           : size of samples to be converted
- *
- * Converts audio samples from floating point 
- * to signed integer 16-bit.
- *
- * MIPS ALLEGREX implementation callback function.
- **/
-void audio_convert_float_to_s16_ALLEGREX(int16_t *out,
-      const float *in, size_t samples);
 #else
 #define audio_convert_s16_to_float audio_convert_s16_to_float_C
-#define audio_convert_float_to_s16 audio_convert_float_to_s16_C
 #endif

 /**
@ -155,20 +105,6 @@ void audio_convert_float_to_s16_ALLEGREX(int16_t *out,
 void audio_convert_s16_to_float_C(float *out,
      const int16_t *in, size_t samples, float gain);

-/**
- * audio_convert_float_to_s16_C:
- * @out               : output buffer
- * @in                : input buffer
- * @samples           : size of samples to be converted
- *
- * Converts audio samples from floating point 
- * to signed integer 16-bit.
- *
- * C implementation callback function.
- **/
-void audio_convert_float_to_s16_C(int16_t *out,
-      const float *in, size_t samples);
-
 /**
 * audio_convert_init_simd:
 *
--- a/audio/audio_utils_neon.S
+++ b/audio/audio_utils_neon.S
@ -60,39 +60,4 @@ _audio_convert_s16_float_asm:

   bx lr

-.align 4
-.globl audio_convert_float_s16_asm
-.globl _audio_convert_float_s16_asm
-# audio_convert_float_s16_asm(int16_t *out, const float *in, size_t samples)
-audio_convert_float_s16_asm:
-_audio_convert_float_s16_asm:
-   # Hacky way to get a constant of 2^15.
-   # ((2^4)^2)^2 * 0.5 = 2^15
-   vmov.f32 q8, #16.0
-   vmov.f32 q9, #0.5
-   vmul.f32 q8, q8, q8
-   vmul.f32 q8, q8, q8
-   vmul.f32 q8, q8, q9
-
-1:
-   # Preload here?
-   vld1.f32 {q0-q1}, [r1]!
-
-   vmul.f32 q0, q0, q8
-   vmul.f32 q1, q1, q8
-
-   vcvt.s32.f32 q0, q0
-   vcvt.s32.f32 q1, q1
-
-   vqmovn.s32 d4, q0
-   vqmovn.s32 d5, q1
-
-   vst1.f32 {d4-d5}, [r0]!
-
-   # Guaranteed to get samples in multiples of 8.
-   subs r2, r2, #8
-   bne 1b
-
-   bx lr
-
 #endif
--- a/griffin/griffin.c
+++ b/griffin/griffin.c
@ -983,6 +983,7 @@ XML
 AUDIO UTILS
 ============================================================ */
 #include "../audio/audio_utils.c"
+#include "../libretro-common/conversion/float_to_s16.c"

 /*============================================================
 LIBRETRODB
--- a/libretro-common/conversion/float_to_s16.c
+++ b/libretro-common/conversion/float_to_s16.c
@ -0,0 +1,196 @@
+/* Copyright  (C) 2010-2016 The RetroArch team
+ *
+ * ---------------------------------------------------------------------------------------
+ * The following license statement only applies to this file (float_to_s16.c).
+ * ---------------------------------------------------------------------------------------
+ *
+ * Permission is hereby granted, free of charge,
+ * to any person obtaining a copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <stdint.h>
+
+#if defined(__SSE2__)
+#include <emmintrin.h>
+#elif defined(__ALTIVEC__)
+#include <altivec.h>
+#endif
+
+/**
+ * convert_float_to_s16_C:
+ * @out               : output buffer
+ * @in                : input buffer
+ * @samples           : number of samples to convert
+ *
+ * Converts floating point samples to signed integer 16-bit.
+ * Each sample is scaled by 0x8000, truncated toward zero and
+ * saturated to [-0x8000, 0x7FFF]. NaN inputs produce 0.
+ *
+ * C implementation callback function.
+ **/
+void convert_float_to_s16_C(int16_t *out,
+      const float *in, size_t samples)
+{
+   size_t i;
+   for (i = 0; i < samples; i++)
+   {
+      /* Saturate while still in floating point: converting a float
+       * that does not fit in the integer type (or NaN) to an integer
+       * is undefined behaviour in C. */
+      float val = in[i] * 0x8000;
+
+      if (val >= (float)0x7FFF)
+         out[i] = 0x7FFF;
+      else if (val <= -(float)0x8000)
+         out[i] = -0x8000;
+      else if (val != val) /* NaN compares unequal to itself. */
+         out[i] = 0;
+      else
+         out[i] = (int16_t)val;
+   }
+}
+
+#if defined(__SSE2__)
+/**
+ * convert_float_to_s16_SSE2:
+ * @out               : output buffer
+ * @in                : input buffer
+ * @samples           : number of samples to convert
+ *
+ * Converts floating point samples
+ * to signed integer 16-bit.
+ *
+ * Handles eight samples per iteration using unaligned
+ * loads/stores; the remaining (samples % 8) samples are
+ * passed to convert_float_to_s16_C().
+ *
+ * SSE2 implementation callback function.
+ **/
+void convert_float_to_s16_SSE2(int16_t *out,
+      const float *in, size_t samples)
+{
+   size_t i;
+   /* Scale factor 0x8000 replicated into every lane. */
+   __m128 factor = _mm_set1_ps((float)0x8000);
+
+   for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8)
+   {
+      __m128 input_l = _mm_loadu_ps(in + 0);
+      __m128 input_r = _mm_loadu_ps(in + 4);
+      __m128 res_l   = _mm_mul_ps(input_l, factor);
+      __m128 res_r   = _mm_mul_ps(input_r, factor);
+      /* NOTE(review): presumably rounds according to the current MXCSR
+       * rounding mode, whereas the C tail truncates - confirm whether
+       * the difference matters to callers. */
+      __m128i ints_l = _mm_cvtps_epi32(res_l);
+      __m128i ints_r = _mm_cvtps_epi32(res_r);
+      /* Narrow the two 4 x int32 vectors into one 8 x int16 vector. */
+      __m128i packed = _mm_packs_epi32(ints_l, ints_r);
+
+      _mm_storeu_si128((__m128i *)out, packed);
+   }
+
+   /* in/out were advanced by the loop, so they already point at the tail. */
+   convert_float_to_s16_C(out, in, samples - i);
+}
+#elif defined(__ALTIVEC__)
+/**
+ * convert_float_to_s16_altivec:
+ * @out               : output buffer
+ * @in                : input buffer
+ * @samples           : number of samples to convert
+ *
+ * Converts floating point samples
+ * to signed integer 16-bit.
+ *
+ * The vector path is only taken when both buffers are
+ * 16-byte aligned; everything it does not process is
+ * passed to convert_float_to_s16_C().
+ *
+ * AltiVec implementation callback function.
+ **/
+void convert_float_to_s16_altivec(int16_t *out,
+      const float *in, size_t samples)
+{
+   /* Kept as size_t: the remainder is handed straight to the C
+    * fallback, and narrowing it to int would corrupt large counts. */
+   size_t samples_in = samples;
+
+   /* Unaligned loads/store is a bit expensive,
+    * so we optimize for the good path (very likely). */
+   if (((uintptr_t)out & 15) + ((uintptr_t)in & 15) == 0)
+   {
+      size_t i;
+      for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8)
+      {
+         vector float       input0 = vec_ld( 0, in);
+         vector float       input1 = vec_ld(16, in);
+         vector signed int result0 = vec_cts(input0, 15);
+         vector signed int result1 = vec_cts(input1, 15);
+         vec_st(vec_packs(result0, result1), 0, out);
+      }
+
+      /* i is the number of samples the vector loop consumed. */
+      samples_in -= i;
+   }
+   convert_float_to_s16_C(out, in, samples_in);
+}
+#elif defined(__ARM_NEON__) && !defined(VITA)
+/* Assembly routine, not defined in this file. It is only ever called
+ * below with a non-zero sample count that is a multiple of 8. */
+void convert_float_s16_asm(int16_t *out, const float *in, size_t samples);
+/**
+ * convert_float_to_s16_neon:
+ * @out               : output buffer
+ * @in                : input buffer
+ * @samples           : number of samples to convert
+ *
+ * Converts floating point samples
+ * to signed integer 16-bit.
+ *
+ * The largest multiple-of-8 prefix goes through the assembly
+ * routine; the remaining samples use convert_float_to_s16_C().
+ *
+ * NOTE(review): this function is static, so it can only be
+ * referenced from code compiled in the same translation unit -
+ * confirm that matches how callers are built.
+ *
+ * ARM NEON implementation callback function.
+ **/
+static void convert_float_to_s16_neon(int16_t *out,
+      const float *in, size_t samples)
+{
+   /* Round the count down to a multiple of 8. */
+   size_t aligned_samples = samples & ~7;
+   /* Skip the assembly call entirely when there is no full group of 8. */
+   if (aligned_samples)
+      convert_float_s16_asm(out, in, aligned_samples);
+
+   convert_float_to_s16_C(out + aligned_samples, in + aligned_samples,
+         samples - aligned_samples);
+}
+#elif defined(_MIPS_ARCH_ALLEGREX)
+/**
+ * convert_float_to_s16_ALLEGREX:
+ * @out               : output buffer
+ * @in                : input buffer
+ * @samples           : number of samples to convert
+ *
+ * Converts floating point samples
+ * to signed integer 16-bit.
+ *
+ * Eight samples are converted per inline-assembly iteration;
+ * the remaining (samples % 8) samples are passed to
+ * convert_float_to_s16_C().
+ *
+ * MIPS ALLEGREX implementation callback function.
+ **/
+void convert_float_to_s16_ALLEGREX(int16_t *out,
+      const float *in, size_t samples)
+{
+   size_t i;
+
+#ifdef DEBUG
+   /* Make sure the buffers are 16 byte aligned, this should be
+    * the default behaviour of malloc in the PSPSDK.
+    * Assume alignment. */
+   retro_assert(((uintptr_t)in  & 0xf) == 0);
+   retro_assert(((uintptr_t)out & 0xf) == 0);
+#endif
+
+   for (i = 0; i + 8 <= samples; i += 8)
+   {
+      /* The asm reads 32 bytes from 'in + i' and stores 16 bytes to
+       * 'out + i' behind the compiler's back, so a "memory" clobber
+       * is required to stop those accesses being cached or reordered
+       * around this statement. */
+      __asm__ (
+            ".set    push                 \n"
+            ".set    noreorder            \n"
+
+            "lv.q    c100,  0(%0)         \n"
+            "lv.q    c110,  16(%0)        \n"
+
+            "vf2in.q c100, c100, 31       \n"
+            "vf2in.q c110, c110, 31       \n"
+            "vi2s.q  c100, c100           \n"
+            "vi2s.q  c102, c110           \n"
+
+            "sv.q    c100,  0(%1)         \n"
+
+            ".set    pop                  \n"
+            :: "r"(in + i), "r"(out + i)
+            : "memory");
+   }
+
+   /* Tail: same scalar conversion the other SIMD variants fall back to. */
+   convert_float_to_s16_C(out + i, in + i, samples - i);
+}
+#endif
--- a/libretro-common/conversion/float_to_s16_neon.S
+++ b/libretro-common/conversion/float_to_s16_neon.S
@ -0,0 +1,56 @@
+/*  RetroArch - A frontend for libretro.
+ *  Copyright (C) 2010-2014 - Hans-Kristian Arntzen
+ * 
+ *  RetroArch is free software: you can redistribute it and/or modify it under the terms
+ *  of the GNU General Public License as published by the Free Software Found-
+ *  ation, either version 3 of the License, or (at your option) any later version.
+ *
+ *  RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ *  PURPOSE.  See the GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along with RetroArch.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+#if defined(__ARM_NEON__)
+
+#ifndef __MACH__
+.arm
+#endif
+
+.align 4
+.globl convert_float_s16_asm
+.globl _convert_float_s16_asm
+# convert_float_s16_asm(int16_t *out, const float *in, size_t samples)
+# r0 = out, r1 = in, r2 = samples (non-zero multiple of 8).
+# The symbol name has to match the C declaration and call site, which
+# use convert_float_s16_asm.
+convert_float_s16_asm:
+_convert_float_s16_asm:
+   # Hacky way to get a constant of 2^15.
+   # ((2^4)^2)^2 * 0.5 = 2^15
+   vmov.f32 q8, #16.0
+   vmov.f32 q9, #0.5
+   vmul.f32 q8, q8, q8
+   vmul.f32 q8, q8, q8
+   vmul.f32 q8, q8, q9
+
+1:
+   # Preload here?
+   # Load 8 floats and advance the input pointer.
+   vld1.f32 {q0-q1}, [r1]!
+
+   # Scale by 2^15.
+   vmul.f32 q0, q0, q8
+   vmul.f32 q1, q1, q8
+
+   # Float to signed 32-bit integer.
+   vcvt.s32.f32 q0, q0
+   vcvt.s32.f32 q1, q1
+
+   # Saturating narrow from 32-bit to 16-bit.
+   vqmovn.s32 d4, q0
+   vqmovn.s32 d5, q1
+
+   # Store 8 x int16 and advance the output pointer.
+   vst1.f32 {d4-d5}, [r0]!
+
+   # Guaranteed to get samples in multiples of 8.
+   subs r2, r2, #8
+   bne 1b
+
+   bx lr
+
+#endif
--- a/libretro-common/include/conversion/float_to_s16.h
+++ b/libretro-common/include/conversion/float_to_s16.h
@ -0,0 +1,120 @@
+/* Copyright  (C) 2010-2016 The RetroArch team
+ *
+ * ---------------------------------------------------------------------------------------
+ * The following license statement only applies to this file (float_to_s16.h).
+ * ---------------------------------------------------------------------------------------
+ *
+ * Permission is hereby granted, free of charge,
+ * to any person obtaining a copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __LIBRETRO_SDK_CONVERSION_FLOAT_TO_S16_H__
+#define __LIBRETRO_SDK_CONVERSION_FLOAT_TO_S16_H__
+
+#include <stdint.h>
+#include <stddef.h>
+
+#include <retro_common_api.h>
+
+RETRO_BEGIN_DECLS
+
+/**
+ * convert_float_to_s16_C:
+ * @out               : output buffer
+ * @in                : input buffer
+ * @samples           : number of samples to convert
+ *
+ * Converts floating point samples
+ * to signed integer 16-bit.
+ * Each sample is scaled by 0x8000 and
+ * clamped to the int16_t range.
+ *
+ * Portable C implementation; always declared,
+ * whichever SIMD variant is selected below.
+ **/
+void convert_float_to_s16_C(int16_t *out,
+      const float *in, size_t samples);
+
+#if defined(__SSE2__)
+#define convert_float_to_s16 convert_float_to_s16_SSE2
+/**
+ * convert_float_to_s16_SSE2:
+ * @out               : output buffer
+ * @in                : input buffer
+ * @samples           : number of samples to convert
+ *
+ * Converts floating point samples
+ * to signed integer 16-bit.
+ *
+ * SSE2 implementation; convert_float_to_s16
+ * expands to this when __SSE2__ is defined.
+ **/
+void convert_float_to_s16_SSE2(int16_t *out,
+      const float *in, size_t samples);
+#elif defined(__ALTIVEC__)
+#define convert_float_to_s16 convert_float_to_s16_altivec
+/**
+ * convert_float_to_s16_altivec:
+ * @out               : output buffer
+ * @in                : input buffer
+ * @samples           : number of samples to convert
+ *
+ * Converts floating point samples
+ * to signed integer 16-bit.
+ *
+ * AltiVec implementation; convert_float_to_s16
+ * expands to this when __ALTIVEC__ is defined.
+ **/
+void convert_float_to_s16_altivec(int16_t *out,
+      const float *in, size_t samples);
+#elif defined(__ARM_NEON__) && !defined(VITA)
+#define convert_float_to_s16 convert_float_to_s16_arm
+
+/* Function pointer that the convert_float_to_s16 macro expands to on
+ * this platform.
+ * NOTE(review): this is a definition (no 'extern') placed in a header,
+ * and nothing in this header assigns it - confirm where it is defined
+ * and that it is initialised before the first call through it. */
+void (*convert_float_to_s16_arm)(int16_t *out,
+      const float *in, size_t samples);
+
+/* Assembly routine used by convert_float_to_s16_neon(). */
+void convert_float_s16_asm(int16_t *out, const float *in, size_t samples);
+
+/**
+ * convert_float_to_s16_neon:
+ * @out               : output buffer
+ * @in                : input buffer
+ * @samples           : number of samples to convert
+ *
+ * Converts floating point samples
+ * to signed integer 16-bit.
+ *
+ * ARM NEON implementation callback function.
+ *
+ * NOTE(review): declared static, so every translation unit that
+ * includes this header and references the function needs its own
+ * definition - confirm this is intended.
+ **/
+static void convert_float_to_s16_neon(int16_t *out,
+      const float *in, size_t samples);
+#elif defined(_MIPS_ARCH_ALLEGREX)
+#define convert_float_to_s16 convert_float_to_s16_ALLEGREX
+/**
+ * convert_float_to_s16_ALLEGREX:
+ * @out               : output buffer
+ * @in                : input buffer
+ * @samples           : number of samples to convert
+ *
+ * Converts floating point samples
+ * to signed integer 16-bit.
+ *
+ * MIPS ALLEGREX implementation callback function.
+ **/
+void convert_float_to_s16_ALLEGREX(int16_t *out,
+      const float *in, size_t samples);
+#else
+#define convert_float_to_s16 convert_float_to_s16_C
+#endif
+
+
+RETRO_END_DECLS
+
+#endif
--- a/pkg/android/phoenix/jni/Android.mk
+++ b/pkg/android/phoenix/jni/Android.mk
@ -25,6 +25,7 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
 ifeq ($(HAVE_NEON),1)
 	DEFINES += -D__ARM_NEON__
   LOCAL_SRC_FILES += $(RARCH_DIR)/audio/audio_utils_neon.S.neon
+	LOCAL_SRC_FILES += $(RARCH_DIR)/libretro-common/conversion/float_to_s16_neon.S.neon
   LOCAL_SRC_FILES += $(RARCH_DIR)/audio/drivers_resampler/sinc_resampler_neon.S.neon
   LOCAL_SRC_FILES += $(RARCH_DIR)/audio/drivers_resampler/cc_resampler_neon.S.neon
 endif
--- a/pkg/android/phoenix/jni/Android2.mk
+++ b/pkg/android/phoenix/jni/Android2.mk
@ -25,6 +25,7 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
 ifeq ($(HAVE_NEON),1)
 	DEFINES += -D__ARM_NEON__
   LOCAL_SRC_FILES += $(RARCH_DIR)/audio/audio_utils_neon.S.neon
+   LOCAL_SRC_FILES += $(RARCH_DIR)/libretro-common/conversion/float_to_s16_neon.S.neon
   LOCAL_SRC_FILES += $(RARCH_DIR)/audio/drivers_resampler/sinc_resampler_neon.S.neon
   LOCAL_SRC_FILES += $(RARCH_DIR)/audio/drivers_resampler/cc_resampler_neon.S.neon
 endif
--- a/record/drivers/record_ffmpeg.c
+++ b/record/drivers/record_ffmpeg.c
@ -59,11 +59,13 @@ extern "C" {
 #include <rthreads/rthreads.h>
 #include <gfx/scaler/scaler.h>
 #include <file/config_file.h>
+#include <conversion/float_to_s16.h>
+
+#include "../../audio/audio_utils.h"

 #include "../../general.h"
 #include "../../verbosity.h"
 #include "../../audio/audio_resampler_driver.h"
-#include "../../audio/audio_utils.h"
 #include "../record_driver.h"

 #include "../../gfx/video_frame.h"
@ -1189,7 +1191,7 @@ static void ffmpeg_audio_resample(ffmpeg_t *handle,

      if (!handle->audio.use_float)
      {
-         audio_convert_float_to_s16(handle->audio.fixed_conv,
+         convert_float_to_s16(handle->audio.fixed_conv,
               handle->audio.resample_out,
               aud->frames * handle->params.channels);
         aud->data = handle->audio.fixed_conv;