RetroArch/audio/utils.c

/*  RetroArch - A frontend for libretro.
 *  Copyright (C) 2010-2014 - Hans-Kristian Arntzen
 *  Copyright (C) 2014 - Ali Bouhlel ( aliaspider@gmail.com )
 *
 *  RetroArch is free software: you can redistribute it and/or modify it under the terms
 *  of the GNU General Public License as published by the Free Software Found-
 *  ation, either version 3 of the License, or (at your option) any later version.
 *
 *  RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 *  PURPOSE.  See the GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along with RetroArch.
 *  If not, see <http://www.gnu.org/licenses/>.
 */

#include "../boolean.h"
#include "utils.h"

#include "../general.h"
#include "../performance.h"

#if defined(__SSE2__)
#include <emmintrin.h>
#elif defined(__ALTIVEC__)
#include <altivec.h>
#endif

/* Converts signed 16-bit PCM samples to floating point (portable C path).
 *
 * out     - destination buffer, receives `samples` floats.
 * in      - source buffer holding `samples` int16_t values.
 * samples - number of samples to convert.
 * gain    - linear gain. Every output is in[i] * (gain / 0x8000), so a gain
 *           of 1.0 maps the int16_t range onto [-1.0, 1.0).
 */
void audio_convert_s16_to_float_C(float *out,
      const int16_t *in, size_t samples, float gain)
{
   size_t remaining = samples;
   float scale      = gain / 0x8000;

   while (remaining-- > 0)
      *out++ = (float)*in++ * scale;
}

/* Converts floating point samples to signed 16-bit PCM (portable C path).
 *
 * out     - destination buffer, receives `samples` int16_t values.
 * in      - source buffer holding `samples` floats, nominally in [-1.0, 1.0).
 * samples - number of samples to convert.
 *
 * Each sample is scaled by 0x8000, truncated toward zero and saturated to
 * [-0x8000, 0x7FFF]. Saturation is decided on the float value *before* the
 * integer conversion: converting a float that does not fit in the target
 * integer type is undefined behaviour in C (on x86 it typically produces
 * INT32_MIN, which made very loud positive input clip to -0x8000).
 * NaN input is written as silence (0).
 */
void audio_convert_float_to_s16_C(int16_t *out,
      const float *in, size_t samples)
{
   size_t i;
   for (i = 0; i < samples; i++)
   {
      float scaled = in[i] * 0x8000;

      if (scaled >= 32767.0f)
         out[i] = 0x7FFF;
      else if (scaled <= -32768.0f)
         out[i] = -0x8000;
      else if (scaled != scaled) /* NaN is the only value unequal to itself. */
         out[i] = 0;
      else
         out[i] = (int16_t)scaled; /* In range: plain truncation toward zero. */
   }
}

#if defined(__SSE2__)
/* SSE2 version of audio_convert_s16_to_float_C().
 *
 * out     - destination buffer for `samples` floats (unaligned stores are used).
 * in      - source buffer of `samples` int16_t values (unaligned loads are used).
 * samples - number of samples to convert.
 * gain    - linear gain, same meaning as in the C version.
 *
 * Eight samples are handled per iteration; whatever is left over is passed
 * on to audio_convert_s16_to_float_C().
 */
void audio_convert_s16_to_float_SSE2(float *out,
      const int16_t *in, size_t samples, float gain)
{
   size_t done         = 0;
   const __m128i zero  = _mm_setzero_si128();
   /* The unpack below places every sample in the upper 16 bits of a 32-bit
    * lane (i.e. sample * 0x10000), hence 0x80000000 rather than 0x8000. */
   const __m128 scale  = _mm_set1_ps(gain / UINT32_C(0x80000000));

   while (done + 8 <= samples)
   {
      __m128i packed = _mm_loadu_si128((const __m128i *)in);
      __m128i lo     = _mm_unpacklo_epi16(zero, packed);
      __m128i hi     = _mm_unpackhi_epi16(zero, packed);

      _mm_storeu_ps(out + 0, _mm_mul_ps(_mm_cvtepi32_ps(lo), scale));
      _mm_storeu_ps(out + 4, _mm_mul_ps(_mm_cvtepi32_ps(hi), scale));

      in   += 8;
      out  += 8;
      done += 8;
   }

   /* Fewer than 8 samples remain; finish them with the scalar routine. */
   audio_convert_s16_to_float_C(out, in, samples - done, gain);
}

/* SSE2 version of audio_convert_float_to_s16_C().
 *
 * out     - destination buffer for `samples` int16_t values (unaligned stores).
 * in      - source buffer of `samples` floats (unaligned loads).
 * samples - number of samples to convert.
 *
 * Eight samples are handled per iteration; whatever is left over is passed
 * on to audio_convert_float_to_s16_C().
 *
 * The scaled values are clamped to [-32768, 32767] while still in float
 * form. _mm_cvtps_epi32() returns 0x80000000 for any lane that does not fit
 * in a 32-bit integer, and _mm_packs_epi32() would then saturate that to
 * -32768 even when the input was a large *positive* value. Results for
 * input that already fits are unchanged. NaN lanes are not specially handled.
 *
 * Note: _mm_cvtps_epi32() rounds according to MXCSR (round-to-nearest by
 * default), whereas the scalar tail truncates toward zero.
 */
void audio_convert_float_to_s16_SSE2(int16_t *out,
      const float *in, size_t samples)
{
   const __m128 factor  = _mm_set1_ps((float)0x8000);
   const __m128 max_val = _mm_set1_ps((float)0x7FFF);
   const __m128 min_val = _mm_set1_ps(-(float)0x8000);
   size_t i;
   for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8)
   {
      __m128 res0 = _mm_mul_ps(_mm_loadu_ps(in + 0), factor);
      __m128 res1 = _mm_mul_ps(_mm_loadu_ps(in + 4), factor);
      __m128i ints0;
      __m128i ints1;

      /* Saturate before converting so out-of-range lanes keep their sign. */
      res0  = _mm_max_ps(min_val, _mm_min_ps(max_val, res0));
      res1  = _mm_max_ps(min_val, _mm_min_ps(max_val, res1));

      ints0 = _mm_cvtps_epi32(res0);
      ints1 = _mm_cvtps_epi32(res1);

      _mm_storeu_si128((__m128i *)out, _mm_packs_epi32(ints0, ints1));
   }

   /* Fewer than 8 samples remain; finish them with the scalar routine. */
   audio_convert_float_to_s16_C(out, in, samples - i);
}
#elif defined(__ALTIVEC__)
/* AltiVec version of audio_convert_s16_to_float_C().
 *
 * out     - destination buffer for `samples` floats.
 * in      - source buffer of `samples` int16_t values.
 * samples - number of samples to convert.
 * gain    - linear gain, same meaning as in the C version.
 *
 * The vector path is only taken when both buffers are 16-byte aligned;
 * otherwise the whole conversion is delegated to the C routine.
 */
void audio_convert_s16_to_float_altivec(float *out,
      const int16_t *in, size_t samples, float gain)
{
   const vector float gain_vec = vec_splat((vector float)gain, 0);
   const vector float zero_vec = vec_splat((vector float)0.0f, 0);
   size_t done = 0;

   /* Unaligned vector loads/stores are costly, so only the (very likely)
    * aligned case is vectorised. */
   if ((((uintptr_t)out | (uintptr_t)in) & 15) != 0)
   {
      audio_convert_s16_to_float_C(out, in, samples, gain);
      return;
   }

   while (done + 8 <= samples)
   {
      vector signed short packed = vec_ld(0, in);
      vector signed int upper    = vec_unpackh(packed);
      vector signed int lower    = vec_unpackl(packed);
      /* vec_madd with a zero addend is used as a plain multiply. */
      vector float scaled_upper  = vec_madd(vec_ctf(upper, 15), gain_vec, zero_vec);
      vector float scaled_lower  = vec_madd(vec_ctf(lower, 15), gain_vec, zero_vec);

      vec_st(scaled_upper,  0, out);
      vec_st(scaled_lower, 16, out);

      in   += 8;
      out  += 8;
      done += 8;
   }

   /* Fewer than 8 samples remain; finish them with the scalar routine. */
   audio_convert_s16_to_float_C(out, in, samples - done, gain);
}

/* AltiVec version of audio_convert_float_to_s16_C().
 *
 * out     - destination buffer for `samples` int16_t values.
 * in      - source buffer of `samples` floats.
 * samples - number of samples to convert.
 *
 * The vector path is only taken when both buffers are 16-byte aligned;
 * otherwise the whole conversion is delegated to the C routine.
 */
void audio_convert_float_to_s16_altivec(int16_t *out,
      const float *in, size_t samples)
{
   size_t done = 0;

   /* Unaligned vector loads/stores are costly, so only the (very likely)
    * aligned case is vectorised. */
   if ((((uintptr_t)out | (uintptr_t)in) & 15) != 0)
   {
      audio_convert_float_to_s16_C(out, in, samples);
      return;
   }

   while (done + 8 <= samples)
   {
      vector float first_half   = vec_ld( 0, in);
      vector float second_half  = vec_ld(16, in);
      vector signed int fixed0  = vec_cts(first_half, 15);
      vector signed int fixed1  = vec_cts(second_half, 15);

      vec_st(vec_packs(fixed0, fixed1), 0, out);

      in   += 8;
      out  += 8;
      done += 8;
   }

   /* Fewer than 8 samples remain; finish them with the scalar routine. */
   audio_convert_float_to_s16_C(out, in, samples - done);
}
#elif defined(HAVE_NEON)
/* External conversion routine. The gain is passed by pointer to avoid
 * potential hard-float/soft-float ABI issues. */
void audio_convert_s16_float_asm(float *out, const int16_t *in, size_t samples, const float *gain);

/* NEON version of audio_convert_s16_to_float_C().
 *
 * The largest multiple of 8 samples is handed to the external routine
 * (skipped entirely when there are fewer than 8), and the remaining
 * 0-7 samples are converted by the C routine.
 */
static void audio_convert_s16_to_float_neon(float *out, const int16_t *in, size_t samples,
      float gain)
{
   size_t leftover = samples % 8;
   size_t bulk     = samples - leftover;

   if (bulk != 0)
      audio_convert_s16_float_asm(out, in, bulk, &gain);

   /* The whole conversion could live in ASM; kept simple for now. */
   audio_convert_s16_to_float_C(out + bulk, in + bulk, leftover, gain);
}

/* External conversion routine, defined outside this file. */
void audio_convert_float_s16_asm(int16_t *out, const float *in, size_t samples);

/* NEON version of audio_convert_float_to_s16_C().
 *
 * The largest multiple of 8 samples is handed to the external routine
 * (skipped entirely when there are fewer than 8), and the remaining
 * 0-7 samples are converted by the C routine.
 */
static void audio_convert_float_to_s16_neon(int16_t *out, const float *in, size_t samples)
{
   size_t leftover = samples % 8;
   size_t bulk     = samples - leftover;

   if (bulk != 0)
      audio_convert_float_s16_asm(out, in, bulk);

   audio_convert_float_to_s16_C(out + bulk, in + bulk, leftover);
}
#elif defined(_MIPS_ARCH_ALLEGREX)
/* PSP (Allegrex VFPU) version of audio_convert_s16_to_float_C().
 *
 * out     - destination buffer for `samples` floats; written with quad
 *           stores (sv.q), so it must be 16-byte aligned (asserted in
 *           DEBUG builds).
 * in      - source buffer of `samples` int16_t values; read with
 *           single-word loads (lv.s).
 * samples - number of samples to convert.
 * gain    - linear gain; pre-divided by 0x8000 below, as in the C version.
 *
 * 16 samples are handled per iteration by the inline assembly; the
 * remainder is converted by the scalar loop at the end.
 *
 * NOTE(review): the asm statements list inputs only - no outputs and no
 * clobbers - although they store through %1 and use VFPU registers.
 * Consider whether a "memory" clobber is needed; confirm against the
 * toolchain documentation.
 */
void audio_convert_s16_to_float_ALLEGREX(float *out,
      const int16_t *in, size_t samples, float gain)
{
#ifdef DEBUG
   // Make sure the buffer is 16 byte aligned, this should be the default behaviour of malloc in the PSPSDK.
   // Only the output buffer can be assumed to be 16-byte aligned.
   rarch_assert(((uintptr_t)out & 0xf) == 0);
#endif

   size_t i;
   gain = gain / 0x8000;
   // Move the pre-scaled gain into VFPU register s200 once; the loop below multiplies by it.
   __asm__ (
         ".set    push                    \n"
         ".set    noreorder               \n"
         "mtv     %0, s200                \n"
         ".set    pop                     \n"
         ::"r"(gain));

   // Per iteration: eight 4-byte loads (32 bytes = 16 int16_t samples) from in + i,
   // four 16-byte stores (64 bytes = 16 floats) to out + i.
   // Presumably vs2i.p widens each 16-bit pair to 32-bit integers, vi2f.q converts them
   // to float and vmscl.q applies the gain held in s200 - TODO confirm against a VFPU reference.
   for (i = 0; i + 16 <= samples; i += 16)
   {
      __asm__ (
            ".set    push                 \n"
            ".set    noreorder            \n"

            "lv.s    s100,  0(%0)         \n"
            "lv.s    s101,  4(%0)         \n"
            "lv.s    s110,  8(%0)         \n"
            "lv.s    s111, 12(%0)         \n"
            "lv.s    s120, 16(%0)         \n"
            "lv.s    s121, 20(%0)         \n"
            "lv.s    s130, 24(%0)         \n"
            "lv.s    s131, 28(%0)         \n"

            "vs2i.p  c100, c100           \n"
            "vs2i.p  c110, c110           \n"
            "vs2i.p  c120, c120           \n"
            "vs2i.p  c130, c130           \n"

            "vi2f.q  c100, c100, 16       \n"
            "vi2f.q  c110, c110, 16       \n"
            "vi2f.q  c120, c120, 16       \n"
            "vi2f.q  c130, c130, 16       \n"

            "vmscl.q e100, e100, s200     \n"

            "sv.q    c100,  0(%1)         \n"
            "sv.q    c110, 16(%1)         \n"
            "sv.q    c120, 32(%1)         \n"
            "sv.q    c130, 48(%1)         \n"

            ".set    pop                  \n"
            :: "r"(in + i), "r"(out + i));
   }

   // Scalar tail for the last (samples % 16) samples, same formula as the C version.
   for (; i < samples; i++)
      out[i] = (float)in[i] * gain;
}

/* PSP (Allegrex VFPU) version of audio_convert_float_to_s16_C().
 *
 * out     - destination buffer for `samples` int16_t values; written with
 *           quad stores (sv.q), so it must be 16-byte aligned.
 * in      - source buffer of `samples` floats; read with quad loads
 *           (lv.q), so it must be 16-byte aligned as well.
 * samples - number of samples to convert.
 *
 * Both alignment requirements are asserted in DEBUG builds only.
 * 8 samples are handled per iteration by the inline assembly; the
 * remainder is converted by the scalar loop at the end.
 *
 * NOTE(review): the asm statement lists inputs only - no outputs and no
 * clobbers - although it stores through %1 and uses VFPU registers.
 * Consider whether a "memory" clobber is needed; confirm against the
 * toolchain documentation.
 */
void audio_convert_float_to_s16_ALLEGREX(int16_t *out,
      const float *in, size_t samples)
{
#ifdef DEBUG
   // Make sure the buffers are 16 byte aligned, this should be the default behaviour of malloc in the PSPSDK.
   // Both buffers are allocated by RetroArch, so can assume alignment.
   rarch_assert(((uintptr_t)in  & 0xf) == 0);
   rarch_assert(((uintptr_t)out & 0xf) == 0);
#endif

   size_t i;
   // Per iteration: two 16-byte loads (8 floats) from in + i, one 16-byte store (8 int16_t) to out + i.
   // Presumably vf2in.q converts float to scaled 32-bit integers and vi2s.q narrows them to
   // 16 bits, packing both halves into c100 - TODO confirm against a VFPU reference.
   for (i = 0; i + 8 <= samples; i += 8)
   {
      __asm__ (
            ".set    push                 \n"
            ".set    noreorder            \n"

            "lv.q    c100,  0(%0)         \n"
            "lv.q    c110,  16(%0)        \n"

            "vf2in.q c100, c100, 31       \n"
            "vf2in.q c110, c110, 31       \n"
            "vi2s.q  c100, c100           \n"
            "vi2s.q  c102, c110           \n"

            "sv.q    c100,  0(%1)         \n"

            ".set    pop                  \n"
            :: "r"(in + i), "r"(out + i));
   }

   // Scalar tail for the last (samples % 8) samples: scale, truncate, then clamp to int16_t range.
   // NOTE(review): the float -> int32_t cast is undefined when the scaled value does not fit in
   // int32_t (or is NaN); clamping in float before the cast would avoid that.
   for (; i < samples; i++)
   {
      int32_t val = (int32_t)(in[i] * 0x8000);
      out[i] = (val > 0x7FFF) ? 0x7FFF : (val < -0x8000 ? -0x8000 : (int16_t)val);
   }
}
#endif

/* Selects the sample conversion routines at runtime.
 *
 * Only does something when built with HAVE_NEON: the CPU feature mask is
 * queried and the *_arm function pointers are set to the NEON routines if
 * RETRO_SIMD_NEON is reported, or to the portable C routines otherwise.
 * In every other build this function is a no-op.
 */
void audio_convert_init_simd(void)
{
#ifdef HAVE_NEON
   unsigned cpu = rarch_get_cpu_features();

   if (cpu & RETRO_SIMD_NEON)
      audio_convert_s16_to_float_arm = audio_convert_s16_to_float_neon;
   else
      audio_convert_s16_to_float_arm = audio_convert_s16_to_float_C;

   if (cpu & RETRO_SIMD_NEON)
      audio_convert_float_to_s16_arm = audio_convert_float_to_s16_neon;
   else
      audio_convert_float_to_s16_arm = audio_convert_float_to_s16_C;
#endif
}
SSNES => RetroArch. 2012-04-21 21:13:50 +00:00			`/* RetroArch - A frontend for libretro.`
Update years on all license headers 2014-01-01 00:50:59 +00:00			`* Copyright (C) 2010-2014 - Hans-Kristian Arntzen`
Stylistic cleanups in CC resampler. 2014-03-23 13:14:42 +00:00			`* Copyright (C) 2014 - Ali Bouhlel ( aliaspider@gmail.com )`
Fixup headers. 2012-04-07 11:26:27 +00:00			`*`
SSNES => RetroArch. 2012-04-21 21:13:50 +00:00			`* RetroArch is free software: you can redistribute it and/or modify it under the terms`
Fixup headers. 2012-04-07 11:26:27 +00:00			`* of the GNU General Public License as published by the Free Software Found-`
			`* ation, either version 3 of the License, or (at your option) any later version.`
			`*`
SSNES => RetroArch. 2012-04-21 21:13:50 +00:00			`* RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;`
Fixup headers. 2012-04-07 11:26:27 +00:00			`* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR`
			`* PURPOSE. See the GNU General Public License for more details.`
			`*`
More fixups. 2012-04-21 21:31:57 +00:00			`* You should have received a copy of the GNU General Public License along with RetroArch.`
Fixup headers. 2012-04-07 11:26:27 +00:00			`* If not, see <http://www.gnu.org/licenses/>.`
			`*/`

Fixes PC build 2013-01-17 13:24:26 +00:00			`#include "../boolean.h"`
Altivec sample conversion. 2011-12-02 00:34:02 +00:00			`#include "utils.h"`

Fixes PC build 2013-01-17 13:24:26 +00:00			`#include "../general.h"`
Use NEON conditionally for sample conversions. 2013-01-24 17:37:42 +00:00			`#include "../performance.h"`
Fixes PC build 2013-01-17 13:24:26 +00:00
Actually align samples to 8. 2012-12-05 22:17:07 +00:00			`#if defined(__SSE2__)`
Altivec sample conversion. 2011-12-02 00:34:02 +00:00			`#include <emmintrin.h>`
Actually align samples to 8. 2012-12-05 22:17:07 +00:00			`#elif defined(__ALTIVEC__)`
Altivec sample conversion. 2011-12-02 00:34:02 +00:00			`#include <altivec.h>`
			`#endif`

			`void audio_convert_s16_to_float_C(float *out,`
Add volume control. It imposes no performance loss as it is performed during s16->float conversion. It is however grouped together with check_mute. 2012-11-03 13:15:03 +00:00			`const int16_t *in, size_t samples, float gain)`
Altivec sample conversion. 2011-12-02 00:34:02 +00:00			`{`
Start using C89-style for loop initial declarations for reusable code module parts that we reuse in other projects. It's a huge maintenance burden having to change this stuff around everytime when compiling in non-C99 mode 2013-10-19 17:39:38 +00:00			`size_t i;`
Add volume control. It imposes no performance loss as it is performed during s16->float conversion. It is however grouped together with check_mute. 2012-11-03 13:15:03 +00:00			`gain = gain / 0x8000;`
Start using C89-style for loop initial declarations for reusable code module parts that we reuse in other projects. It's a huge maintenance burden having to change this stuff around everytime when compiling in non-C99 mode 2013-10-19 17:39:38 +00:00			`for (i = 0; i < samples; i++)`
Add volume control. It imposes no performance loss as it is performed during s16->float conversion. It is however grouped together with check_mute. 2012-11-03 13:15:03 +00:00			`out[i] = (float)in[i] * gain;`
Altivec sample conversion. 2011-12-02 00:34:02 +00:00			`}`

			`void audio_convert_float_to_s16_C(int16_t *out,`
			`const float *in, size_t samples)`
			`{`
Start using C89-style for loop initial declarations for reusable code module parts that we reuse in other projects. It's a huge maintenance burden having to change this stuff around everytime when compiling in non-C99 mode 2013-10-19 17:39:38 +00:00			`size_t i;`
			`for (i = 0; i < samples; i++)`
Altivec sample conversion. 2011-12-02 00:34:02 +00:00			`{`
MSVC compat. 2011-12-24 23:59:46 +00:00			`int32_t val = (int32_t)(in[i] * 0x8000);`
Altivec sample conversion. 2011-12-02 00:34:02 +00:00			`out[i] = (val > 0x7FFF) ? 0x7FFF : (val < -0x8000 ? -0x8000 : (int16_t)val);`
			`}`
			`}`

Actually align samples to 8. 2012-12-05 22:17:07 +00:00			`#if defined(__SSE2__)`
Altivec sample conversion. 2011-12-02 00:34:02 +00:00			`void audio_convert_s16_to_float_SSE2(float *out,`
Add volume control. It imposes no performance loss as it is performed during s16->float conversion. It is however grouped together with check_mute. 2012-11-03 13:15:03 +00:00			`const int16_t *in, size_t samples, float gain)`
Altivec sample conversion. 2011-12-02 00:34:02 +00:00			`{`
Use more correct modulation factor in SSE convert. Use same factor as C. 2013-02-27 19:44:54 +00:00			`float fgain = gain / UINT32_C(0x80000000);`
Add volume control. It imposes no performance loss as it is performed during s16->float conversion. It is however grouped together with check_mute. 2012-11-03 13:15:03 +00:00			`__m128 factor = _mm_set1_ps(fgain);`
Altivec sample conversion. 2011-12-02 00:34:02 +00:00			`size_t i;`
			`for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8)`
			`{`
			`__m128i input = _mm_loadu_si128((const __m128i *)in);`
			`__m128i regs[2] = {`
			`_mm_unpacklo_epi16(_mm_setzero_si128(), input),`
			`_mm_unpackhi_epi16(_mm_setzero_si128(), input),`
			`};`

			`__m128 output[2] = {`
			`_mm_mul_ps(_mm_cvtepi32_ps(regs[0]), factor),`
			`_mm_mul_ps(_mm_cvtepi32_ps(regs[1]), factor),`
			`};`

			`_mm_storeu_ps(out + 0, output[0]);`
			`_mm_storeu_ps(out + 4, output[1]);`
			`}`

Add volume control. It imposes no performance loss as it is performed during s16->float conversion. It is however grouped together with check_mute. 2012-11-03 13:15:03 +00:00			`audio_convert_s16_to_float_C(out, in, samples - i, gain);`
Altivec sample conversion. 2011-12-02 00:34:02 +00:00			`}`

			`void audio_convert_float_to_s16_SSE2(int16_t *out,`
			`const float *in, size_t samples)`
			`{`
Use more correct modulation factor in SSE convert. Use same factor as C. 2013-02-27 19:44:54 +00:00			`__m128 factor = _mm_set1_ps((float)0x8000);`
Altivec sample conversion. 2011-12-02 00:34:02 +00:00			`size_t i;`
			`for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8)`
			`{`
			`__m128 input[2] = { _mm_loadu_ps(in + 0), _mm_loadu_ps(in + 4) };`
			`__m128 res[2] = { _mm_mul_ps(input[0], factor), _mm_mul_ps(input[1], factor) };`

			`__m128i ints[2] = { _mm_cvtps_epi32(res[0]), _mm_cvtps_epi32(res[1]) };`
			`__m128i packed = _mm_packs_epi32(ints[0], ints[1]);`

			`_mm_storeu_si128((__m128i *)out, packed);`
			`}`

			`audio_convert_float_to_s16_C(out, in, samples - i);`
			`}`
Actually align samples to 8. 2012-12-05 22:17:07 +00:00			`#elif defined(__ALTIVEC__)`
Altivec sample conversion. 2011-12-02 00:34:02 +00:00			`void audio_convert_s16_to_float_altivec(float *out,`
Add volume control. It imposes no performance loss as it is performed during s16->float conversion. It is however grouped together with check_mute. 2012-11-03 13:15:03 +00:00			`const int16_t *in, size_t samples, float gain)`
Altivec sample conversion. 2011-12-02 00:34:02 +00:00			`{`
(PPC Altivec) turn vec_splats into vec_splat - PPC OSX (Leopard) doesn't support vec_splats intrinsic 2014-02-11 07:32:18 +00:00			`const vector float gain_vec = vec_splat((vector float)gain, 0);`
			`const vector float zero_vec = vec_splat((vector float)0.0f, 0);`
Altivec sample conversion. 2011-12-02 00:34:02 +00:00			`// Unaligned loads/store is a bit expensive, so we optimize for the good path (very likely).`
			`if (((uintptr_t)out & 15) + ((uintptr_t)in & 15) == 0)`
			`{`
			`size_t i;`
			`for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8)`
			`{`
			`vector signed short input = vec_ld(0, in);`
			`vector signed int hi = vec_unpackh(input);`
			`vector signed int lo = vec_unpackl(input);`
Use vec_madd instead of vec_mul. vec_mul was apparently a POWER7 instruction. 2012-11-06 08:26:28 +00:00			`vector float out_hi = vec_madd(vec_ctf(hi, 15), gain_vec, zero_vec);`
			`vector float out_lo = vec_madd(vec_ctf(lo, 15), gain_vec, zero_vec);`
Altivec sample conversion. 2011-12-02 00:34:02 +00:00
			`vec_st(out_hi, 0, out);`
			`vec_st(out_lo, 16, out);`
			`}`

Add volume control. It imposes no performance loss as it is performed during s16->float conversion. It is however grouped together with check_mute. 2012-11-03 13:15:03 +00:00			`audio_convert_s16_to_float_C(out, in, samples - i, gain);`
Altivec sample conversion. 2011-12-02 00:34:02 +00:00			`}`
			`else`
Add volume control. It imposes no performance loss as it is performed during s16->float conversion. It is however grouped together with check_mute. 2012-11-03 13:15:03 +00:00			`audio_convert_s16_to_float_C(out, in, samples, gain);`
Altivec sample conversion. 2011-12-02 00:34:02 +00:00			`}`

			`void audio_convert_float_to_s16_altivec(int16_t *out,`
			`const float *in, size_t samples)`
			`{`
			`// Unaligned loads/store is a bit expensive, so we optimize for the good path (very likely).`
			`if (((uintptr_t)out & 15) + ((uintptr_t)in & 15) == 0)`
			`{`
			`size_t i;`
			`for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8)`
			`{`
			`vector float input0 = vec_ld( 0, in);`
			`vector float input1 = vec_ld(16, in);`
			`vector signed int result0 = vec_cts(input0, 15);`
			`vector signed int result1 = vec_cts(input1, 15);`
			`vec_st(vec_packs(result0, result1), 0, out);`
			`}`

			`audio_convert_float_to_s16_C(out, in, samples - i);`
			`}`
			`else`
			`audio_convert_float_to_s16_C(out, in, samples);`
			`}`
Actually align samples to 8. 2012-12-05 22:17:07 +00:00			`#elif defined(HAVE_NEON)`
Add volume control to NEON ASM. 2013-12-29 11:59:57 +00:00			`void audio_convert_s16_float_asm(float out, const int16_t in, size_t samples, const float *gain); // Avoid potential hard-float/soft-float ABI issues.`
Use NEON conditionally for sample conversions. 2013-01-24 17:37:42 +00:00			`static void audio_convert_s16_to_float_neon(float out, const int16_t in, size_t samples,`
NEON optimized s16->float->s16. 2012-12-05 21:45:29 +00:00			`float gain)`
			`{`
Actually align samples to 8. 2012-12-05 22:17:07 +00:00			`size_t aligned_samples = samples & ~7;`
Fix segfault when samples < 8 on NEON. 2012-12-28 20:25:41 +00:00			`if (aligned_samples)`
Add volume control to NEON ASM. 2013-12-29 11:59:57 +00:00			`audio_convert_s16_float_asm(out, in, aligned_samples, &gain);`
NEON optimized s16->float->s16. 2012-12-05 21:45:29 +00:00
			`// Could do all conversion in ASM, but keep it simple for now.`
			`audio_convert_s16_to_float_C(out + aligned_samples, in + aligned_samples,`
Add volume control to NEON ASM. 2013-12-29 11:59:57 +00:00			`samples - aligned_samples, gain);`
NEON optimized s16->float->s16. 2012-12-05 21:45:29 +00:00			`}`
Altivec sample conversion. 2011-12-02 00:34:02 +00:00
NEON optimized s16->float->s16. 2012-12-05 21:45:29 +00:00			`void audio_convert_float_s16_asm(int16_t out, const float in, size_t samples);`
Use NEON conditionally for sample conversions. 2013-01-24 17:37:42 +00:00			`static void audio_convert_float_to_s16_neon(int16_t out, const float in, size_t samples)`
NEON optimized s16->float->s16. 2012-12-05 21:45:29 +00:00			`{`
Actually align samples to 8. 2012-12-05 22:17:07 +00:00			`size_t aligned_samples = samples & ~7;`
Fix segfault when samples < 8 on NEON. 2012-12-28 20:25:41 +00:00			`if (aligned_samples)`
			`audio_convert_float_s16_asm(out, in, aligned_samples);`

NEON optimized s16->float->s16. 2012-12-05 21:45:29 +00:00			`audio_convert_float_to_s16_C(out + aligned_samples, in + aligned_samples,`
			`samples - aligned_samples);`
			`}`
(PSP) add allegrex VFPU optimized s16 <-> float conversions. 2014-03-14 14:12:47 +00:00			`#elif defined(_MIPS_ARCH_ALLEGREX)`
			`void audio_convert_s16_to_float_ALLEGREX(float *out,`
			`const int16_t *in, size_t samples, float gain)`
			`{`
			`#ifdef DEBUG`
Stylistic cleanups in CC resampler. 2014-03-23 13:14:42 +00:00			`// Make sure the buffer is 16 byte aligned, this should be the default behaviour of malloc in the PSPSDK.`
			`// Only the output buffer can be assumed to be 16-byte aligned.`
			`rarch_assert(((uintptr_t)out & 0xf) == 0);`
(PSP) add allegrex VFPU optimized s16 <-> float conversions. 2014-03-14 14:12:47 +00:00			`#endif`
Stylistic cleanups in CC resampler. 2014-03-23 13:14:42 +00:00
(PSP) add allegrex VFPU optimized s16 <-> float conversions. 2014-03-14 14:12:47 +00:00			`size_t i;`
			`gain = gain / 0x8000;`
			`__asm__ (`
Stylistic cleanups in CC resampler. 2014-03-23 13:14:42 +00:00			`".set push \n"`
			`".set noreorder \n"`
			`"mtv %0, s200 \n"`
			`".set pop \n"`
			`::"r"(gain));`

			`for (i = 0; i + 16 <= samples; i += 16)`
(PSP) add allegrex VFPU optimized s16 <-> float conversions. 2014-03-14 14:12:47 +00:00			`{`
			`__asm__ (`
Stylistic cleanups in CC resampler. 2014-03-23 13:14:42 +00:00			`".set push \n"`
			`".set noreorder \n"`

			`"lv.s s100, 0(%0) \n"`
			`"lv.s s101, 4(%0) \n"`
			`"lv.s s110, 8(%0) \n"`
			`"lv.s s111, 12(%0) \n"`
			`"lv.s s120, 16(%0) \n"`
			`"lv.s s121, 20(%0) \n"`
			`"lv.s s130, 24(%0) \n"`
			`"lv.s s131, 28(%0) \n"`

			`"vs2i.p c100, c100 \n"`
			`"vs2i.p c110, c110 \n"`
			`"vs2i.p c120, c120 \n"`
			`"vs2i.p c130, c130 \n"`

			`"vi2f.q c100, c100, 16 \n"`
			`"vi2f.q c110, c110, 16 \n"`
			`"vi2f.q c120, c120, 16 \n"`
			`"vi2f.q c130, c130, 16 \n"`

			`"vmscl.q e100, e100, s200 \n"`

			`"sv.q c100, 0(%1) \n"`
			`"sv.q c110, 16(%1) \n"`
			`"sv.q c120, 32(%1) \n"`
			`"sv.q c130, 48(%1) \n"`

			`".set pop \n"`
			`:: "r"(in + i), "r"(out + i));`
(PSP) add allegrex VFPU optimized s16 <-> float conversions. 2014-03-14 14:12:47 +00:00			`}`

Stylistic cleanups in CC resampler. 2014-03-23 13:14:42 +00:00			`for (; i < samples; i++)`
(PSP) add allegrex VFPU optimized s16 <-> float conversions. 2014-03-14 14:12:47 +00:00			`out[i] = (float)in[i] * gain;`
			`}`

			`void audio_convert_float_to_s16_ALLEGREX(int16_t *out,`
			`const float *in, size_t samples)`
			`{`
			`#ifdef DEBUG`
Stylistic cleanups in CC resampler. 2014-03-23 13:14:42 +00:00			`// Make sure the buffers are 16 byte aligned, this should be the default behaviour of malloc in the PSPSDK.`
			`// Both buffers are allocated by RetroArch, so can assume alignment.`
			`rarch_assert(((uintptr_t)in & 0xf) == 0);`
			`rarch_assert(((uintptr_t)out & 0xf) == 0);`
(PSP) add allegrex VFPU optimized s16 <-> float conversions. 2014-03-14 14:12:47 +00:00			`#endif`

			`size_t i;`
Stylistic cleanups in CC resampler. 2014-03-23 13:14:42 +00:00			`for (i = 0; i + 8 <= samples; i += 8)`
(PSP) add allegrex VFPU optimized s16 <-> float conversions. 2014-03-14 14:12:47 +00:00			`{`
			`__asm__ (`
Stylistic cleanups in CC resampler. 2014-03-23 13:14:42 +00:00			`".set push \n"`
			`".set noreorder \n"`
(PSP) add allegrex VFPU optimized s16 <-> float conversions. 2014-03-14 14:12:47 +00:00
Stylistic cleanups in CC resampler. 2014-03-23 13:14:42 +00:00			`"lv.q c100, 0(%0) \n"`
			`"lv.q c110, 16(%0) \n"`
(PSP) add allegrex VFPU optimized s16 <-> float conversions. 2014-03-14 14:12:47 +00:00
Stylistic cleanups in CC resampler. 2014-03-23 13:14:42 +00:00			`"vf2in.q c100, c100, 31 \n"`
			`"vf2in.q c110, c110, 31 \n"`
			`"vi2s.q c100, c100 \n"`
			`"vi2s.q c102, c110 \n"`
(PSP) add allegrex VFPU optimized s16 <-> float conversions. 2014-03-14 14:12:47 +00:00
Stylistic cleanups in CC resampler. 2014-03-23 13:14:42 +00:00			`"sv.q c100, 0(%1) \n"`
(PSP) add allegrex VFPU optimized s16 <-> float conversions. 2014-03-14 14:12:47 +00:00
Stylistic cleanups in CC resampler. 2014-03-23 13:14:42 +00:00			`".set pop \n"`
			`:: "r"(in + i), "r"(out + i));`
(PSP) add allegrex VFPU optimized s16 <-> float conversions. 2014-03-14 14:12:47 +00:00			`}`

Stylistic cleanups in CC resampler. 2014-03-23 13:14:42 +00:00			`for (; i < samples; i++)`
(PSP) add allegrex VFPU optimized s16 <-> float conversions. 2014-03-14 14:12:47 +00:00			`{`
			`int32_t val = (int32_t)(in[i] * 0x8000);`
			`out[i] = (val > 0x7FFF) ? 0x7FFF : (val < -0x8000 ? -0x8000 : (int16_t)val);`
			`}`
			`}`
Altivec sample conversion. 2011-12-02 00:34:02 +00:00			`#endif`

Use NEON conditionally for sample conversions. 2013-01-24 17:37:42 +00:00			`void audio_convert_init_simd(void)`
			`{`
			`#ifdef HAVE_NEON`
(Android) Build fix 2013-12-19 02:45:17 +00:00			`unsigned cpu = rarch_get_cpu_features();`
(Android) Build fix 2013-12-17 19:56:31 +00:00			`audio_convert_s16_to_float_arm = cpu & RETRO_SIMD_NEON ?`
Use NEON conditionally for sample conversions. 2013-01-24 17:37:42 +00:00			`audio_convert_s16_to_float_neon : audio_convert_s16_to_float_C;`
(Android) Build fix 2013-12-17 19:56:31 +00:00			`audio_convert_float_to_s16_arm = cpu & RETRO_SIMD_NEON ?`
Use NEON conditionally for sample conversions. 2013-01-24 17:37:42 +00:00			`audio_convert_float_to_s16_neon : audio_convert_float_to_s16_C;`
			`#endif`
			`}`