mirror of
https://github.com/CTCaer/RetroArch.git
synced 2024-12-17 07:50:27 +00:00
289 lines
9.0 KiB
C
289 lines
9.0 KiB
C
/* RetroArch - A frontend for libretro.
|
|
* Copyright (C) 2010-2014 - Hans-Kristian Arntzen
|
|
* Copyright (C) 2014-2015 - Ali Bouhlel ( aliaspider@gmail.com )
|
|
*
|
|
* RetroArch is free software: you can redistribute it and/or modify it under the terms
|
|
* of the GNU General Public License as published by the Free Software Found-
|
|
* ation, either version 3 of the License, or (at your option) any later version.
|
|
*
|
|
* RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
|
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
* PURPOSE. See the GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along with RetroArch.
|
|
* If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include <boolean.h>
|
|
#include "utils.h"
|
|
|
|
#include "../performance.h"
|
|
|
|
#if defined(__SSE2__)
|
|
#include <emmintrin.h>
|
|
#elif defined(__ALTIVEC__)
|
|
#include <altivec.h>
|
|
#endif
|
|
|
|
void audio_convert_s16_to_float_C(float *out,
|
|
const int16_t *in, size_t samples, float gain)
|
|
{
|
|
size_t i;
|
|
gain = gain / 0x8000;
|
|
for (i = 0; i < samples; i++)
|
|
out[i] = (float)in[i] * gain;
|
|
}
|
|
|
|
void audio_convert_float_to_s16_C(int16_t *out,
|
|
const float *in, size_t samples)
|
|
{
|
|
size_t i;
|
|
for (i = 0; i < samples; i++)
|
|
{
|
|
int32_t val = (int32_t)(in[i] * 0x8000);
|
|
out[i] = (val > 0x7FFF) ? 0x7FFF :
|
|
(val < -0x8000 ? -0x8000 : (int16_t)val);
|
|
}
|
|
}
|
|
|
|
#if defined(__SSE2__)
|
|
void audio_convert_s16_to_float_SSE2(float *out,
|
|
const int16_t *in, size_t samples, float gain)
|
|
{
|
|
float fgain = gain / UINT32_C(0x80000000);
|
|
__m128 factor = _mm_set1_ps(fgain);
|
|
size_t i;
|
|
for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8)
|
|
{
|
|
__m128i input = _mm_loadu_si128((const __m128i *)in);
|
|
__m128i regs[2] = {
|
|
_mm_unpacklo_epi16(_mm_setzero_si128(), input),
|
|
_mm_unpackhi_epi16(_mm_setzero_si128(), input),
|
|
};
|
|
|
|
__m128 output[2] = {
|
|
_mm_mul_ps(_mm_cvtepi32_ps(regs[0]), factor),
|
|
_mm_mul_ps(_mm_cvtepi32_ps(regs[1]), factor),
|
|
};
|
|
|
|
_mm_storeu_ps(out + 0, output[0]);
|
|
_mm_storeu_ps(out + 4, output[1]);
|
|
}
|
|
|
|
audio_convert_s16_to_float_C(out, in, samples - i, gain);
|
|
}
|
|
|
|
void audio_convert_float_to_s16_SSE2(int16_t *out,
|
|
const float *in, size_t samples)
|
|
{
|
|
__m128 factor = _mm_set1_ps((float)0x8000);
|
|
size_t i;
|
|
for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8)
|
|
{
|
|
__m128 input[2] = { _mm_loadu_ps(in + 0), _mm_loadu_ps(in + 4) };
|
|
__m128 res[2] = { _mm_mul_ps(input[0], factor),
|
|
_mm_mul_ps(input[1], factor) };
|
|
|
|
__m128i ints[2] = { _mm_cvtps_epi32(res[0]), _mm_cvtps_epi32(res[1]) };
|
|
__m128i packed = _mm_packs_epi32(ints[0], ints[1]);
|
|
|
|
_mm_storeu_si128((__m128i *)out, packed);
|
|
}
|
|
|
|
audio_convert_float_to_s16_C(out, in, samples - i);
|
|
}
|
|
#elif defined(__ALTIVEC__)
|
|
void audio_convert_s16_to_float_altivec(float *out,
|
|
const int16_t *in, size_t samples, float gain)
|
|
{
|
|
size_t samples_in = samples;
|
|
const vector float gain_vec = { gain, gain , gain, gain };
|
|
const vector float zero_vec = { 0.0f, 0.0f, 0.0f, 0.0f};
|
|
|
|
/* Unaligned loads/store is a bit expensive, so we
|
|
* optimize for the good path (very likely). */
|
|
if (((uintptr_t)out & 15) + ((uintptr_t)in & 15) == 0)
|
|
{
|
|
size_t i;
|
|
for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8)
|
|
{
|
|
vector signed short input = vec_ld(0, in);
|
|
vector signed int hi = vec_unpackh(input);
|
|
vector signed int lo = vec_unpackl(input);
|
|
vector float out_hi = vec_madd(vec_ctf(hi, 15), gain_vec, zero_vec);
|
|
vector float out_lo = vec_madd(vec_ctf(lo, 15), gain_vec, zero_vec);
|
|
|
|
vec_st(out_hi, 0, out);
|
|
vec_st(out_lo, 16, out);
|
|
}
|
|
|
|
samples_in -= i;
|
|
}
|
|
audio_convert_s16_to_float_C(out, in, samples_in, gain);
|
|
}
|
|
|
|
void audio_convert_float_to_s16_altivec(int16_t *out,
|
|
const float *in, size_t samples)
|
|
{
|
|
int samples_in = samples;
|
|
|
|
/* Unaligned loads/store is a bit expensive,
|
|
* so we optimize for the good path (very likely). */
|
|
if (((uintptr_t)out & 15) + ((uintptr_t)in & 15) == 0)
|
|
{
|
|
size_t i;
|
|
for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8)
|
|
{
|
|
vector float input0 = vec_ld( 0, in);
|
|
vector float input1 = vec_ld(16, in);
|
|
vector signed int result0 = vec_cts(input0, 15);
|
|
vector signed int result1 = vec_cts(input1, 15);
|
|
vec_st(vec_packs(result0, result1), 0, out);
|
|
}
|
|
|
|
samples_in -= i;
|
|
}
|
|
audio_convert_float_to_s16_C(out, in, samples_in);
|
|
}
|
|
#elif defined(__ARM_NEON__)
|
|
/* Avoid potential hard-float/soft-float ABI issues. */
|
|
void audio_convert_s16_float_asm(float *out, const int16_t *in,
|
|
size_t samples, const float *gain);
|
|
|
|
static void audio_convert_s16_to_float_neon(float *out,
|
|
const int16_t *in, size_t samples, float gain)
|
|
{
|
|
size_t aligned_samples = samples & ~7;
|
|
if (aligned_samples)
|
|
audio_convert_s16_float_asm(out, in, aligned_samples, &gain);
|
|
|
|
/* Could do all conversion in ASM, but keep it simple for now. */
|
|
audio_convert_s16_to_float_C(out + aligned_samples, in + aligned_samples,
|
|
samples - aligned_samples, gain);
|
|
}
|
|
|
|
void audio_convert_float_s16_asm(int16_t *out, const float *in, size_t samples);
|
|
|
|
static void audio_convert_float_to_s16_neon(int16_t *out,
|
|
const float *in, size_t samples)
|
|
{
|
|
size_t aligned_samples = samples & ~7;
|
|
if (aligned_samples)
|
|
audio_convert_float_s16_asm(out, in, aligned_samples);
|
|
|
|
audio_convert_float_to_s16_C(out + aligned_samples, in + aligned_samples,
|
|
samples - aligned_samples);
|
|
}
|
|
#elif defined(_MIPS_ARCH_ALLEGREX)
|
|
void audio_convert_s16_to_float_ALLEGREX(float *out,
|
|
const int16_t *in, size_t samples, float gain)
|
|
{
|
|
#ifdef DEBUG
|
|
/* Make sure the buffer is 16 byte aligned, this should be the
|
|
* default behaviour of malloc in the PSPSDK.
|
|
* Only the output buffer can be assumed to be 16-byte aligned. */
|
|
rarch_assert(((uintptr_t)out & 0xf) == 0);
|
|
#endif
|
|
|
|
size_t i;
|
|
gain = gain / 0x8000;
|
|
__asm__ (
|
|
".set push \n"
|
|
".set noreorder \n"
|
|
"mtv %0, s200 \n"
|
|
".set pop \n"
|
|
::"r"(gain));
|
|
|
|
for (i = 0; i + 16 <= samples; i += 16)
|
|
{
|
|
__asm__ (
|
|
".set push \n"
|
|
".set noreorder \n"
|
|
|
|
"lv.s s100, 0(%0) \n"
|
|
"lv.s s101, 4(%0) \n"
|
|
"lv.s s110, 8(%0) \n"
|
|
"lv.s s111, 12(%0) \n"
|
|
"lv.s s120, 16(%0) \n"
|
|
"lv.s s121, 20(%0) \n"
|
|
"lv.s s130, 24(%0) \n"
|
|
"lv.s s131, 28(%0) \n"
|
|
|
|
"vs2i.p c100, c100 \n"
|
|
"vs2i.p c110, c110 \n"
|
|
"vs2i.p c120, c120 \n"
|
|
"vs2i.p c130, c130 \n"
|
|
|
|
"vi2f.q c100, c100, 16 \n"
|
|
"vi2f.q c110, c110, 16 \n"
|
|
"vi2f.q c120, c120, 16 \n"
|
|
"vi2f.q c130, c130, 16 \n"
|
|
|
|
"vmscl.q e100, e100, s200 \n"
|
|
|
|
"sv.q c100, 0(%1) \n"
|
|
"sv.q c110, 16(%1) \n"
|
|
"sv.q c120, 32(%1) \n"
|
|
"sv.q c130, 48(%1) \n"
|
|
|
|
".set pop \n"
|
|
:: "r"(in + i), "r"(out + i));
|
|
}
|
|
|
|
for (; i < samples; i++)
|
|
out[i] = (float)in[i] * gain;
|
|
}
|
|
|
|
void audio_convert_float_to_s16_ALLEGREX(int16_t *out,
|
|
const float *in, size_t samples)
|
|
{
|
|
#ifdef DEBUG
|
|
/* Make sure the buffers are 16 byte aligned, this should be
|
|
* the default behaviour of malloc in the PSPSDK.
|
|
* Both buffers are allocated by RetroArch, so can assume alignment. */
|
|
rarch_assert(((uintptr_t)in & 0xf) == 0);
|
|
rarch_assert(((uintptr_t)out & 0xf) == 0);
|
|
#endif
|
|
|
|
size_t i;
|
|
for (i = 0; i + 8 <= samples; i += 8)
|
|
{
|
|
__asm__ (
|
|
".set push \n"
|
|
".set noreorder \n"
|
|
|
|
"lv.q c100, 0(%0) \n"
|
|
"lv.q c110, 16(%0) \n"
|
|
|
|
"vf2in.q c100, c100, 31 \n"
|
|
"vf2in.q c110, c110, 31 \n"
|
|
"vi2s.q c100, c100 \n"
|
|
"vi2s.q c102, c110 \n"
|
|
|
|
"sv.q c100, 0(%1) \n"
|
|
|
|
".set pop \n"
|
|
:: "r"(in + i), "r"(out + i));
|
|
}
|
|
|
|
for (; i < samples; i++)
|
|
{
|
|
int32_t val = (int32_t)(in[i] * 0x8000);
|
|
out[i] = (val > 0x7FFF) ? 0x7FFF :
|
|
(val < -0x8000 ? -0x8000 : (int16_t)val);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
void audio_convert_init_simd(void)
|
|
{
|
|
#if defined(__ARM_NEON__)
|
|
unsigned cpu = rarch_get_cpu_features();
|
|
audio_convert_s16_to_float_arm = cpu & RETRO_SIMD_NEON ?
|
|
audio_convert_s16_to_float_neon : audio_convert_s16_to_float_C;
|
|
audio_convert_float_to_s16_arm = cpu & RETRO_SIMD_NEON ?
|
|
audio_convert_float_to_s16_neon : audio_convert_float_to_s16_C;
|
|
#endif
|
|
}
|
|
|