mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-12-18 02:48:28 +00:00
Merge pull request #19102 from hrydgard/atrac3-vec-cleanup
atrac3: Add a cross-platform restrict modifier, use it for the vector math functions
This commit is contained in:
commit
0506adb65c
@ -143,7 +143,7 @@ static void imlt(ATRAC3Context *q, float *input, float *output, int odd_band)
|
||||
imdct_calc(&q->mdct_ctx, output, input);
|
||||
|
||||
/* Perform windowing on the output. */
|
||||
vector_fmul(output, output, mdct_window, MDCT_SIZE);
|
||||
vector_fmul(output, mdct_window, MDCT_SIZE);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -174,7 +174,7 @@ static void waves_synth(Atrac3pWaveSynthParams *synth_param,
|
||||
|
||||
/* invert phase if requested */
|
||||
if (invert_phase)
|
||||
vector_fmul_scalar(out, out, -1.0f, 128);
|
||||
vector_fmul_scalar(out, -1.0f, 128);
|
||||
|
||||
/* fade in with steep Hann window if requested */
|
||||
if (envelope->has_start_point) {
|
||||
@ -255,14 +255,14 @@ void ff_atrac3p_generate_tones(Atrac3pChanUnitCtx *ch_unit, int ch_num, int sb,
|
||||
/* Hann windowing for non-faded wave signals */
|
||||
if (tones_now->num_wavs && tones_next->num_wavs &&
|
||||
reg1_env_nonzero && reg2_env_nonzero) {
|
||||
vector_fmul(wavreg1, wavreg1, &hann_window[128], 128);
|
||||
vector_fmul(wavreg2, wavreg2, hann_window, 128);
|
||||
vector_fmul(wavreg1, &hann_window[128], 128);
|
||||
vector_fmul(wavreg2, hann_window, 128);
|
||||
} else {
|
||||
if (tones_now->num_wavs && !tones_now->curr_env.has_stop_point)
|
||||
vector_fmul(wavreg1, wavreg1, &hann_window[128], 128);
|
||||
vector_fmul(wavreg1, &hann_window[128], 128);
|
||||
|
||||
if (tones_next->num_wavs && !tones_next->curr_env.has_start_point)
|
||||
vector_fmul(wavreg2, wavreg2, hann_window, 128);
|
||||
vector_fmul(wavreg2, hann_window, 128);
|
||||
}
|
||||
|
||||
/* Overlap and add to residual */
|
||||
@ -502,15 +502,15 @@ void ff_atrac3p_imdct(FFTContext *mdct_ctx, float *pIn,
|
||||
* Both regions are 32 samples long. */
|
||||
if (wind_id & 2) { /* 1st half: steep window */
|
||||
memset(pOut, 0, sizeof(float) * 32);
|
||||
vector_fmul(&pOut[32], &pOut[32], av_sine_64, 64);
|
||||
vector_fmul(&pOut[32], av_sine_64, 64);
|
||||
} else /* 1st half: simple sine window */
|
||||
vector_fmul(pOut, pOut, av_sine_128, ATRAC3P_MDCT_SIZE / 2);
|
||||
vector_fmul(pOut, av_sine_128, ATRAC3P_MDCT_SIZE / 2);
|
||||
|
||||
if (wind_id & 1) { /* 2nd half: steep window */
|
||||
vector_fmul_reverse(&pOut[160], &pOut[160], av_sine_64, 64);
|
||||
vector_fmul_reverse(&pOut[160], av_sine_64, 64);
|
||||
memset(&pOut[224], 0, sizeof(float) * 32);
|
||||
} else /* 2nd half: simple sine window */
|
||||
vector_fmul_reverse(&pOut[128], &pOut[128], av_sine_128, ATRAC3P_MDCT_SIZE / 2);
|
||||
vector_fmul_reverse(&pOut[128], av_sine_128, ATRAC3P_MDCT_SIZE / 2);
|
||||
}
|
||||
|
||||
/* lookup table for fast modulo 23 op required for cyclic buffers of the IPQF */
|
||||
|
@ -4,15 +4,22 @@
|
||||
|
||||
// Compat hacks to make an FFMPEG-like environment, so we can keep the core code mostly unchanged.
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#if defined(__clang__)
|
||||
#define DECLARE_ALIGNED(n, t, v) t __attribute__((aligned(n))) v
|
||||
#define DECLARE_ASM_CONST(n, t, v) static const t av_used __attribute__((aligned(n))) v
|
||||
#define av_restrict __restrict
|
||||
#elif defined(__GNUC__)
|
||||
#define DECLARE_ALIGNED(n,t,v) t __attribute__ ((aligned (n))) v
|
||||
#define DECLARE_ASM_CONST(n,t,v) static const t av_used __attribute__ ((aligned (n))) v
|
||||
#define av_restrict __restrict__
|
||||
#elif defined(_MSC_VER)
|
||||
#define DECLARE_ALIGNED(n,t,v) __declspec(align(n)) t v
|
||||
#define DECLARE_ASM_CONST(n,t,v) __declspec(align(n)) static const t v
|
||||
#define av_restrict __restrict
|
||||
#else
|
||||
#define DECLARE_ALIGNED(n,t,v) t v
|
||||
#define DECLARE_ASM_CONST(n,t,v) static const t v
|
||||
#define av_restrict
|
||||
#endif
|
||||
|
||||
#define AV_HAVE_FAST_UNALIGNED 0
|
||||
@ -22,7 +29,6 @@
|
||||
// #define BITSTREAM_READER_LE
|
||||
|
||||
#define LOCAL_ALIGNED(bits, type, name, subscript) type name subscript
|
||||
#define av_restrict
|
||||
#define av_alias
|
||||
#define av_unused
|
||||
#define av_assert0(cond)
|
||||
|
@ -18,20 +18,20 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
inline void vector_fmul(float *dst, const float *src0, const float *src1, int len) {
|
||||
int i;
|
||||
for (i = 0; i < len; i++)
|
||||
dst[i] = src0[i] * src1[i];
|
||||
#include "compat.h"
|
||||
|
||||
/**
 * Multiply dst element-wise by src, in place:
 * dst[i] = dst[i] * src[i] for every i in [0, len).
 *
 * Both pointers carry av_restrict. Where that macro expands to a
 * restrict qualifier, the caller must pass buffers that do not overlap.
 *
 * @param dst  first factor on entry, product on return
 * @param src  second factor; only read
 * @param len  number of elements to process
 */
inline void vector_fmul(float * av_restrict dst, const float * av_restrict src, int len) {
|
||||
for (int i = 0; i < len; i++)
|
||||
dst[i] = dst[i] * src[i];
|
||||
}
|
||||
|
||||
/**
|
||||
* Multiply a vector of floats by a scalar float. Source and
|
||||
* destination vectors must overlap exactly or not at all.
|
||||
*/
|
||||
inline void vector_fmul_scalar(float *dst, const float *src, float mul, int len) {
|
||||
int i;
|
||||
for (i = 0; i < len; i++)
|
||||
dst[i] = src[i] * mul;
|
||||
/**
 * Scale a vector of floats in place: each of the first len elements of
 * dst is multiplied by mul. There is no separate source vector.
 *
 * @param dst  vector that is read and overwritten with the scaled values
 * @param mul  scalar factor applied to every element
 * @param len  number of elements to scale
 */
inline void vector_fmul_scalar(float *dst, float mul, int len) {
|
||||
for (int i = 0; i < len; i++)
|
||||
dst[i] *= mul;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -39,18 +39,15 @@ inline void vector_fmul_scalar(float *dst, const float *src, float mul, int len)
|
||||
* in a vector of floats. The second vector of floats is iterated over
|
||||
* in reverse order.
|
||||
*
|
||||
* @param dst output vector
|
||||
* @param dst output and first input vector
|
||||
* constraints: 32-byte aligned
|
||||
* @param src0 first input vector
|
||||
* constraints: 32-byte aligned
|
||||
* @param src1 second input vector
|
||||
* @param src second input vector
|
||||
* constraints: 32-byte aligned
|
||||
* @param len number of elements in the input
|
||||
* constraints: multiple of 16
|
||||
*/
|
||||
inline void vector_fmul_reverse(float *dst, const float *src0, const float *src1, int len) {
|
||||
int i;
|
||||
src1 += len - 1;
|
||||
for (i = 0; i < len; i++)
|
||||
dst[i] = src0[i] * src1[-i];
|
||||
// In-place reverse multiply: dst[i] *= src[len - 1 - i] for every i in [0, len).
// Both pointers carry av_restrict, so where that macro expands to a restrict
// qualifier dst and src must not overlap.
inline void vector_fmul_reverse(float * av_restrict dst, const float * av_restrict src, int len) {
|
||||
// Rebase src onto its last element so that src[-i] below walks it backwards.
src += len - 1;
|
||||
for (int i = 0; i < len; i++)
|
||||
// dst is both the first factor and the destination of the product.
dst[i] *= src[-i];
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user