Merge pull request #19102 from hrydgard/atrac3-vec-cleanup

atrac3: Add a cross-platform restrict modifier, use it for the vector math functions
This commit is contained in:
Henrik Rydgård 2024-05-02 18:51:47 +01:00 committed by GitHub
commit 0506adb65c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 32 additions and 29 deletions

View File

@ -143,7 +143,7 @@ static void imlt(ATRAC3Context *q, float *input, float *output, int odd_band)
imdct_calc(&q->mdct_ctx, output, input);
/* Perform windowing on the output. */
vector_fmul(output, output, mdct_window, MDCT_SIZE);
vector_fmul(output, mdct_window, MDCT_SIZE);
}
/*

View File

@ -174,7 +174,7 @@ static void waves_synth(Atrac3pWaveSynthParams *synth_param,
/* invert phase if requested */
if (invert_phase)
vector_fmul_scalar(out, out, -1.0f, 128);
vector_fmul_scalar(out, -1.0f, 128);
/* fade in with steep Hann window if requested */
if (envelope->has_start_point) {
@ -255,14 +255,14 @@ void ff_atrac3p_generate_tones(Atrac3pChanUnitCtx *ch_unit, int ch_num, int sb,
/* Hann windowing for non-faded wave signals */
if (tones_now->num_wavs && tones_next->num_wavs &&
reg1_env_nonzero && reg2_env_nonzero) {
vector_fmul(wavreg1, wavreg1, &hann_window[128], 128);
vector_fmul(wavreg2, wavreg2, hann_window, 128);
vector_fmul(wavreg1, &hann_window[128], 128);
vector_fmul(wavreg2, hann_window, 128);
} else {
if (tones_now->num_wavs && !tones_now->curr_env.has_stop_point)
vector_fmul(wavreg1, wavreg1, &hann_window[128], 128);
vector_fmul(wavreg1, &hann_window[128], 128);
if (tones_next->num_wavs && !tones_next->curr_env.has_start_point)
vector_fmul(wavreg2, wavreg2, hann_window, 128);
vector_fmul(wavreg2, hann_window, 128);
}
/* Overlap and add to residual */
@ -502,15 +502,15 @@ void ff_atrac3p_imdct(FFTContext *mdct_ctx, float *pIn,
* Both regions are 32 samples long. */
if (wind_id & 2) { /* 1st half: steep window */
memset(pOut, 0, sizeof(float) * 32);
vector_fmul(&pOut[32], &pOut[32], av_sine_64, 64);
vector_fmul(&pOut[32], av_sine_64, 64);
} else /* 1st half: simple sine window */
vector_fmul(pOut, pOut, av_sine_128, ATRAC3P_MDCT_SIZE / 2);
vector_fmul(pOut, av_sine_128, ATRAC3P_MDCT_SIZE / 2);
if (wind_id & 1) { /* 2nd half: steep window */
vector_fmul_reverse(&pOut[160], &pOut[160], av_sine_64, 64);
vector_fmul_reverse(&pOut[160], av_sine_64, 64);
memset(&pOut[224], 0, sizeof(float) * 32);
} else /* 2nd half: simple sine window */
vector_fmul_reverse(&pOut[128], &pOut[128], av_sine_128, ATRAC3P_MDCT_SIZE / 2);
vector_fmul_reverse(&pOut[128], av_sine_128, ATRAC3P_MDCT_SIZE / 2);
}
/* lookup table for fast modulo 23 op required for cyclic buffers of the IPQF */

View File

@ -4,15 +4,22 @@
// Compat hacks to make an FFMPEG-like environment, so we can keep the core code mostly unchanged.
#if defined(__GNUC__)
#if defined(__clang__)
#define DECLARE_ALIGNED(n, t, v) t __attribute__((aligned(n))) v
#define DECLARE_ASM_CONST(n, t, v) static const t av_used __attribute__((aligned(n))) v
#define av_restrict __restrict
#elif defined(__GNUC__)
#define DECLARE_ALIGNED(n,t,v) t __attribute__ ((aligned (n))) v
#define DECLARE_ASM_CONST(n,t,v) static const t av_used __attribute__ ((aligned (n))) v
#define av_restrict __restrict__
#elif defined(_MSC_VER)
#define DECLARE_ALIGNED(n,t,v) __declspec(align(n)) t v
#define DECLARE_ASM_CONST(n,t,v) __declspec(align(n)) static const t v
#define av_restrict __restrict
#else
#define DECLARE_ALIGNED(n,t,v) t v
#define DECLARE_ASM_CONST(n,t,v) static const t v
#define av_restrict
#endif
#define AV_HAVE_FAST_UNALIGNED 0
@ -22,7 +29,6 @@
// #define BITSTREAM_READER_LE
#define LOCAL_ALIGNED(bits, type, name, subscript) type name subscript
#define av_restrict
#define av_alias
#define av_unused
#define av_assert0(cond)

View File

@ -18,20 +18,20 @@
#pragma once
inline void vector_fmul(float *dst, const float *src0, const float *src1, int len) {
int i;
for (i = 0; i < len; i++)
dst[i] = src0[i] * src1[i];
#include "compat.h"
inline void vector_fmul(float * av_restrict dst, const float * av_restrict src, int len) {
for (int i = 0; i < len; i++)
dst[i] = dst[i] * src[i];
}
/**
* Multiply a vector of floats by a scalar float. In the two-pointer
* form, source and destination vectors must overlap exactly or not at
* all; the single-pointer form scales dst in place.
*/
inline void vector_fmul_scalar(float *dst, const float *src, float mul, int len) {
int i;
for (i = 0; i < len; i++)
dst[i] = src[i] * mul;
inline void vector_fmul_scalar(float *dst, float mul, int len) {
for (int i = 0; i < len; i++)
dst[i] *= mul;
}
/**
@ -39,18 +39,15 @@ inline void vector_fmul_scalar(float *dst, const float *src, float mul, int len)
* in a vector of floats. The second vector of floats is iterated over
* in reverse order.
*
* @param dst output vector
* @param dst output and first input vector
* constraints: 32-byte aligned
* @param src0 first input vector
* constraints: 32-byte aligned
* @param src1 second input vector
* @param src second input vector
* constraints: 32-byte aligned
* @param len number of elements in the input
* constraints: multiple of 16
*/
inline void vector_fmul_reverse(float *dst, const float *src0, const float *src1, int len) {
int i;
src1 += len - 1;
for (i = 0; i < len; i++)
dst[i] = src0[i] * src1[-i];
inline void vector_fmul_reverse(float * av_restrict dst, const float * av_restrict src, int len) {
src += len - 1;
for (int i = 0; i < len; i++)
dst[i] *= src[-i];
}