mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-22 21:09:52 +00:00
Simplify the inner loop of ff_atrac3p_ipqf (prepare for SIMD)
This commit is contained in:
parent
effae82208
commit
52111103b8
@ -29,6 +29,10 @@ charset = utf-8-bom
|
||||
[Windows/{aboutbox.rc,version.rc}]
|
||||
charset = utf-8
|
||||
|
||||
[ext/at3_standalone/**.{cpp,h}]
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
|
||||
[libretro/**.{cpp,h}]
|
||||
indent_style = space
|
||||
indent_size = 3
|
||||
|
@ -2,6 +2,11 @@
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
// Notes
|
||||
//
|
||||
// Performance-wise, these are OK.
|
||||
// For Atrac3+, the bottlenecks are two functions: decode_qu_spectra and ff_atrac3p_ipqf. At least the latter is quite SIMD-able.
|
||||
|
||||
// The full external API for the standalone Atrac3/3+ decoder.
|
||||
|
||||
struct ATRAC3Context;
|
||||
|
@ -624,11 +624,17 @@ void ff_atrac3p_ipqf(FFTContext *dct_ctx, Atrac3pIPQFChannelCtx *hist,
|
||||
pos_next = mod23_lut[pos_now + 2]; // pos_next = (pos_now + 1) % 23;
|
||||
|
||||
for (t = 0; t < ATRAC3P_PQF_FIR_LEN; t++) {
|
||||
const float *buf1 = hist->buf1[pos_now];
|
||||
const float *buf2 = hist->buf2[pos_next];
|
||||
const float *coeffs1 = ipqf_coeffs1[t];
|
||||
const float *coeffs2 = ipqf_coeffs2[t];
|
||||
|
||||
float *outp = out + s * 16;
|
||||
for (i = 0; i < 8; i++) {
|
||||
out[s * 16 + i + 0] += hist->buf1[pos_now][i] * ipqf_coeffs1[t][i] +
|
||||
hist->buf2[pos_next][i] * ipqf_coeffs2[t][i];
|
||||
out[s * 16 + i + 8] += hist->buf1[pos_now][7 - i] * ipqf_coeffs1[t][i + 8] +
|
||||
hist->buf2[pos_next][7 - i] * ipqf_coeffs2[t][i + 8];
|
||||
outp[i] += buf1[i] * coeffs1[i] + buf2[i] * coeffs2[i];
|
||||
}
|
||||
for (i = 0; i < 8; i++) {
|
||||
outp[i + 8] += buf1[7 - i] * coeffs1[i + 8] + buf2[7 - i] * coeffs2[i + 8];
|
||||
}
|
||||
|
||||
pos_now = mod23_lut[pos_next + 2]; // pos_now = (pos_now + 2) % 23;
|
||||
|
Loading…
Reference in New Issue
Block a user