Files
archived-pcsx2/pcsx2/SPU2/ReverbResample.cpp

261 lines
6.1 KiB
C++

// SPDX-FileCopyrightText: 2002-2026 PCSX2 Dev Team
// SPDX-License-Identifier: GPL-3.0+
#include "GS/GSVector.h"
#include "SPU2/defs.h"
MULTI_ISA_UNSHARED_START
static constexpr u32 NUM_TAPS = 39;
// 39 tap filter, the 0's could be optimized out
static constexpr std::array<s16, 48> filter_down_coefs alignas(32) = {
-1,
0,
2,
0,
-10,
0,
35,
0,
-103,
0,
266,
0,
-616,
0,
1332,
0,
-2960,
0,
10246,
16384,
10246,
0,
-2960,
0,
1332,
0,
-616,
0,
266,
0,
-103,
0,
35,
0,
-10,
0,
2,
0,
-1,
};
static constexpr std::array<s16, 48> make_up_coefs()
{
std::array<s16, 48> ret = {};
for (u32 i = 0; i < NUM_TAPS; i++)
{
ret[i] = static_cast<s16>(std::clamp<s32>(filter_down_coefs[i] * 2, INT16_MIN, INT16_MAX));
}
return ret;
}
static constexpr std::array<s16, 48> filter_up_coefs alignas(32) = make_up_coefs();
s32 __forceinline ReverbDownsample_reference(V_Core& core, bool right)
{
int index = (core.RevbSampleBufPos - NUM_TAPS) & 63;
s32 out = 0;
for (u32 i = 0; i < NUM_TAPS; i++)
{
out += core.RevbDownBuf[right][index + i] * filter_down_coefs[i];
}
out >>= 15;
return clamp_mix(out);
}
#if _M_SSE >= 0x501
s32 __forceinline ReverbDownsample_avx(V_Core& core, bool right)
{
int index = (core.RevbSampleBufPos - NUM_TAPS) & 63;
auto c = GSVector8i::load<true>(&filter_down_coefs[0]);
auto s = GSVector8i::load<false>(&core.RevbDownBuf[right][index]);
auto acc = s.mul16hrs(c);
c = GSVector8i::load<true>(&filter_down_coefs[16]);
s = GSVector8i::load<false>(&core.RevbDownBuf[right][index + 16]);
acc = acc.adds16(s.mul16hrs(c));
c = GSVector8i::load<true>(&filter_down_coefs[32]);
s = GSVector8i::load<false>(&core.RevbDownBuf[right][index + 32]);
acc = acc.adds16(s.mul16hrs(c));
acc = acc.adds16(acc.ba());
acc = acc.hadds16(acc);
acc = acc.hadds16(acc);
acc = acc.hadds16(acc);
return acc.I16[0];
}
#endif
s32 __forceinline ReverbDownsample_sse(V_Core& core, bool right)
{
int index = (core.RevbSampleBufPos - NUM_TAPS) & 63;
auto c = GSVector4i::load<true>(&filter_down_coefs[0]);
auto s = GSVector4i::load<false>(&core.RevbDownBuf[right][index]);
auto acc = s.mul16hrs(c);
c = GSVector4i::load<true>(&filter_down_coefs[8]);
s = GSVector4i::load<false>(&core.RevbDownBuf[right][index + 8]);
acc = acc.adds16(s.mul16hrs(c));
c = GSVector4i::load<true>(&filter_down_coefs[16]);
s = GSVector4i::load<false>(&core.RevbDownBuf[right][index + 16]);
acc = acc.adds16(s.mul16hrs(c));
c = GSVector4i::load<true>(&filter_down_coefs[24]);
s = GSVector4i::load<false>(&core.RevbDownBuf[right][index + 24]);
acc = acc.adds16(s.mul16hrs(c));
c = GSVector4i::load<true>(&filter_down_coefs[32]);
s = GSVector4i::load<false>(&core.RevbDownBuf[right][index + 32]);
acc = acc.adds16(s.mul16hrs(c));
acc = acc.hadds16(acc);
acc = acc.hadds16(acc);
acc = acc.hadds16(acc);
return acc.I16[0];
}
s32 ReverbDownsample(V_Core& core, bool right)
{
#if _M_SSE >= 0x501
return ReverbDownsample_avx(core, right);
#else
return ReverbDownsample_sse(core, right);
#endif
}
StereoOut32 __forceinline ReverbUpsample_reference(V_Core& core)
{
int index = (core.RevbSampleBufPos - NUM_TAPS) & 63;
s32 l = 0, r = 0;
for (u32 i = 0; i < NUM_TAPS; i++)
{
l += core.RevbUpBuf[0][index + i] * filter_up_coefs[i];
r += core.RevbUpBuf[1][index + i] * filter_up_coefs[i];
}
l >>= 15;
r >>= 15;
return {clamp_mix(l), clamp_mix(r)};
}
#if _M_SSE >= 0x501
StereoOut32 __forceinline ReverbUpsample_avx(V_Core& core)
{
int index = (core.RevbSampleBufPos - NUM_TAPS) & 63;
auto c = GSVector8i::load<true>(&filter_up_coefs[0]);
auto l = GSVector8i::load<false>(&core.RevbUpBuf[0][index]);
auto r = GSVector8i::load<false>(&core.RevbUpBuf[1][index]);
auto lacc = l.mul16hrs(c);
auto racc = r.mul16hrs(c);
c = GSVector8i::load<true>(&filter_up_coefs[16]);
l = GSVector8i::load<false>(&core.RevbUpBuf[0][index + 16]);
r = GSVector8i::load<false>(&core.RevbUpBuf[1][index + 16]);
lacc = lacc.adds16(l.mul16hrs(c));
racc = racc.adds16(r.mul16hrs(c));
c = GSVector8i::load<true>(&filter_up_coefs[32]);
l = GSVector8i::load<false>(&core.RevbUpBuf[0][index + 32]);
r = GSVector8i::load<false>(&core.RevbUpBuf[1][index + 32]);
lacc = lacc.adds16(l.mul16hrs(c));
racc = racc.adds16(r.mul16hrs(c));
lacc = lacc.adds16(lacc.ba());
racc = racc.adds16(racc.ba());
lacc = lacc.hadds16(lacc);
lacc = lacc.hadds16(lacc);
lacc = lacc.hadds16(lacc);
racc = racc.hadds16(racc);
racc = racc.hadds16(racc);
racc = racc.hadds16(racc);
return {lacc.I16[0], racc.I16[0]};
}
#endif
StereoOut32 __forceinline ReverbUpsample_sse(V_Core& core)
{
int index = (core.RevbSampleBufPos - NUM_TAPS) & 63;
auto c = GSVector4i::load<true>(&filter_up_coefs[0]);
auto l = GSVector4i::load<false>(&core.RevbUpBuf[0][index]);
auto r = GSVector4i::load<false>(&core.RevbUpBuf[1][index]);
auto lacc = l.mul16hrs(c);
auto racc = r.mul16hrs(c);
c = GSVector4i::load<true>(&filter_up_coefs[8]);
l = GSVector4i::load<false>(&core.RevbUpBuf[0][index + 8]);
r = GSVector4i::load<false>(&core.RevbUpBuf[1][index + 8]);
lacc = lacc.adds16(l.mul16hrs(c));
racc = racc.adds16(r.mul16hrs(c));
c = GSVector4i::load<true>(&filter_up_coefs[16]);
l = GSVector4i::load<false>(&core.RevbUpBuf[0][index + 16]);
r = GSVector4i::load<false>(&core.RevbUpBuf[1][index + 16]);
lacc = lacc.adds16(l.mul16hrs(c));
racc = racc.adds16(r.mul16hrs(c));
c = GSVector4i::load<true>(&filter_up_coefs[24]);
l = GSVector4i::load<false>(&core.RevbUpBuf[0][index + 24]);
r = GSVector4i::load<false>(&core.RevbUpBuf[1][index + 24]);
lacc = lacc.adds16(l.mul16hrs(c));
racc = racc.adds16(r.mul16hrs(c));
c = GSVector4i::load<true>(&filter_up_coefs[32]);
l = GSVector4i::load<false>(&core.RevbUpBuf[0][index + 32]);
r = GSVector4i::load<false>(&core.RevbUpBuf[1][index + 32]);
lacc = lacc.adds16(l.mul16hrs(c));
racc = racc.adds16(r.mul16hrs(c));
lacc = lacc.hadds16(lacc);
lacc = lacc.hadds16(lacc);
lacc = lacc.hadds16(lacc);
racc = racc.hadds16(racc);
racc = racc.hadds16(racc);
racc = racc.hadds16(racc);
return {lacc.I16[0], racc.I16[0]};
}
StereoOut32 ReverbUpsample(V_Core& core)
{
#if _M_SSE >= 0x501
return ReverbUpsample_avx(core);
#else
return ReverbUpsample_sse(core);
#endif
}
MULTI_ISA_UNSHARED_END