Neon-optimize the audio s32->s16 packing function

2024-11-24 05:49:58 +00:00 · 2017-08-15 16:15:28 +02:00 · 2017-08-15 16:15:28 +02:00 · b3b459507a
commit b3b459507a
parent d19d8a8bb8
1 changed files with 21 additions and 1 deletions
--- a/Core/HW/StereoResampler.cpp
+++ b/Core/HW/StereoResampler.cpp
@ -45,6 +45,9 @@
 #ifdef _M_SSE
 #include <emmintrin.h>
 #endif
+#if PPSSPP_ARCH(ARM_NEON)
+#include <arm_neon.h>
+#endif

 StereoResampler::StereoResampler()
 		: m_bufsize(MAX_SAMPLES_DEFAULT)
@ -103,8 +106,25 @@ inline void ClampBufferToS16(s16 *out, const s32 *in, size_t size, s8 volShift)
 		in += 8;
 		size -= 8;
 	}
+#elif PPSSPP_ARCH(ARM_NEON)
+	int16x4_t signedVolShift = vdup_n_s16 (-volShift); // Can only dynamic-shift right, but by a signed integer
+	while (size >= 8) {
+		int32x4_t in1 = vld1q_s32(in);
+		int32x4_t in2 = vld1q_s32(in + 4);
+		int16x4_t packed1 = vqmovn_s32(in1);
+		int16x4_t packed2 = vqmovn_s32(in2);
+		if (useShift) {
+			packed1 = vshl_s16(packed1, signedVolShift);
+			packed2 = vshl_s16(packed2, signedVolShift);
+		}
+		vst1_s16(out, packed1);
+		vst1_s16(out + 4, packed2);
+		out += 8;
+		in += 8;
+		size -= 8;
+	}
 #endif
-	// This does the remainder if SSE was used, otherwise it does it all.
+	// This does the remainder if SIMD was used, otherwise it does it all.
 	for (size_t i = 0; i < size; i++) {
 		out[i] = clamp_s16(useShift ? (in[i] >> volShift) : in[i]);
 	}