diff --git a/GPU/Math3D.h b/GPU/Math3D.h
index 6309801c4e..96e3ae61eb 100644
--- a/GPU/Math3D.h
+++ b/GPU/Math3D.h
@@ -580,8 +580,32 @@ public:
 #endif
 
+	// Converts every component to T2 and returns the result as a Vec4<T2>.
+	// float -> int and int -> float use a single SIMD conversion when one is
+	// compiled in; every other type pair (and non-SIMD builds) falls through
+	// to the per-component C cast at the bottom.
 	template<typename T2>
-	Vec4<T2> Cast() const
-	{
+	Vec4<T2> Cast() const {
+		if constexpr (std::is_same<T, float>::value && std::is_same<T2, int>::value) {
+			// Truncate toward zero so the result matches the scalar (T2)x path below and
+			// NEON's vcvtq_s32_f32. _mm_cvtps_epi32 would round per MXCSR instead.
+#if defined(_M_SSE) && !PPSSPP_ARCH(X86)
+			return _mm_cvttps_epi32(vec);
+#elif defined(_M_SSE)
+			// 32-bit x86 goes through an unaligned load - presumably the member's alignment isn't guaranteed there.
+			return _mm_cvttps_epi32(_mm_loadu_ps((float *)&vec));
+#elif PPSSPP_ARCH(ARM64_NEON)
+			return vcvtq_s32_f32(vec);
+#endif
+		}
+		if constexpr (std::is_same<T, int>::value && std::is_same<T2, float>::value) {
+#if defined(_M_SSE) && !PPSSPP_ARCH(X86)
+			return _mm_cvtepi32_ps(ivec);
+#elif defined(_M_SSE)
+			return _mm_cvtepi32_ps(_mm_loadu_si128(&ivec));
+#elif PPSSPP_ARCH(ARM64_NEON)
+			return vcvtq_f32_s32(ivec);
+#endif
+		}
 		return Vec4<T2>((T2)x, (T2)y, (T2)z, (T2)w);
 	}
 