mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-23 13:30:02 +00:00
softgpu: Use SIMD for more Vec4 casts.
A number of these were falling back to some pretty terrible code. Thanks to fp64 for noticing.
This commit is contained in:
parent
b2b61d58d4
commit
795de9b164
21
GPU/Math3D.h
21
GPU/Math3D.h
@ -580,8 +580,25 @@ public:
|
||||
#endif
|
||||
|
||||
template<typename T2>
|
||||
Vec4<T2> Cast() const
|
||||
{
|
||||
Vec4<T2> Cast() const {
|
||||
if constexpr (std::is_same<T, float>::value && std::is_same<T2, int>::value) {
|
||||
#if defined(_M_SSE) && !PPSSPP_ARCH(X86)
|
||||
return _mm_cvtps_epi32(vec);
|
||||
#elif defined(_M_SSE)
|
||||
return _mm_cvtps_epi32(_mm_loadu_ps((float *)&vec));
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
return vcvtq_s32_f32(ivec);
|
||||
#endif
|
||||
}
|
||||
if constexpr (std::is_same<T, int>::value && std::is_same<T2, float>::value) {
|
||||
#if defined(_M_SSE) && !PPSSPP_ARCH(X86)
|
||||
return _mm_cvtepi32_ps(ivec);
|
||||
#elif defined(_M_SSE)
|
||||
return _mm_cvtepi32_ps(_mm_loadu_si128(&ivec));
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
return vcvtq_f32_s32(ivec);
|
||||
#endif
|
||||
}
|
||||
return Vec4<T2>((T2)x, (T2)y, (T2)z, (T2)w);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user