mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-02-17 04:39:34 +00:00
Remove SSE4 path from Vec4<int>::operator*
This commit is contained in:
parent
f133739cd0
commit
cd9f01c4df
22
GPU/Math3D.h
22
GPU/Math3D.h
@@ -1387,20 +1387,18 @@ template<>
 inline Vec4<int> Vec4<int>::operator * (const Vec4 &other) const {
 	__m128i a = SAFE_M128I(ivec);
 	__m128i b = SAFE_M128I(other.ivec);
-#if _M_SSE >= 0x401
-	return Vec4<int>(_mm_mullo_epi32(a, b));
-#else
-	// This is what clang does. Seems about as good
-	// as it gets.
+	// Intel in its immense wisdom decided that
+	// SSE2 does not get _mm_mullo_epi32(),
+	// so we do it this way. This is what clang does,
+	// which seems about as good as it gets.
 	__m128i m02 = _mm_mul_epu32(a, b);
-	__m128i m13 = _mm_mul_epu32( // 0xF5 -> [1, 1, 3, 3]
-		_mm_shuffle_epi32(a, 0xF5),
-		_mm_shuffle_epi32(b, 0xF5));
-	__m128i ret = _mm_unpacklo_epi32( // 0xE8 -> [0, 2, 2, 3]
-		_mm_shuffle_epi32(m02, 0xE8),
-		_mm_shuffle_epi32(m13, 0xE8));
+	__m128i m13 = _mm_mul_epu32(
+		_mm_shuffle_epi32(a, _MM_SHUFFLE(3, 3, 1, 1)),
+		_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1)));
+	__m128i ret = _mm_unpacklo_epi32(
+		_mm_shuffle_epi32(m02, _MM_SHUFFLE(3, 2, 2, 0)),
+		_mm_shuffle_epi32(m13, _MM_SHUFFLE(3, 2, 2, 0)));
 	return Vec4<int>(ret);
-#endif
 }
 
 template<> template<>
@@ -748,7 +748,7 @@ static Vec4IntResult SOFTRAST_CALL SampleLinearLevel(float s, float t, const u8
 	Vec4<int> texcolor_br = Vec4<int>::FromRGBA(c.v[3]);
 	Vec4<int> top = texcolor_tl * (0x10 - frac_u) + texcolor_tr * frac_u;
 	Vec4<int> bot = texcolor_bl * (0x10 - frac_u) + texcolor_br * frac_u;
-	return ToVec4IntResult((top * (0x10 - frac_v) + bot * frac_v) >> 8);
+	return ToVec4IntResult((top * (0x10 - frac_v) + bot * frac_v) >> (4 + 4));
 #endif
 }
 
Loading…
x
Reference in New Issue
Block a user