TexCache: Correct alpha mask checks for SSE2.

Should have been shifts by byte (4/8), but let's just switch to shuffles
anyway.  These were always shifting in zeros and failing.
This commit is contained in:
Unknown W. Brackets 2022-12-03 12:38:01 -08:00
parent 38eb0a7a82
commit 0aba5ff3c1

View File

@ -641,16 +641,12 @@ void DecodeDXT5Block(u32 *dst, const DXT5Block *src, int pitch, int height) {
#ifdef _M_SSE
// Reduces the four 32-bit lanes of value to a single u32 by ANDing them together.
// Uses lane shuffles only: _mm_srli_si128 counts in bytes, so the previous shift
// counts of 64 and 32 cleared the whole register and forced the result to 0.
inline u32 SSEReduce32And(__m128i value) {
	// Swap the 64-bit halves and AND: lane 0 = (l0 & l2), lane 1 = (l1 & l3).
	value = _mm_and_si128(value, _mm_shuffle_epi32(value, _MM_SHUFFLE(1, 0, 3, 2)));
	// Broadcast lane 1 and AND: lane 0 now holds l0 & l1 & l2 & l3.
	value = _mm_and_si128(value, _mm_shuffle_epi32(value, _MM_SHUFFLE(1, 1, 1, 1)));
	// Extract lane 0.
	return _mm_cvtsi128_si32(value);
}
// Reduces the eight 16-bit lanes of value by ANDing them together.
// The combined value ends up in the low 16 bits of the result (assuming u32 is
// an unsigned 32-bit type, the upper 16 bits are zero).
inline u32 SSEReduce16And(__m128i value) {
	// First AND the four 32-bit lanes; each half of mask then covers four 16-bit lanes.
	u32 mask = SSEReduce32And(value);
	// Fold the upper half onto the lower half to combine all eight lanes.
	return mask & (mask >> 16);
}
#endif