softgpu: Use SSE in ToRGB()/FromRGB() etc.

This commit is contained in:
Unknown W. Brackets 2014-03-16 19:21:35 -07:00
parent 07ca96e226
commit 627027307c
2 changed files with 58 additions and 4 deletions

View File

@ -69,29 +69,54 @@ float Vec2<float>::Normalize()
// Unpacks an 8-bit-per-channel color (red in bits 0-7, green in bits 8-15,
// blue in bits 16-23) into a float vector normalized to the 0.0 - 1.0 range.
template<>
Vec3<float> Vec3<float>::FromRGB(unsigned int rgb)
{
#if defined(_M_SSE)
	const __m128i z = _mm_setzero_si128();
	__m128i c = _mm_cvtsi32_si128(rgb);
	// Zero-extend every byte to 16 and then to 32 bits: one channel per lane.
	c = _mm_unpacklo_epi16(_mm_unpacklo_epi8(c, z), z);
	// Scale 0 - 255 down to 0.0 - 1.0 so this path produces the same range as
	// the scalar fallback below.  A bare int -> float conversion would hand
	// back 0 - 255 instead.
	return Vec3<float>(_mm_mul_ps(_mm_cvtepi32_ps(c), _mm_set1_ps(1.0f / 255.0f)));
#else
	return Vec3((rgb & 0xFF) * (1.0f/255.0f),
		((rgb >> 8) & 0xFF) * (1.0f/255.0f),
		((rgb >> 16) & 0xFF) * (1.0f/255.0f));
#endif
}
// Splits a packed color (red in bits 0-7, green in bits 8-15, blue in bits
// 16-23) into one integer per channel, each in the 0 - 255 range.
template<>
Vec3<int> Vec3<int>::FromRGB(unsigned int rgb)
{
#if defined(_M_SSE)
	const __m128i zero = _mm_setzero_si128();
	const __m128i packed = _mm_cvtsi32_si128(rgb);
	// Interleaving with zero widens without sign extension: 8 -> 16 bits...
	const __m128i words = _mm_unpacklo_epi8(packed, zero);
	// ...and then 16 -> 32 bits, leaving one channel in each lane.
	const __m128i dwords = _mm_unpacklo_epi16(words, zero);
	return Vec3<int>(dwords);
#else
	const unsigned int red = rgb & 0xFF;
	const unsigned int green = (rgb >> 8) & 0xFF;
	const unsigned int blue = (rgb >> 16) & 0xFF;
	return Vec3(red, green, blue);
#endif
}
// Packs a float color whose channels are in the 0.0 - 1.0 range into 8 bits
// per channel: red in bits 0-7, green in bits 8-15, blue in bits 16-23.
// Bits 24-31 of the result are always zero.
template<>
unsigned int Vec3<float>::ToRGB() const
{
#if defined(_M_SSE)
	// Scale 0.0 - 1.0 up to 0 - 255 before the float -> int conversion, the
	// same way the scalar fallback does.  Note the conversion rounds using the
	// current SSE rounding mode, whereas the fallback truncates.
	const __m128i c = _mm_cvtps_epi32(_mm_mul_ps(vec, _mm_set1_ps(255.0f)));
	// The two saturating packs (32 -> 16 signed, 16 -> 8 unsigned) clamp every
	// channel to 0 - 255.
	const __m128i c16 = _mm_packs_epi32(c, c);
	// Mask off the byte that came from the fourth lane.
	return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16)) & 0x00FFFFFF;
#else
	// Truncate each channel to an integer BEFORE moving it into position.
	// Scaling by 256 / 65536 in floating point first would let the fractional
	// part of green/blue spill into the channel below it.
	const unsigned int red = static_cast<unsigned int>(r() * 255.f);
	const unsigned int green = static_cast<unsigned int>(g() * 255.f);
	const unsigned int blue = static_cast<unsigned int>(b() * 255.f);
	return red + (green << 8) + (blue << 16);
#endif
}
// Packs integer channels into 8 bits each: red in bits 0-7, green in bits
// 8-15, blue in bits 16-23.  Bits 24-31 of the result are always zero.
// The SSE path saturates each channel to 0 - 255; the fallback keeps only the
// low 8 bits of each channel instead.
template<>
unsigned int Vec3<int>::ToRGB() const
{
#if defined(_M_SSE)
	// 32 -> 16 bits with signed saturation, then 16 -> 8 bits with unsigned
	// saturation.
	const __m128i words = _mm_packs_epi32(ivec, ivec);
	const __m128i bytes = _mm_packus_epi16(words, words);
	const unsigned int packed = _mm_cvtsi128_si32(bytes);
	// Discard the byte produced by the fourth lane.
	return packed & 0x00FFFFFF;
#else
	const int red = r() & 0xFF;
	const int green = g() & 0xFF;
	const int blue = b() & 0xFF;
	return red | (green << 8) | (blue << 16);
#endif
}
template<>
@ -145,31 +170,56 @@ float Vec3<float>::Normalize()
// Unpacks an 8-bit-per-channel color (red in bits 0-7, green in bits 8-15,
// blue in bits 16-23, alpha in bits 24-31) into a float vector normalized to
// the 0.0 - 1.0 range.
template<>
Vec4<float> Vec4<float>::FromRGBA(unsigned int rgba)
{
#if defined(_M_SSE)
	const __m128i z = _mm_setzero_si128();
	__m128i c = _mm_cvtsi32_si128(rgba);
	// Zero-extend every byte to 16 and then to 32 bits: one channel per lane.
	c = _mm_unpacklo_epi16(_mm_unpacklo_epi8(c, z), z);
	// Scale 0 - 255 down to 0.0 - 1.0 so this path produces the same range as
	// the scalar fallback below.  A bare int -> float conversion would hand
	// back 0 - 255 instead.
	return Vec4<float>(_mm_mul_ps(_mm_cvtepi32_ps(c), _mm_set1_ps(1.0f / 255.0f)));
#else
	return Vec4((rgba & 0xFF) * (1.0f/255.0f),
		((rgba >> 8) & 0xFF) * (1.0f/255.0f),
		((rgba >> 16) & 0xFF) * (1.0f/255.0f),
		((rgba >> 24) & 0xFF) * (1.0f/255.0f));
#endif
}
// Splits a packed color (red in bits 0-7, green in bits 8-15, blue in bits
// 16-23, alpha in bits 24-31) into one integer per channel, each 0 - 255.
template<>
Vec4<int> Vec4<int>::FromRGBA(unsigned int rgba)
{
#if defined(_M_SSE)
	const __m128i zero = _mm_setzero_si128();
	const __m128i packed = _mm_cvtsi32_si128(rgba);
	// Interleaving with zero widens without sign extension: 8 -> 16 bits...
	const __m128i words = _mm_unpacklo_epi8(packed, zero);
	// ...and then 16 -> 32 bits, leaving one channel in each lane.
	const __m128i dwords = _mm_unpacklo_epi16(words, zero);
	return Vec4<int>(dwords);
#else
	const unsigned int red = rgba & 0xFF;
	const unsigned int green = (rgba >> 8) & 0xFF;
	const unsigned int blue = (rgba >> 16) & 0xFF;
	const unsigned int alpha = (rgba >> 24) & 0xFF;
	return Vec4(red, green, blue, alpha);
#endif
}
// Packs a float color whose channels are in the 0.0 - 1.0 range into 8 bits
// per channel: red in bits 0-7, green in bits 8-15, blue in bits 16-23 and
// alpha in bits 24-31.
template<>
unsigned int Vec4<float>::ToRGBA() const
{
#if defined(_M_SSE)
	// Scale 0.0 - 1.0 up to 0 - 255 before the float -> int conversion, the
	// same way the scalar fallback does.  Note the conversion rounds using the
	// current SSE rounding mode, whereas the fallback truncates.
	const __m128i c = _mm_cvtps_epi32(_mm_mul_ps(vec, _mm_set1_ps(255.0f)));
	// The two saturating packs (32 -> 16 signed, 16 -> 8 unsigned) clamp every
	// channel to 0 - 255.
	const __m128i c16 = _mm_packs_epi32(c, c);
	return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16));
#else
	// Truncate each channel to an integer BEFORE moving it into position.
	// Scaling by 256 / 65536 / 16777216 in floating point first would let the
	// fractional part of a channel spill into the channel below it.
	const unsigned int red = static_cast<unsigned int>(r() * 255.f);
	const unsigned int green = static_cast<unsigned int>(g() * 255.f);
	const unsigned int blue = static_cast<unsigned int>(b() * 255.f);
	const unsigned int alpha = static_cast<unsigned int>(a() * 255.f);
	return red + (green << 8) + (blue << 16) + (alpha << 24);
#endif
}
// Packs integer channels into 8 bits each: red in bits 0-7, green in bits
// 8-15, blue in bits 16-23 and alpha in bits 24-31.
// The SSE path saturates each channel to 0 - 255; the fallback keeps only the
// low 8 bits of each channel instead.
template<>
unsigned int Vec4<int>::ToRGBA() const
{
#if defined(_M_SSE)
	// 32 -> 16 bits with signed saturation, then 16 -> 8 bits with unsigned
	// saturation.
	const __m128i words = _mm_packs_epi32(ivec, ivec);
	const __m128i bytes = _mm_packus_epi16(words, words);
	return _mm_cvtsi128_si32(bytes);
#else
	const int red = r() & 0xFF;
	const int green = g() & 0xFF;
	const int blue = b() & 0xFF;
	const int alpha = a() & 0xFF;
	return red | (green << 8) | (blue << 16) | (alpha << 24);
#endif
}
template<>

View File

@ -993,17 +993,21 @@ inline void DrawSinglePixel(const DrawingCoords &p, u16 z, Vec4<int> prim_color)
if (gstate.isAlphaBlendEnabled() && !clearMode) {
const Vec4<int> dst = Vec4<int>::FromRGBA(old_color);
#if defined(_M_SSE)
const Vec3<int> blended = AlphaBlendingResult(prim_color, dst);
const __m128i blended16 = _mm_packs_epi32(blended.ivec, blended.ivec);
new_color = _mm_cvtsi128_si32(_mm_packus_epi16(blended16, blended16));
new_color = (stencil << 24) | (new_color & 0x00FFFFFF);
// ToRGBA() on SSE automatically clamps.
new_color = AlphaBlendingResult(prim_color, dst).ToRGB();
new_color |= stencil << 24;
#else
new_color = Vec4<int>(AlphaBlendingResult(prim_color, dst).Clamp(0, 255), stencil).ToRGBA();
#endif
} else {
#if defined(_M_SSE)
new_color = Vec3<int>(prim_color.ivec).ToRGB();
new_color |= stencil << 24;
#else
if (!clearMode)
prim_color = prim_color.Clamp(0, 255);
new_color = Vec4<int>(prim_color.r(), prim_color.g(), prim_color.b(), stencil).ToRGBA();
#endif
}
// TODO: Is alpha blending still performed if logic ops are enabled?