mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-23 05:19:56 +00:00
softgpu: Use SSE in ToRGB()/FromRGB() etc.
This commit is contained in:
parent
07ca96e226
commit
627027307c
@ -69,29 +69,54 @@ float Vec2<float>::Normalize()
|
||||
template<>
|
||||
Vec3<float> Vec3<float>::FromRGB(unsigned int rgb)
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
__m128i z = _mm_setzero_si128();
|
||||
__m128i c = _mm_cvtsi32_si128(rgb);
|
||||
c = _mm_unpacklo_epi16(_mm_unpacklo_epi8(c, z), z);
|
||||
return Vec3<float>(_mm_cvtepi32_ps(c));
|
||||
#else
|
||||
return Vec3((rgb & 0xFF) * (1.0f/255.0f),
|
||||
((rgb >> 8) & 0xFF) * (1.0f/255.0f),
|
||||
((rgb >> 16) & 0xFF) * (1.0f/255.0f));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
Vec3<int> Vec3<int>::FromRGB(unsigned int rgb)
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
__m128i z = _mm_setzero_si128();
|
||||
__m128i c = _mm_cvtsi32_si128(rgb);
|
||||
c = _mm_unpacklo_epi16(_mm_unpacklo_epi8(c, z), z);
|
||||
return Vec3<int>(c);
|
||||
#else
|
||||
return Vec3(rgb & 0xFF, (rgb >> 8) & 0xFF, (rgb >> 16) & 0xFF);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
unsigned int Vec3<float>::ToRGB() const
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
__m128i c = _mm_cvtps_epi32(vec);
|
||||
__m128i c16 = _mm_packs_epi32(c, c);
|
||||
return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16)) & 0x00FFFFFF;
|
||||
#else
|
||||
return ((unsigned int)(r()*255.f)) +
|
||||
((unsigned int)(g()*255.f*256.f)) +
|
||||
((unsigned int)(b()*255.f*256.f*256.f));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
unsigned int Vec3<int>::ToRGB() const
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
__m128i c16 = _mm_packs_epi32(ivec, ivec);
|
||||
return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16)) & 0x00FFFFFF;
|
||||
#else
|
||||
return (r()&0xFF) | ((g()&0xFF)<<8) | ((b()&0xFF)<<16);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
@ -145,31 +170,56 @@ float Vec3<float>::Normalize()
|
||||
template<>
|
||||
Vec4<float> Vec4<float>::FromRGBA(unsigned int rgba)
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
__m128i z = _mm_setzero_si128();
|
||||
__m128i c = _mm_cvtsi32_si128(rgba);
|
||||
c = _mm_unpacklo_epi16(_mm_unpacklo_epi8(c, z), z);
|
||||
return Vec4<float>(_mm_cvtepi32_ps(c));
|
||||
#else
|
||||
return Vec4((rgba & 0xFF) * (1.0f/255.0f),
|
||||
((rgba >> 8) & 0xFF) * (1.0f/255.0f),
|
||||
((rgba >> 16) & 0xFF) * (1.0f/255.0f),
|
||||
((rgba >> 24) & 0xFF) * (1.0f/255.0f));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
Vec4<int> Vec4<int>::FromRGBA(unsigned int rgba)
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
__m128i z = _mm_setzero_si128();
|
||||
__m128i c = _mm_cvtsi32_si128(rgba);
|
||||
c = _mm_unpacklo_epi16(_mm_unpacklo_epi8(c, z), z);
|
||||
return Vec4<int>(c);
|
||||
#else
|
||||
return Vec4(rgba & 0xFF, (rgba >> 8) & 0xFF, (rgba >> 16) & 0xFF, (rgba >> 24) & 0xFF);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
unsigned int Vec4<float>::ToRGBA() const
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
__m128i c = _mm_cvtps_epi32(vec);
|
||||
__m128i c16 = _mm_packs_epi32(c, c);
|
||||
return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16));
|
||||
#else
|
||||
return ((unsigned int)(r()*255.f)) +
|
||||
((unsigned int)(g()*255.f*256.f)) +
|
||||
((unsigned int)(b()*255.f*256.f*256.f)) +
|
||||
((unsigned int)(a()*255.f*256.f*256.f*256.f));
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
unsigned int Vec4<int>::ToRGBA() const
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
__m128i c16 = _mm_packs_epi32(ivec, ivec);
|
||||
return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16));
|
||||
#else
|
||||
return (r()&0xFF) | ((g()&0xFF)<<8) | ((b()&0xFF)<<16) | ((a()&0xFF)<<24);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
|
@ -993,17 +993,21 @@ inline void DrawSinglePixel(const DrawingCoords &p, u16 z, Vec4<int> prim_color)
|
||||
if (gstate.isAlphaBlendEnabled() && !clearMode) {
|
||||
const Vec4<int> dst = Vec4<int>::FromRGBA(old_color);
|
||||
#if defined(_M_SSE)
|
||||
const Vec3<int> blended = AlphaBlendingResult(prim_color, dst);
|
||||
const __m128i blended16 = _mm_packs_epi32(blended.ivec, blended.ivec);
|
||||
new_color = _mm_cvtsi128_si32(_mm_packus_epi16(blended16, blended16));
|
||||
new_color = (stencil << 24) | (new_color & 0x00FFFFFF);
|
||||
// ToRGBA() on SSE automatically clamps.
|
||||
new_color = AlphaBlendingResult(prim_color, dst).ToRGB();
|
||||
new_color |= stencil << 24;
|
||||
#else
|
||||
new_color = Vec4<int>(AlphaBlendingResult(prim_color, dst).Clamp(0, 255), stencil).ToRGBA();
|
||||
#endif
|
||||
} else {
|
||||
#if defined(_M_SSE)
|
||||
new_color = Vec3<int>(prim_color.ivec).ToRGB();
|
||||
new_color |= stencil << 24;
|
||||
#else
|
||||
if (!clearMode)
|
||||
prim_color = prim_color.Clamp(0, 255);
|
||||
new_color = Vec4<int>(prim_color.r(), prim_color.g(), prim_color.b(), stencil).ToRGBA();
|
||||
#endif
|
||||
}
|
||||
|
||||
// TODO: Is alpha blending still performed if logic ops are enabled?
|
||||
|
Loading…
Reference in New Issue
Block a user