Always clamp in ToRGB[A]?().

Previously we only clamped on the SSE path; it is better to be consistent.
This may also be slightly faster.
This commit is contained in:
Unknown W. Brackets 2014-10-31 09:07:54 -07:00
parent 0df4afce67
commit eee3ac79f4
4 changed files with 15 additions and 21 deletions

View File

@ -272,9 +272,9 @@ void SoftwareTransform(
c1[j] = litColor1[j];
}
} else {
// Summed color into c0
// Summed color into c0 (will clamp in ToRGBA().)
for (int j = 0; j < 4; j++) {
c0[j] = ((c0[j] + litColor1[j]) > 1.0f) ? 1.0f : (c0[j] + litColor1[j]);
c0[j] += litColor1[j];
}
}
} else {

View File

@ -174,9 +174,9 @@ void Lighter::Light(float colorOut0[4], float colorOut1[4], const float colorIn[
}
}
// 4?
// The colors must eventually be clamped, but we expect the caller to do that.
for (int i = 0; i < 4; i++) {
colorOut0[i] = lightSum0[i] > 1.0f ? 1.0f : lightSum0[i];
colorOut1[i] = lightSum1[i] > 1.0f ? 1.0f : lightSum1[i];
colorOut0[i] = lightSum0[i];
colorOut1[i] = lightSum1[i];
}
}

View File

@ -967,9 +967,9 @@ __forceinline unsigned int Vec3<float>::ToRGB() const
__m128i c16 = _mm_packs_epi32(c, c);
return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16)) & 0x00FFFFFF;
#else
return ((unsigned int)(r()*255.f) << 0) |
((unsigned int)(g()*255.f) << 8) |
((unsigned int)(b()*255.f) << 16);
return (clamp_u8((int)(r() * 255.f)) << 0) |
(clamp_u8((int)(g() * 255.f)) << 8) |
(clamp_u8((int)(b() * 255.f)) << 16);
#endif
}
@ -980,7 +980,7 @@ __forceinline unsigned int Vec3<int>::ToRGB() const
__m128i c16 = _mm_packs_epi32(ivec, ivec);
return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16)) & 0x00FFFFFF;
#else
return (r()&0xFF) | ((g()&0xFF)<<8) | ((b()&0xFF)<<16);
return clamp_u8(r()) | (clamp_u8(g()) << 8) | (clamp_u8(b()) << 16);
#endif
}
@ -1021,10 +1021,10 @@ __forceinline unsigned int Vec4<float>::ToRGBA() const
__m128i c16 = _mm_packs_epi32(c, c);
return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16));
#else
return ((unsigned int)(r()*255.f) << 0) |
((unsigned int)(g()*255.f) << 8) |
((unsigned int)(b()*255.f) << 16) |
((unsigned int)(a()*255.f) << 24);
return (clamp_u8((int)(r() * 255.f)) << 0) |
(clamp_u8((int)(g() * 255.f)) << 8) |
(clamp_u8((int)(b() * 255.f)) << 16) |
(clamp_u8((int)(a() * 255.f)) << 24);
#endif
}
@ -1035,7 +1035,7 @@ __forceinline unsigned int Vec4<int>::ToRGBA() const
__m128i c16 = _mm_packs_epi32(ivec, ivec);
return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16));
#else
return (r()&0xFF) | ((g()&0xFF)<<8) | ((b()&0xFF)<<16) | ((a()&0xFF)<<24);
return clamp_u8(r()) | (clamp_u8(g()) << 8) | (clamp_u8(b()) << 16) | (clamp_u8(a()) << 24);
#endif
}

View File

@ -993,20 +993,14 @@ inline void DrawSinglePixel(const DrawingCoords &p, u16 z, const Vec4<int> &colo
if (gstate.isAlphaBlendEnabled() && !clearMode) {
const Vec4<int> dst = Vec4<int>::FromRGBA(old_color);
#if defined(_M_SSE)
// ToRGBA() on SSE automatically clamps.
// ToRGB() / ToRGBA() always clamp automatically.
new_color = AlphaBlendingResult(prim_color, dst).ToRGB();
new_color |= stencil << 24;
#else
new_color = Vec4<int>(AlphaBlendingResult(prim_color, dst).Clamp(0, 255), stencil).ToRGBA();
#endif
} else {
#if defined(_M_SSE)
new_color = Vec3<int>(prim_color.ivec).ToRGB();
new_color |= stencil << 24;
#else
if (!clearMode)
prim_color = prim_color.Clamp(0, 255);
new_color = Vec4<int>(prim_color.r(), prim_color.g(), prim_color.b(), stencil).ToRGBA();
#endif
}