diff --git a/GPU/Common/SoftwareTransformCommon.cpp b/GPU/Common/SoftwareTransformCommon.cpp
index e2b2a6ad4b..37328c4c45 100644
--- a/GPU/Common/SoftwareTransformCommon.cpp
+++ b/GPU/Common/SoftwareTransformCommon.cpp
@@ -272,9 +272,9 @@ void SoftwareTransform(
 						c1[j] = litColor1[j];
 					}
 				} else {
-					// Summed color into c0
+					// Summed color into c0 (will clamp in ToRGBA().)
 					for (int j = 0; j < 4; j++) {
-						c0[j] = ((c0[j] + litColor1[j]) > 1.0f) ? 1.0f : (c0[j] + litColor1[j]);
+						c0[j] += litColor1[j];
 					}
 				}
 			} else {
diff --git a/GPU/Common/TransformCommon.cpp b/GPU/Common/TransformCommon.cpp
index 26d963a37e..a80b2769e5 100644
--- a/GPU/Common/TransformCommon.cpp
+++ b/GPU/Common/TransformCommon.cpp
@@ -174,9 +174,9 @@ void Lighter::Light(float colorOut0[4], float colorOut1[4], const float colorIn[
 		}
 	}
 
-	// 4?
+	// The colors must eventually be clamped, but we expect the caller to do that.
 	for (int i = 0; i < 4; i++) {
-		colorOut0[i] = lightSum0[i] > 1.0f ? 1.0f : lightSum0[i];
-		colorOut1[i] = lightSum1[i] > 1.0f ? 1.0f : lightSum1[i];
+		colorOut0[i] = lightSum0[i];
+		colorOut1[i] = lightSum1[i];
 	}
 }
diff --git a/GPU/Math3D.h b/GPU/Math3D.h
index 61bb8982e0..6159266fa6 100644
--- a/GPU/Math3D.h
+++ b/GPU/Math3D.h
@@ -967,9 +967,9 @@ __forceinline unsigned int Vec3<float>::ToRGB() const
 	__m128i c16 = _mm_packs_epi32(c, c);
 	return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16)) & 0x00FFFFFF;
 #else
-	return ((unsigned int)(r()*255.f) << 0) |
-			((unsigned int)(g()*255.f) << 8) |
-			((unsigned int)(b()*255.f) << 16);
+	return (clamp_u8((int)(r() * 255.f)) << 0) |
+			(clamp_u8((int)(g() * 255.f)) << 8) |
+			(clamp_u8((int)(b() * 255.f)) << 16);
 #endif
 }
 
@@ -980,7 +980,7 @@ __forceinline unsigned int Vec3<int>::ToRGB() const
 	__m128i c16 = _mm_packs_epi32(ivec, ivec);
 	return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16)) & 0x00FFFFFF;
 #else
-	return (r()&0xFF) | ((g()&0xFF)<<8) | ((b()&0xFF)<<16);
+	return clamp_u8(r()) | (clamp_u8(g()) << 8) | (clamp_u8(b()) << 16);
 #endif
 }
 
@@ -1021,10 +1021,10 @@ __forceinline unsigned int Vec4<float>::ToRGBA() const
 	__m128i c16 = _mm_packs_epi32(c, c);
 	return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16));
 #else
-	return ((unsigned int)(r()*255.f) << 0) |
-			((unsigned int)(g()*255.f) << 8) |
-			((unsigned int)(b()*255.f) << 16) |
-			((unsigned int)(a()*255.f) << 24);
+	return (clamp_u8((int)(r() * 255.f)) << 0) |
+			(clamp_u8((int)(g() * 255.f)) << 8) |
+			(clamp_u8((int)(b() * 255.f)) << 16) |
+			(clamp_u8((int)(a() * 255.f)) << 24);
 #endif
 }
 
@@ -1035,7 +1035,7 @@ __forceinline unsigned int Vec4<int>::ToRGBA() const
 	__m128i c16 = _mm_packs_epi32(ivec, ivec);
 	return _mm_cvtsi128_si32(_mm_packus_epi16(c16, c16));
 #else
-	return (r()&0xFF) | ((g()&0xFF)<<8) | ((b()&0xFF)<<16) | ((a()&0xFF)<<24);
+	return clamp_u8(r()) | (clamp_u8(g()) << 8) | (clamp_u8(b()) << 16) | (clamp_u8(a()) << 24);
 #endif
 }
 
diff --git a/GPU/Software/Rasterizer.cpp b/GPU/Software/Rasterizer.cpp
index e1ec539432..149f723541 100644
--- a/GPU/Software/Rasterizer.cpp
+++ b/GPU/Software/Rasterizer.cpp
@@ -993,20 +993,14 @@ inline void DrawSinglePixel(const DrawingCoords &p, u16 z, const Vec4<int> &colo
 
 	if (gstate.isAlphaBlendEnabled() && !clearMode) {
 		const Vec4<int> dst = Vec4<int>::FromRGBA(old_color);
-#if defined(_M_SSE)
-		// ToRGBA() on SSE automatically clamps.
+		// ToRGB() always automatically clamps (on SSE and non-SSE builds alike).
 		new_color = AlphaBlendingResult(prim_color, dst).ToRGB();
 		new_color |= stencil << 24;
-#else
-		new_color = Vec4<int>(AlphaBlendingResult(prim_color, dst).Clamp(0, 255), stencil).ToRGBA();
-#endif
 	} else {
 #if defined(_M_SSE)
 		new_color = Vec3<int>(prim_color.ivec).ToRGB();
 		new_color |= stencil << 24;
 #else
-		if (!clearMode)
-			prim_color = prim_color.Clamp(0, 255);
 		new_color = Vec4<int>(prim_color.r(), prim_color.g(), prim_color.b(), stencil).ToRGBA();
 #endif
 	}