diff --git a/GPU/Common/VertexDecoderCommon.cpp b/GPU/Common/VertexDecoderCommon.cpp index e10666dc63..7f141d634d 100644 --- a/GPU/Common/VertexDecoderCommon.cpp +++ b/GPU/Common/VertexDecoderCommon.cpp @@ -658,9 +658,9 @@ void VertexDecoder::Step_NormalS8ToFloat() const { float *normal = (float *)(decoded_ + decFmt.nrmoff); const s8 *sv = (const s8*)(ptr_ + nrmoff); - normal[0] = sv[0] * (1.0f / 128.0f); - normal[1] = sv[1] * (1.0f / 128.0f); - normal[2] = sv[2] * (1.0f / 128.0f); + normal[0] = (float)sv[0] * (1.0f / 128.0f); + normal[1] = (float)sv[1] * (1.0f / 128.0f); + normal[2] = (float)sv[2] * (1.0f / 128.0f); } void VertexDecoder::Step_NormalS16() const diff --git a/GPU/Math3D.h b/GPU/Math3D.h index b9af0a7e44..0fa5ed78af 100644 --- a/GPU/Math3D.h +++ b/GPU/Math3D.h @@ -912,6 +912,7 @@ float MATH3D_CALL vectorGetByIndex(__m128 v) { #if defined(_M_SSE) // x, y, and z should be broadcast. Should only be used through Vec3f version. +// Note that this will read one extra float past the end of the matrix, so the matrix had better not be at the end of an allocation! inline __m128 MATH3D_CALL Vec3ByMatrix43Internal(__m128 x, __m128 y, __m128 z, const float m[12]) { __m128 col0 = _mm_loadu_ps(m); __m128 col1 = _mm_loadu_ps(m + 3);