mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-02-21 14:41:39 +00:00
softgpu: Reduce some non-SIMD lighting math.
Small perf improvement for vertex/lighting heavy (i.e. 3D) scenes.
This commit is contained in:
parent
89250bae5f
commit
d5b4c98f96
10
GPU/Math3D.h
10
GPU/Math3D.h
@ -1367,6 +1367,16 @@ inline Vec3<float> Vec3<float>::operator + (const Vec3 &other) const {
|
||||
return Vec3<float>(_mm_add_ps(SAFE_M128(vec), SAFE_M128(other.vec)));
|
||||
}
|
||||
|
||||
template<>
|
||||
inline void Vec3<float>::operator -= (const Vec3<float> &other) {
|
||||
vec = _mm_sub_ps(SAFE_M128(vec), SAFE_M128(other.vec));
|
||||
}
|
||||
|
||||
template<>
|
||||
inline Vec3<float> Vec3<float>::operator - (const Vec3 &other) const {
|
||||
return Vec3<float>(_mm_sub_ps(SAFE_M128(vec), SAFE_M128(other.vec)));
|
||||
}
|
||||
|
||||
template<>
|
||||
inline Vec3<float> Vec3<float>::operator * (const Vec3 &other) const {
|
||||
return Vec3<float>(_mm_mul_ps(SAFE_M128(vec), SAFE_M128(other.vec)));
|
||||
|
@ -215,7 +215,7 @@ static inline __m128i LightColorScaleBy512SSE4(__m128i factor, __m128i color, __
|
||||
__m128i result18 = _mm_madd_epi16(factor, color);
|
||||
// But now with 18 bits, we need a full multiply.
|
||||
__m128i multiplied = _mm_mullo_epi32(result18, scale);
|
||||
return _mm_srai_epi32(multiplied, 19);
|
||||
return _mm_srai_epi32(multiplied, 10 + 9);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -240,9 +240,9 @@ static Vec4<int> LightColorScaleBy512(const Vec4<int> &factor, const Vec4<int> &
|
||||
return LightColorScaleBy512SSE4(factor.ivec, color.ivec, _mm_set1_epi32(scale));
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
int32x4_t multiplied = vmulq_n_s32(vmulq_s32(factor.ivec, color.ivec), scale);
|
||||
return vshrq_n_s32(multiplied, 19);
|
||||
// Shift off 10 + 9 = 19 bits, the same amount as the SSE4 and scalar paths
// (the previous NEON code also used 19); 10 + 19 would discard ten bits too many.
return vshrq_n_s32(multiplied, 10 + 9);
|
||||
#endif
|
||||
return (factor * color * scale) / (1024 * 512);
|
||||
return (factor * color * scale) >> (10 + 9);
|
||||
}
|
||||
|
||||
static inline void LightColorSum(Vec4<int> &sum, const Vec4<int> &src) {
|
||||
@ -296,25 +296,26 @@ static void ProcessSIMD(VertexData &vertex, const WorldCoords &worldpos, const W
|
||||
// L = vector from vertex to light source
|
||||
// TODO: Should transfer the light positions to world/view space for these calculations?
|
||||
Vec3<float> L = lstate.pos;
|
||||
float att = 1.0f;
|
||||
float attspot = 1.0f;
|
||||
if (!lstate.directional) {
|
||||
L -= worldpos;
|
||||
// TODO: Should this normalize (0, 0, 0) to (0, 0, 1)?
|
||||
float d = L.NormalizeOr001();
|
||||
|
||||
att = 1.0f / Dot33(lstate.att, Vec3f(1.0f, d, d * d));
|
||||
float att = 1.0f / Dot33(lstate.att, Vec3f(1.0f, d, d * d));
|
||||
if (!(att > 0.0f))
|
||||
att = 0.0f;
|
||||
else if (att > 1.0f)
|
||||
att = 1.0f;
|
||||
attspot = att;
|
||||
}
|
||||
|
||||
float spot = 1.0f;
|
||||
if (lstate.spot) {
|
||||
float rawSpot = Dot33(lstate.spotDir, L);
|
||||
if (std::isnan(rawSpot))
|
||||
rawSpot = std::signbit(rawSpot) ? 0.0f : 1.0f;
|
||||
|
||||
float spot = 1.0f;
|
||||
if (rawSpot >= lstate.spotCutoff) {
|
||||
spot = pspLightPow(rawSpot, lstate.spotExp);
|
||||
if (std::isnan(spot))
|
||||
@ -322,14 +323,16 @@ static void ProcessSIMD(VertexData &vertex, const WorldCoords &worldpos, const W
|
||||
} else {
|
||||
spot = 0.0f;
|
||||
}
|
||||
|
||||
attspot *= spot;
|
||||
}
|
||||
|
||||
// ambient lighting
|
||||
if (lstate.ambient) {
|
||||
int attspot = (int)LightCeil<useSSE4>(256 * 2 * att * spot + 1);
|
||||
if (attspot > 512)
|
||||
attspot = 512;
|
||||
Vec4<int> lambient = LightColorScaleBy512<useSSE4>(lstate.ambientColorFactor, mac, attspot);
|
||||
int attspot512 = (int)LightCeil<useSSE4>(256 * 2 * attspot + 1);
|
||||
if (attspot512 > 512)
|
||||
attspot512 = 512;
|
||||
Vec4<int> lambient = LightColorScaleBy512<useSSE4>(lstate.ambientColorFactor, mac, attspot512);
|
||||
LightColorSum(final_color, lambient);
|
||||
}
|
||||
|
||||
@ -343,7 +346,7 @@ static void ProcessSIMD(VertexData &vertex, const WorldCoords &worldpos, const W
|
||||
}
|
||||
|
||||
if (lstate.diffuse && diffuse_factor > 0.0f) {
|
||||
int diffuse_attspot = (int)LightCeil<useSSE4>(256 * 2 * att * spot * diffuse_factor + 1);
|
||||
int diffuse_attspot = (int)LightCeil<useSSE4>(256 * 2 * attspot * diffuse_factor + 1);
|
||||
if (diffuse_attspot > 512)
|
||||
diffuse_attspot = 512;
|
||||
Vec4<int> mdc = state.colorForDiffuse ? colorFactor : state.material.diffuseColorFactor;
|
||||
@ -358,7 +361,7 @@ static void ProcessSIMD(VertexData &vertex, const WorldCoords &worldpos, const W
|
||||
specular_factor = pspLightPow(specular_factor, state.specularExp);
|
||||
|
||||
if (specular_factor > 0.0f) {
|
||||
int specular_attspot = (int)LightCeil<useSSE4>(256 * 2 * att * spot * specular_factor + 1);
|
||||
int specular_attspot = (int)LightCeil<useSSE4>(256 * 2 * attspot * specular_factor + 1);
|
||||
if (specular_attspot > 512)
|
||||
specular_attspot = 512;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user