mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-03-04 12:17:16 +00:00
Merge pull request #17729 from unknownbrackets/softgpu-lighting
softgpu: Reduce some non-SIMD lighting math
This commit is contained in:
commit
89d846ecbe
10
GPU/Math3D.h
10
GPU/Math3D.h
@ -1367,6 +1367,16 @@ inline Vec3<float> Vec3<float>::operator + (const Vec3 &other) const {
|
||||
return Vec3<float>(_mm_add_ps(SAFE_M128(vec), SAFE_M128(other.vec)));
|
||||
}
|
||||
|
||||
// Vec3<float> specialization of in-place subtraction.
// Subtracts other.vec from this->vec with a single packed-float SSE subtract
// (_mm_sub_ps) and stores the result back into vec.
// Both operands are passed through SAFE_M128, whose definition is not visible
// in this hunk.
template<>
|
||||
inline void Vec3<float>::operator -= (const Vec3<float> &other) {
|
||||
vec = _mm_sub_ps(SAFE_M128(vec), SAFE_M128(other.vec));
|
||||
}
|
||||
|
||||
// Vec3<float> specialization of binary subtraction.
// Returns a new Vec3<float> built from the packed-float SSE difference
// (_mm_sub_ps) of this->vec and other.vec. The method is const, so neither
// operand is modified.
// Both operands are passed through SAFE_M128, whose definition is not visible
// in this hunk.
template<>
|
||||
inline Vec3<float> Vec3<float>::operator - (const Vec3 &other) const {
|
||||
return Vec3<float>(_mm_sub_ps(SAFE_M128(vec), SAFE_M128(other.vec)));
|
||||
}
|
||||
|
||||
template<>
|
||||
inline Vec3<float> Vec3<float>::operator * (const Vec3 &other) const {
|
||||
return Vec3<float>(_mm_mul_ps(SAFE_M128(vec), SAFE_M128(other.vec)));
|
||||
|
@ -86,15 +86,13 @@ void ComputeState(State *state, bool hasColor0) {
|
||||
bool anyAmbient = false;
|
||||
bool anyDiffuse = false;
|
||||
bool anySpecular = false;
|
||||
bool anyDirectional = false;
|
||||
bool anyNonDirectional = false;
|
||||
for (int light = 0; light < 4; ++light) {
|
||||
auto &lstate = state->lights[light];
|
||||
lstate.enabled = gstate.isLightChanEnabled(light);
|
||||
if (!lstate.enabled)
|
||||
continue;
|
||||
|
||||
lstate.spot = gstate.isSpotLight(light);
|
||||
lstate.directional = gstate.isDirectionalLight(light);
|
||||
lstate.poweredDiffuse = gstate.isUsingPoweredDiffuseLight(light);
|
||||
lstate.specular = gstate.isUsingSpecularLight(light);
|
||||
|
||||
@ -112,14 +110,22 @@ void ComputeState(State *state, bool hasColor0) {
|
||||
anySpecular = anySpecular || lstate.specular;
|
||||
}
|
||||
|
||||
lstate.pos = GetLightVec(gstate.lpos, light);
|
||||
if (lstate.directional) {
|
||||
lstate.pos.NormalizeOr001();
|
||||
anyDirectional = true;
|
||||
} else {
|
||||
lstate.att = GetLightVec(gstate.latt, light);
|
||||
// Doesn't actually need to be on if nothing will affect it.
|
||||
if (!lstate.specular && !lstate.ambient && !lstate.diffuse) {
|
||||
lstate.enabled = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
lstate.pos = GetLightVec(gstate.lpos, light);
|
||||
lstate.directional = gstate.isDirectionalLight(light);
|
||||
if (lstate.directional) {
|
||||
lstate.pos.NormalizeOr001();
|
||||
} else {
|
||||
lstate.att = GetLightVec(gstate.latt, light);
|
||||
anyNonDirectional = true;
|
||||
}
|
||||
|
||||
lstate.spot = gstate.isSpotLight(light);
|
||||
if (lstate.spot) {
|
||||
lstate.spotDir = GetLightVec(gstate.ldir, light);
|
||||
lstate.spotDir.Normalize();
|
||||
@ -177,7 +183,7 @@ void ComputeState(State *state, bool hasColor0) {
|
||||
state->baseAmbientColorFactor = LightColorFactor(gstate.getAmbientRGBA(), ones);
|
||||
state->setColor1 = gstate.isUsingSecondaryColor() && anySpecular;
|
||||
state->addColor1 = !gstate.isUsingSecondaryColor() && anySpecular;
|
||||
state->usesWorldPos = anyDirectional;
|
||||
state->usesWorldPos = anyNonDirectional;
|
||||
state->usesWorldNormal = gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP || anyDiffuse || anySpecular;
|
||||
}
|
||||
|
||||
@ -215,7 +221,7 @@ static inline __m128i LightColorScaleBy512SSE4(__m128i factor, __m128i color, __
|
||||
__m128i result18 = _mm_madd_epi16(factor, color);
|
||||
// But now with 18 bits, we need a full multiply.
|
||||
__m128i multiplied = _mm_mullo_epi32(result18, scale);
|
||||
return _mm_srai_epi32(multiplied, 19);
|
||||
return _mm_srai_epi32(multiplied, 10 + 9);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -240,9 +246,9 @@ static Vec4<int> LightColorScaleBy512(const Vec4<int> &factor, const Vec4<int> &
|
||||
return LightColorScaleBy512SSE4(factor.ivec, color.ivec, _mm_set1_epi32(scale));
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
int32x4_t multiplied = vmulq_n_s32(vmulq_s32(factor.ivec, color.ivec), scale);
|
||||
return vshrq_n_s32(multiplied, 19);
|
||||
return vshrq_n_s32(multiplied, 10 + 9);
|
||||
#endif
|
||||
return (factor * color * scale) / (1024 * 512);
|
||||
return (factor * color * scale) >> (10 + 9);
|
||||
}
|
||||
|
||||
static inline void LightColorSum(Vec4<int> &sum, const Vec4<int> &src) {
|
||||
@ -296,25 +302,26 @@ static void ProcessSIMD(VertexData &vertex, const WorldCoords &worldpos, const W
|
||||
// L = vector from vertex to light source
|
||||
// TODO: Should transfer the light positions to world/view space for these calculations?
|
||||
Vec3<float> L = lstate.pos;
|
||||
float att = 1.0f;
|
||||
float attspot = 1.0f;
|
||||
if (!lstate.directional) {
|
||||
L -= worldpos;
|
||||
// TODO: Should this normalize (0, 0, 0) to (0, 0, 1)?
|
||||
float d = L.NormalizeOr001();
|
||||
|
||||
att = 1.0f / Dot33(lstate.att, Vec3f(1.0f, d, d * d));
|
||||
float att = 1.0f / Dot33(lstate.att, Vec3f(1.0f, d, d * d));
|
||||
if (!(att > 0.0f))
|
||||
att = 0.0f;
|
||||
else if (att > 1.0f)
|
||||
att = 1.0f;
|
||||
attspot = att;
|
||||
}
|
||||
|
||||
float spot = 1.0f;
|
||||
if (lstate.spot) {
|
||||
float rawSpot = Dot33(lstate.spotDir, L);
|
||||
if (std::isnan(rawSpot))
|
||||
rawSpot = std::signbit(rawSpot) ? 0.0f : 1.0f;
|
||||
|
||||
float spot = 1.0f;
|
||||
if (rawSpot >= lstate.spotCutoff) {
|
||||
spot = pspLightPow(rawSpot, lstate.spotExp);
|
||||
if (std::isnan(spot))
|
||||
@ -322,14 +329,16 @@ static void ProcessSIMD(VertexData &vertex, const WorldCoords &worldpos, const W
|
||||
} else {
|
||||
spot = 0.0f;
|
||||
}
|
||||
|
||||
attspot *= spot;
|
||||
}
|
||||
|
||||
// ambient lighting
|
||||
if (lstate.ambient) {
|
||||
int attspot = (int)LightCeil<useSSE4>(256 * 2 * att * spot + 1);
|
||||
if (attspot > 512)
|
||||
attspot = 512;
|
||||
Vec4<int> lambient = LightColorScaleBy512<useSSE4>(lstate.ambientColorFactor, mac, attspot);
|
||||
int attspot512 = (int)LightCeil<useSSE4>(256 * 2 * attspot + 1);
|
||||
if (attspot512 > 512)
|
||||
attspot512 = 512;
|
||||
Vec4<int> lambient = LightColorScaleBy512<useSSE4>(lstate.ambientColorFactor, mac, attspot512);
|
||||
LightColorSum(final_color, lambient);
|
||||
}
|
||||
|
||||
@ -343,7 +352,7 @@ static void ProcessSIMD(VertexData &vertex, const WorldCoords &worldpos, const W
|
||||
}
|
||||
|
||||
if (lstate.diffuse && diffuse_factor > 0.0f) {
|
||||
int diffuse_attspot = (int)LightCeil<useSSE4>(256 * 2 * att * spot * diffuse_factor + 1);
|
||||
int diffuse_attspot = (int)LightCeil<useSSE4>(256 * 2 * attspot * diffuse_factor + 1);
|
||||
if (diffuse_attspot > 512)
|
||||
diffuse_attspot = 512;
|
||||
Vec4<int> mdc = state.colorForDiffuse ? colorFactor : state.material.diffuseColorFactor;
|
||||
@ -358,7 +367,7 @@ static void ProcessSIMD(VertexData &vertex, const WorldCoords &worldpos, const W
|
||||
specular_factor = pspLightPow(specular_factor, state.specularExp);
|
||||
|
||||
if (specular_factor > 0.0f) {
|
||||
int specular_attspot = (int)LightCeil<useSSE4>(256 * 2 * att * spot * specular_factor + 1);
|
||||
int specular_attspot = (int)LightCeil<useSSE4>(256 * 2 * attspot * specular_factor + 1);
|
||||
if (specular_attspot > 512)
|
||||
specular_attspot = 512;
|
||||
|
||||
|
@ -104,14 +104,14 @@
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<WholeProgramOptimization>false</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
<PlatformToolset>$(DefaultPlatformToolset)</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM'" Label="Configuration">
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<WholeProgramOptimization>false</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
<PlatformToolset>$(DefaultPlatformToolset)</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
@ -130,14 +130,14 @@
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<WholeProgramOptimization>false</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
<PlatformToolset>$(DefaultPlatformToolset)</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'" Label="Configuration">
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<WholeProgramOptimization>false</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
<PlatformToolset>$(DefaultPlatformToolset)</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
@ -174,11 +174,15 @@
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>RC_DISABLE_LUA;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
|
||||
<AdditionalIncludeDirectories>../rcheevos/include</AdditionalIncludeDirectories>
|
||||
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
|
||||
<FloatingPointModel>Precise</FloatingPointModel>
|
||||
<BasicRuntimeChecks>Default</BasicRuntimeChecks>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<MinimalRebuild>false</MinimalRebuild>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
@ -190,11 +194,15 @@
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>RC_DISABLE_LUA;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
|
||||
<AdditionalIncludeDirectories>../rcheevos/include</AdditionalIncludeDirectories>
|
||||
<OmitFramePointers>false</OmitFramePointers>
|
||||
<EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
|
||||
<FloatingPointModel>Precise</FloatingPointModel>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<MinimalRebuild>false</MinimalRebuild>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
@ -206,13 +214,19 @@
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>RC_DISABLE_LUA;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
|
||||
<AdditionalIncludeDirectories>../rcheevos/include</AdditionalIncludeDirectories>
|
||||
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
|
||||
<BufferSecurityCheck>false</BufferSecurityCheck>
|
||||
<FloatingPointModel>Precise</FloatingPointModel>
|
||||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
||||
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
@ -226,13 +240,20 @@
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>RC_DISABLE_LUA;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
|
||||
<AdditionalIncludeDirectories>../rcheevos/include</AdditionalIncludeDirectories>
|
||||
<BufferSecurityCheck>false</BufferSecurityCheck>
|
||||
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
|
||||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
||||
<OmitFramePointers>false</OmitFramePointers>
|
||||
<EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
|
||||
<FloatingPointModel>Precise</FloatingPointModel>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
@ -251,6 +272,12 @@
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
|
||||
<AdditionalIncludeDirectories>../rcheevos/include</AdditionalIncludeDirectories>
|
||||
<OmitFramePointers>false</OmitFramePointers>
|
||||
<EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
|
||||
<FloatingPointModel>Precise</FloatingPointModel>
|
||||
<BasicRuntimeChecks>Default</BasicRuntimeChecks>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<MinimalRebuild>false</MinimalRebuild>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
@ -267,6 +294,12 @@
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
|
||||
<AdditionalIncludeDirectories>../rcheevos/include</AdditionalIncludeDirectories>
|
||||
<OmitFramePointers>false</OmitFramePointers>
|
||||
<EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
|
||||
<FloatingPointModel>Precise</FloatingPointModel>
|
||||
<BasicRuntimeChecks>Default</BasicRuntimeChecks>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<MinimalRebuild>false</MinimalRebuild>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
@ -278,13 +311,20 @@
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>RC_DISABLE_LUA;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
|
||||
<AdditionalIncludeDirectories>../rcheevos/include</AdditionalIncludeDirectories>
|
||||
<BufferSecurityCheck>false</BufferSecurityCheck>
|
||||
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
|
||||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
||||
<OmitFramePointers>false</OmitFramePointers>
|
||||
<EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
|
||||
<FloatingPointModel>Precise</FloatingPointModel>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
@ -298,13 +338,20 @@
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>RC_DISABLE_LUA;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
|
||||
<AdditionalIncludeDirectories>../rcheevos/include</AdditionalIncludeDirectories>
|
||||
<BufferSecurityCheck>false</BufferSecurityCheck>
|
||||
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
|
||||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
||||
<OmitFramePointers>false</OmitFramePointers>
|
||||
<EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
|
||||
<FloatingPointModel>Precise</FloatingPointModel>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
|
Loading…
x
Reference in New Issue
Block a user