Add and use SSE-enabled ImRsqrt() in place of 1.0f / ImSqrt(). (#4091)
Squashed 3 commits.
This commit: 4c9f0cec27 (parent: 84545dbe6f)

docs/CHANGELOG.txt
@@ -63,6 +63,7 @@ Other Changes:
 - Popups: Added 'OpenPopup(ImGuiID id)' overload to facilitate calling from nested stacks. (#3993, #331) [@zlash]
 - Optimization: Disabling some of MSVC most aggressive Debug runtime checks for some simple/low-level functions
   (e.g. ImVec2, ImVector) leading to a 10-20% increase of performances with MSVC "default" Debug settings.
+- ImDrawList: Add and use SSE-enabled ImRsqrt() in place of 1.0f / ImSqrt(). (#4091) [@wolfpld]
 - ImDrawList: Fixed/improved thickness of thick strokes with sharp angles. (#4053, #3366, #2964, #2868, #2518, #2183)
   Effectively introduced a regression in 1.67 (Jan 2019), and a fix in 1.70 (Apr 2019) but the fix wasn't actually on
   par with original version. Now incorporating the correct revert.
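
For context on the new ImRsqrt() entry: _mm_rsqrt_ss returns an approximate reciprocal square root (Intel documents its relative error as at most about 1.5*2^-12, roughly 12 bits), which is ample precision for normalizing 2D direction vectors during polyline tessellation. The following is a standalone comparison sketch, not part of the commit; it assumes an x86 target with SSE available (e.g. pass -msse on 32-bit GCC/Clang), and the helper names are illustrative.

// Standalone sketch: approximate SSE rsqrt vs. the exact 1.0f / sqrtf() fallback.
#include <cmath>
#include <cstdio>
#include <immintrin.h>

static float RsqrtSSE(float x)   { return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(x))); } // same expression the SSE ImRsqrt() uses
static float RsqrtExact(float x) { return 1.0f / sqrtf(x); }                            // same as the non-SSE fallback

int main()
{
    const float samples[] = { 0.25f, 1.0f, 2.0f, 100.0f, 12345.678f };
    for (float x : samples)
    {
        float approx = RsqrtSSE(x);
        float exact  = RsqrtExact(x);
        printf("x = %12.3f  approx = %.7f  exact = %.7f  rel_err = %.2e\n",
               x, approx, exact, fabsf(approx - exact) / exact);
    }
    return 0;
}
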
imgui_draw.cpp
@@ -694,7 +694,7 @@ void ImDrawList::PrimQuadUV(const ImVec2& a, const ImVec2& b, const ImVec2& c, c
 
 // On AddPolyline() and AddConvexPolyFilled() we intentionally avoid using ImVec2 and superfluous function calls to optimize debug/non-inlined builds.
 // Those macros expects l-values.
-#define IM_NORMALIZE2F_OVER_ZERO(VX,VY) do { float d2 = VX*VX + VY*VY; if (d2 > 0.0f) { float inv_len = 1.0f / ImSqrt(d2); VX *= inv_len; VY *= inv_len; } } while (0)
+#define IM_NORMALIZE2F_OVER_ZERO(VX,VY) do { float d2 = VX*VX + VY*VY; if (d2 > 0.0f) { float inv_len = ImRsqrt(d2); VX *= inv_len; VY *= inv_len; } } while (0)
 #define IM_FIXNORMAL2F_MAX_INVLEN2 100.0f // 500.0f (see #4053, #3366)
 #define IM_FIXNORMAL2F(VX,VY) do { float d2 = VX*VX + VY*VY; if (d2 > 0.000001f) { float inv_len2 = 1.0f / d2; if (inv_len2 > IM_FIXNORMAL2F_MAX_INVLEN2) inv_len2 = IM_FIXNORMAL2F_MAX_INVLEN2; VX *= inv_len2; VY *= inv_len2; } } while (0)
 
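
The macros above operate on separate x/y floats rather than ImVec2 to keep debug/non-inlined builds fast. A standalone usage sketch (not part of the commit; the macro body is copied from the new version above, and the scalar ImRsqrt stub is an assumption used here so the example compiles without SSE):

// Standalone usage sketch of IM_NORMALIZE2F_OVER_ZERO.
#include <cmath>
#include <cstdio>

static inline float ImRsqrt(float x) { return 1.0f / sqrtf(x); }   // scalar fallback; imgui selects the SSE version when IMGUI_ENABLE_SSE is defined
#define IM_NORMALIZE2F_OVER_ZERO(VX,VY) do { float d2 = VX*VX + VY*VY; if (d2 > 0.0f) { float inv_len = ImRsqrt(d2); VX *= inv_len; VY *= inv_len; } } while (0)

int main()
{
    // Direction of a polyline edge from (0,0) to (3,4); the macro expects l-values.
    float dx = 3.0f, dy = 4.0f;
    IM_NORMALIZE2F_OVER_ZERO(dx, dy);
    printf("normalized: %f %f\n", dx, dy);   // ~0.6, ~0.8

    // A zero-length edge is left untouched instead of producing NaN/Inf.
    float zx = 0.0f, zy = 0.0f;
    IM_NORMALIZE2F_OVER_ZERO(zx, zy);
    printf("zero edge : %f %f\n", zx, zy);   // stays 0.0, 0.0
    return 0;
}
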
imgui_internal.h
@@ -51,6 +51,12 @@ Index of this file:
 #include <math.h>      // sqrtf, fabsf, fmodf, powf, floorf, ceilf, cosf, sinf
 #include <limits.h>    // INT_MIN, INT_MAX
 
+// Enable SSE intrinsics if available
+#if defined __SSE__ || defined __x86_64__ || defined _M_X64
+#define IMGUI_ENABLE_SSE
+#include <immintrin.h>
+#endif
+
 // Visual Studio warnings
 #ifdef _MSC_VER
 #pragma warning (push)
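
The detection relies on __SSE__ (defined by GCC/Clang when SSE code generation is enabled) plus the 64-bit architecture macros __x86_64__ and _M_X64, since SSE is part of the x86-64 baseline; 32-bit MSVC builds define none of these and therefore keep the scalar fallback. A minimal standalone sketch (not part of the commit) mirroring the check:

// Report which ImRsqrt() path the detection above would select on this build.
#include <cstdio>

#if defined __SSE__ || defined __x86_64__ || defined _M_X64
#define IMGUI_ENABLE_SSE
#include <immintrin.h>   // _mm_rsqrt_ss and friends
#endif

int main()
{
#ifdef IMGUI_ENABLE_SSE
    printf("IMGUI_ENABLE_SSE defined: ImRsqrt(float) will use _mm_rsqrt_ss().\n");
#else
    printf("IMGUI_ENABLE_SSE not defined: ImRsqrt(float) falls back to 1.0f / sqrtf().\n");
#endif
    return 0;
}
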
@@ -390,6 +396,12 @@ static inline float ImAbs(float x) { return fabsf(x); }
 static inline double ImAbs(double x) { return fabs(x); }
 static inline float ImSign(float x) { return (x < 0.0f) ? -1.0f : ((x > 0.0f) ? 1.0f : 0.0f); } // Sign operator - returns -1, 0 or 1 based on sign of argument
 static inline double ImSign(double x) { return (x < 0.0) ? -1.0 : ((x > 0.0) ? 1.0 : 0.0); }
+#ifdef IMGUI_ENABLE_SSE
+static inline float ImRsqrt(float x) { return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(x))); }
+#else
+static inline float ImRsqrt(float x) { return 1.0f / sqrtf(x); }
+#endif
+static inline double ImRsqrt(double x) { return 1.0 / sqrt(x); }
 #endif
 // - ImMin/ImMax/ImClamp/ImLerp/ImSwap are used by widgets which support variety of types: signed/unsigned int/long long float/double
 // (Exceptionally using templates here but we could also redefine them for those types)
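
Note that only the float overload gets the SSE fast path; ImRsqrt(double) always computes the exact 1.0 / sqrt(x), since baseline SSE offers no double-precision counterpart of _mm_rsqrt_ss. A self-contained sketch (not part of the commit; declarations copied from the block above) showing the two overloads being resolved:

// The float overload dispatches to SSE when available; the double overload is always exact.
#include <cmath>
#include <cstdio>

#if defined __SSE__ || defined __x86_64__ || defined _M_X64
#define IMGUI_ENABLE_SSE
#include <immintrin.h>
#endif

#ifdef IMGUI_ENABLE_SSE
static inline float  ImRsqrt(float x)  { return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(x))); }
#else
static inline float  ImRsqrt(float x)  { return 1.0f / sqrtf(x); }
#endif
static inline double ImRsqrt(double x) { return 1.0 / sqrt(x); }

int main()
{
    printf("float : %.9f\n",  ImRsqrt(2.0f));  // may be approximate (SSE path)
    printf("double: %.17f\n", ImRsqrt(2.0));   // always 1.0 / sqrt(2.0)
    return 0;
}
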
@@ -410,7 +422,7 @@ static inline ImVec4 ImLerp(const ImVec4& a, const ImVec4& b, float t)
 static inline float ImSaturate(float f) { return (f < 0.0f) ? 0.0f : (f > 1.0f) ? 1.0f : f; }
 static inline float ImLengthSqr(const ImVec2& lhs) { return (lhs.x * lhs.x) + (lhs.y * lhs.y); }
 static inline float ImLengthSqr(const ImVec4& lhs) { return (lhs.x * lhs.x) + (lhs.y * lhs.y) + (lhs.z * lhs.z) + (lhs.w * lhs.w); }
-static inline float ImInvLength(const ImVec2& lhs, float fail_value) { float d = (lhs.x * lhs.x) + (lhs.y * lhs.y); if (d > 0.0f) return 1.0f / ImSqrt(d); return fail_value; }
+static inline float ImInvLength(const ImVec2& lhs, float fail_value) { float d = (lhs.x * lhs.x) + (lhs.y * lhs.y); if (d > 0.0f) return ImRsqrt(d); return fail_value; }
 static inline float ImFloor(float f) { return (float)(int)(f); }
 static inline float ImFloorSigned(float f) { return (float)((f >= 0 || (int)f == f) ? (int)f : (int)f - 1); } // Decent replacement for floorf()
 static inline ImVec2 ImFloor(const ImVec2& v) { return ImVec2((float)(int)(v.x), (float)(int)(v.y)); }
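
ImInvLength() now routes through the same ImRsqrt() helper. A standalone usage sketch (not part of the commit; the minimal ImVec2 struct and the scalar ImRsqrt stub are assumptions so the example compiles on its own, whereas in imgui they come from imgui.h and imgui_internal.h):

// Usage of the ImInvLength() pattern: inverse length, or fail_value for a zero vector.
#include <cmath>
#include <cstdio>

struct ImVec2 { float x, y; };                                        // minimal stand-in for imgui's ImVec2
static inline float ImRsqrt(float x) { return 1.0f / sqrtf(x); }      // scalar fallback path, kept portable here
static inline float ImInvLength(const ImVec2& lhs, float fail_value)  // same shape as the updated helper above
{
    float d = (lhs.x * lhs.x) + (lhs.y * lhs.y);
    if (d > 0.0f)
        return ImRsqrt(d);
    return fail_value;
}

int main()
{
    ImVec2 v = { 3.0f, 4.0f };
    float inv_len = ImInvLength(v, 1.0f);                 // 1/5 = 0.2 for a length-5 vector
    printf("normalized: %f %f\n", v.x * inv_len, v.y * inv_len);

    ImVec2 zero = { 0.0f, 0.0f };
    printf("fail_value: %f\n", ImInvLength(zero, 1.0f));  // zero vector returns fail_value
    return 0;
}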