ppsspp/Common/Math/math_util.h

#pragma once

// Some of the code in this file consists of snippets from all over the web, esp. dspmusic.org. I think it's all public domain.
// In any case, very little of it is used anywhere at the moment.

#include <cmath>
#include <cstring>
#include <cstdint>

// 16-bit storage type for the truncated-float format handled by the two helpers below.
typedef unsigned short float16;

// This ain't a 1.5.10 float16, it's a hack format where we chop the low 16 bits off a
// 32-bit float and keep only the upper 16 bits of its bit pattern.
// This choice is subject to change. Probably not used for anything at the moment.
// DEPRECATED
//
// Returns the upper 16 bits of x's bit pattern (truncation, no rounding).
inline float16 FloatToFloat16(float x) {
	// Work on an unsigned copy of the bits so the shift is well defined for negative floats.
	uint32_t bits;
	memcpy(&bits, &x, sizeof(bits));
	return static_cast<float16>(bits >> 16);
}

// Inverse of FloatToFloat16: puts the 16 stored bits back into the upper half of a
// 32-bit float pattern and zero-fills the 16 bits that were chopped off.
inline float Float16ToFloat(float16 ix) {
	// Widen first: copying sizeof(float) bytes straight out of the 2-byte argument
	// would read past the end of it.
	const uint32_t bits = static_cast<uint32_t>(ix) << 16;
	float x;
	memcpy(&x, &bits, sizeof(x));
	return x;
}

// Returns true when clearing the lowest set bit of n leaves nothing, i.e. when n has
// at most one bit set. Note that this also reports true for n == 0.
inline bool isPowerOf2(int n) {
	const int lowest_bit_cleared = n & (n - 1);
	return lowest_bit_cleared == 0;
}

// Next power of 2: returns the smallest power of two that is >= v.
// Values that are already a power of two are returned unchanged. The arithmetic
// wraps, so v == 0 and v > 0x80000000 both yield 0.
inline uint32_t RoundUpToPowerOf2(uint32_t v) {
	uint32_t smeared = v - 1;
	// Copy the highest set bit into every lower position (shifts 1, 2, 4, 8, 16).
	for (uint32_t shift = 1; shift <= 16; shift <<= 1)
		smeared |= smeared >> shift;
	return smeared + 1;
}

// Rounds v up to the next multiple of power. The mask trick only gives a true
// multiple when power is itself a power of two.
inline uint32_t RoundUpToPowerOf2(uint32_t v, uint32_t power) {
	const uint32_t low_bits = power - 1;
	return (v + low_bits) & ~low_bits;
}

// Integer base-2 logarithm: returns the index of the highest set bit of val.
// For val == 0 no bit is set and the result wraps around to 0xFFFFFFFF.
inline uint32_t log2i(uint32_t val) {
	uint32_t significant_bits = 0;
	for (; val != 0; val >>= 1)
		++significant_bits;
	return significant_bits - 1;
}

// Pi as a single-precision (float) literal.
#define PI 3.141592653589793f
// Fallback for when no earlier header has defined M_PI. Note that this fallback is a
// float literal as well, so its type depends on which definition ends up being used.
#ifndef M_PI
#define M_PI 3.141592653589793f
#endif

// Restricts val to the range [floor, cap] and returns the result by value.
// The upper bound is tested first, so cap wins if the bounds are given crossed.
template<class T>
inline T clamp_value(T val, T floor, T cap) {
	return (val > cap) ? cap
		: (val < floor) ? floor
		: val;
}

// Very common operation, familiar from shaders: limits x to the range [0, 1].
// A value that compares neither above 1 nor below 0 (such as NaN) is returned as is.
inline float saturatef(float x) {
	return (x > 1.0f) ? 1.0f
		: (x < 0.0f) ? 0.0f
		: x;
}

// Align x up (ROUND_UP) or down (ROUND_DOWN) to a multiple of a, using a bit mask
// built from (a - 1). The mask only yields a true multiple when a is a power of two.
// These are macros: a is expanded twice in ROUND_UP, so avoid arguments with side effects.
#define ROUND_UP(x, a)   (((x) + (a) - 1) & ~((a) - 1))
#define ROUND_DOWN(x, a) ((x) & ~((a) - 1))

// In-place clamp: limits *val to the range [min, max].
// The lower bound is tested first; when it applies, the upper bound is not checked.
template<class T>
inline void Clamp(T* val, const T& min, const T& max)
{
	T& target = *val;
	if (target < min) {
		target = min;
		return;
	}
	if (target > max)
		target = max;
}

// By-value clamp: returns a copy of val limited to [min, max], using the in-place
// overload on the copy.
template<class T>
inline T Clamp(const T val, const T& min, const T& max)
{
	T clamped = val;
	Clamp(&clamped, min, max);
	return clamped;
}

// Overlays a 32-bit float with an unsigned integer of the same size, so the helpers
// in this file can inspect and edit a float's bit pattern.
// Brace initialization sets the first member (u), i.e. the raw bits.
union FP32 {
	uint32_t u;  // raw bit pattern
	float f;     // the same storage viewed as a float
};

// Wrapper around the raw 16 bits of a half-precision float, as consumed and produced
// by half_to_float_fast5 / float_to_half_fast3.
struct FP16 {
	uint16_t u;  // raw bit pattern
};

// Returns true if f is positive or negative infinity (bit pattern 0x7f800000 or
// 0xff800000). NaNs and finite values return false.
inline bool my_isinf(float f) {
	// memcpy is the well-defined way to read a float's bits; reading the inactive
	// member of a union is not guaranteed by the C++ standard.
	uint32_t bits;
	memcpy(&bits, &f, sizeof(bits));
	// Ignore the sign bit: what remains must be "all exponent bits set, mantissa zero".
	return (bits & 0x7FFFFFFFu) == 0x7F800000u;
}

// Same test as my_isinf, but takes the float's raw 32-bit pattern directly:
// true for 0x7f800000 (+inf) and 0xff800000 (-inf) only.
inline bool my_isinf_u(uint32_t u) {
	const uint32_t magnitude_bits = u & 0x7FFFFFFFu;  // strip the sign bit
	return magnitude_bits == 0x7F800000u;
}

// Returns true if f is a NaN: all exponent bits set and a non-zero mantissa.
// Infinities (zero mantissa) and finite values return false.
inline bool my_isnan(float f) {
	// memcpy is the well-defined way to read a float's bits; reading the inactive
	// member of a union is not guaranteed by the C++ standard.
	uint32_t bits;
	memcpy(&bits, &f, sizeof(bits));
	const bool exponent_all_ones = (bits & 0x7F800000u) == 0x7F800000u;
	// NaNs have non-zero mantissa
	const bool mantissa_nonzero = (bits & 0x007FFFFFu) != 0;
	return exponent_all_ones && mantissa_nonzero;
}

// Returns true if f is either a NaN or an infinity, i.e. if all exponent bits are set.
inline bool my_isnanorinf(float f) {
	// memcpy is the well-defined way to read a float's bits; reading the inactive
	// member of a union is not guaranteed by the C++ standard.
	uint32_t bits;
	memcpy(&bits, &f, sizeof(bits));
	// NaNs have non-zero mantissa, infs have zero mantissa. That is, we just ignore the mantissa here.
	return (bits & 0x7F800000u) == 0x7F800000u;
}

// Replaces +/-infinity with 0.0f; every other value (including NaN) passes through unchanged.
inline float InfToZero(float f) {
	if (my_isinf(f))
		return 0.0f;
	return f;
}

// Returns 1 if d is an even whole number, 0 otherwise.
// Works by halving d, dropping the fractional part of the half, and checking whether
// doubling that gives d back.
inline int is_even(float d) {
	float whole_half = 0.0f;
	modff(d / 2.0f, &whole_half);  // only the integral part is needed
	const bool doubles_back = (2.0f * whole_half == d);
	return doubles_back ? 1 : 0;
}

// Rounds d to the nearest whole number; exact *.5 ties go to the closest even number.
// The whole computation stays in double precision, so inputs whose integer part does
// not fit in a float's 24-bit mantissa (or exceeds the float range) round correctly.
// NaN and +/-infinity are returned unchanged.
inline double round_ieee_754(double d) {
	const double i = std::floor(d);
	const double frac = d - i;  // in [0, 1) for finite d, NaN otherwise
	if (frac < 0.5)
		return i;
	if (frac > 0.5)
		return i + 1.0;
	// Exact tie (or a non-finite input, where both comparisons above are false):
	// keep i when it is even, otherwise step up to the even neighbour.
	if (std::fmod(i, 2.0) == 0.0)
		return i;
	return i + 1.0;
}

// magic code from ryg: http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
// See also SSE2 version: https://gist.github.com/rygorous/2144712
//
// Converts the 16-bit half-float pattern in h to a 32-bit float, returned as an FP32
// so the caller can read either the float value (.f) or its raw bits (.u).
// Inf/NaN inputs keep the low 10 bits of the half in the low bits of the result.
inline FP32 half_to_float_fast5(FP16 h)
{
	// Bit pattern with exponent field 127 + (127 - 15) and zero mantissa, i.e. the float 2^112.
	static const FP32 magic = { (127 + (127 - 15)) << 23 };
	// Bit pattern with exponent field 127 + 16 and zero mantissa, i.e. the float 65536.0.
	static const FP32 was_infnan = { (127 + 16) << 23 };
	FP32 o;
	// Drop the sign bit and move the half's remaining 15 bits up by 13, which places
	// them directly below the float's sign bit.
	o.u = (h.u & 0x7fff) << 13;     // exponent/mantissa bits
	// Multiplying by 2^112 moves the exponent from the half's bias (15) to the float's (127).
	o.f *= magic.f;                 // exponent adjust
	// A result of 65536 or more means the half had all of its exponent bits set.
	if (o.f >= was_infnan.f)        // make sure Inf/NaN survive (retain the low bits)
		o.u = (255 << 23) | (h.u & 0x03ff);
	// Move the half's sign from bit 15 to bit 31.
	o.u |= (h.u & 0x8000) << 16;    // sign bit
	return o;
}

inline float ExpandHalf(uint16_t half) {
	FP16 fp16;
	fp16.u = half;
	FP32 fp = half_to_float_fast5(fp16);
	return fp.f;
}

// More magic code: https://gist.github.com/rygorous/2156668
//
// Converts the 32-bit float in f to a 16-bit half-float pattern.
// Infinity maps to half infinity (0x7c00); NaN maps to 0x7e00 OR'ed with the low 10
// bits of the float; values too large for a half are clamped to infinity. The sign is
// carried over in all cases.
inline FP16 float_to_half_fast3(FP32 f)
{
	static const FP32 f32infty = { 255 << 23 };  // float bit pattern with all exponent bits set
	static const FP32 f16infty = { 31 << 23 };   // half infinity's exponent (31), placed in the float exponent field
	static const FP32 magic = { 15 << 23 };      // float with exponent field 15, i.e. 2^(15 - 127)
	static const uint32_t sign_mask = 0x80000000u;
	static const uint32_t round_mask = ~0xfffu;
	FP16 o = { 0 };

	// Split off the sign so everything below works on the magnitude only.
	uint32_t sign = f.u & sign_mask;
	f.u ^= sign;

	if (f.u >= f32infty.u) // Inf or NaN (all exponent bits set)
		o.u = (f.u > f32infty.u) ? (0x7e00 | (f.u & 0x3ff)) : 0x7c00; // NaN->qNaN and Inf->Inf
	else // (De)normalized number or zero
	{
		// Clear the low 12 bits of the pattern.
		f.u &= round_mask;
		// Scale by 2^-112 so the exponent uses the half's bias (15) instead of the float's (127).
		f.f *= magic.f;
		// Subtracting ~0xfff is the same as adding 0x1000 (modulo 2^32): a rounding
		// bias one bit below the lowest bit kept by the >> 13 further down.
		f.u -= round_mask;
		if (f.u > f16infty.u) f.u = f16infty.u; // Clamp to signed infinity if overflowed

		o.u = f.u >> 13; // Take the bits!
	}

	// Put the sign back, moved from bit 31 to bit 15.
	o.u |= sign >> 16;
	return o;
}

inline uint16_t ShrinkToHalf(float full) {
	FP32 fp32;
	fp32.f = full;
	FP16 fp = float_to_half_fast3(fp32);
	return fp.u;
}

// FPU control. Both functions are only declared here; their definitions live elsewhere.
// NOTE(review): EnableFZ presumably turns on flush-to-zero (FZ) mode - confirm against the implementation.
void EnableFZ();

// Enables both FZ and Default-NaN. This is documented to flip some ARM implementations into a
// "run-fast" mode where they can schedule VFP instructions on the NEON unit (these
// implementations have very slow VFP units).
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0274h/Babffifj.html
void FPU_SetFastMode();
Add drag capturing (useful to resolve conflicts between scrollviews and draggable controls within) Some misc cleanup 2014-01-31 13:31:19 +00:00			`#pragma once`
Initial commit 2012-03-24 22:39:19 +00:00
Reintroduce mipmaps for images in the UI like screenshots (D3D11, OpenGL, D3D9) 2017-03-11 13:43:42 +00:00			`// Some of the stuff in this file are snippets from all over the web, esp. dspmusic.org. I think it's all public domain.`
			`// In any case, very little of it is used anywhere at the moment.`

Add some old code, do some buildfixing 2012-07-26 11:47:15 +00:00			`#include <cmath>`
			`#include <cstring>`
Merge base/basictypes.h into Common/Common.h (mostly). 2020-09-29 10:44:47 +00:00			`#include <cstdint>`
Custom implementations of isnan/isinf as it's just a mess portability-wise.. 2013-04-13 19:49:03 +00:00
Initial commit 2012-03-24 22:39:19 +00:00			`typedef unsigned short float16;`

Cleanup 2012-03-31 09:16:13 +00:00			`// This ain't a 1.5.10 float16, it's a stupid hack format where we chop 16 bits off a float.`
			`// This choice is subject to change. Don't think I'm using this for anything at all now anyway.`
Tabs unification 2012-05-08 20:04:24 +00:00			`// DEPRECATED`
Initial commit 2012-03-24 22:39:19 +00:00			`inline float16 FloatToFloat16(float x) {`
Reindent (->tabs) 2012-10-30 12:20:55 +00:00			`int ix;`
			`memcpy(&ix, &x, sizeof(float));`
			`return ix >> 16;`
Initial commit 2012-03-24 22:39:19 +00:00			`}`

			`inline float Float16ToFloat(float16 ix) {`
Reindent (->tabs) 2012-10-30 12:20:55 +00:00			`float x;`
			`memcpy(&x, &ix, sizeof(float));`
			`return x;`
Initial commit 2012-03-24 22:39:19 +00:00			`}`

OpenGL: Fix texture wrapping of render targets. 2018-03-28 09:23:41 +00:00			`inline bool isPowerOf2(int n) {`
			`return n == 1 \|\| (n & (n - 1)) == 0;`
			`}`

Comments and fixes 2022-08-27 21:37:02 +00:00			`// Next power of 2.`
Add a function to math_util 2013-09-15 10:52:27 +00:00			`inline uint32_t RoundUpToPowerOf2(uint32_t v) {`
			`v--;`
			`v \|= v >> 1;`
			`v \|= v >> 2;`
			`v \|= v >> 4;`
			`v \|= v >> 8;`
			`v \|= v >> 16;`
			`v++;`
			`return v;`
			`}`
Initial commit 2012-03-24 22:39:19 +00:00
Vulkan texture uploads: Take optimalBufferCopyRowPitchAlignment into account Might marginally increase texture upload performance on some GPUs, but mainly just the right thing to do. For example, on Intel, this is 64. 2023-03-04 08:54:27 +00:00			`inline uint32_t RoundUpToPowerOf2(uint32_t v, uint32_t power) {`
			`return (v + power - 1) & ~(power - 1);`
			`}`

Reintroduce mipmaps for images in the UI like screenshots (D3D11, OpenGL, D3D9) 2017-03-11 13:43:42 +00:00			`inline uint32_t log2i(uint32_t val) {`
			`unsigned int ret = -1;`
			`while (val != 0) {`
			`val >>= 1; ret++;`
			`}`
			`return ret;`
			`}`

Initial commit 2012-03-24 22:39:19 +00:00			`#define PI 3.141592653589793f`
Add some old code, do some buildfixing 2012-07-26 11:47:15 +00:00			`#ifndef M_PI`
			`#define M_PI 3.141592653589793f`
			`#endif`
Initial commit 2012-03-24 22:39:19 +00:00
Add clamp_value function to math_util 2014-03-03 10:38:48 +00:00			`template<class T>`
			`inline T clamp_value(T val, T floor, T cap) {`
			`if (val > cap)`
			`return cap;`
			`else if (val < floor)`
			`return floor;`
			`else`
			`return val;`
			`}`
Initial commit 2012-03-24 22:39:19 +00:00
Better mechanism to fade out the side bar when not on EmuScreen 2023-07-16 06:55:48 +00:00			`// Very common operation, familiar from shaders.`
			`inline float saturatef(float x) {`
			`if (x > 1.0f) return 1.0f;`
			`else if (x < 0.0f) return 0.0f;`
			`else return x;`
			`}`

Remove Timer.cpp/h. Move various collections into Common/Data/Collections. 2020-10-05 18:58:33 +00:00			`#define ROUND_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))`
			`#define ROUND_DOWN(x, a) ((x) & ~((a) - 1))`

			`template<class T>`
			`inline void Clamp(T* val, const T& min, const T& max)`
			`{`
			`if (*val < min)`
			`*val = min;`
			`else if (*val > max)`
			`*val = max;`
			`}`

			`template<class T>`
			`inline T Clamp(const T val, const T& min, const T& max)`
			`{`
			`T ret = val;`
			`Clamp(&ret, min, max);`
			`return ret;`
			`}`

Add function to set both FZ and DefaultNaN on ARM. 2014-03-23 09:04:25 +00:00			`union FP32 {`
			`uint32_t u;`
			`float f;`
			`};`

			`struct FP16 {`
			`uint16_t u;`
			`};`

Custom implementations of isnan/isinf as it's just a mess portability-wise.. 2013-04-13 19:49:03 +00:00			`inline bool my_isinf(float f) {`
Add function to set both FZ and DefaultNaN on ARM. 2014-03-23 09:04:25 +00:00			`FP32 f2u;`
Custom implementations of isnan/isinf as it's just a mess portability-wise.. 2013-04-13 19:49:03 +00:00			`f2u.f = f;`
			`return f2u.u == 0x7f800000 \|\|`
			`f2u.u == 0xff800000;`
			`}`

IRInterpreter: Slight optimization for fmul Just put stuff in temporaries, allows for better codegen 2024-06-24 07:12:57 +00:00			`inline bool my_isinf_u(uint32_t u) {`
			`return u == 0x7f800000 \|\| u == 0xff800000;`
			`}`

Custom implementations of isnan/isinf as it's just a mess portability-wise.. 2013-04-13 19:49:03 +00:00			`inline bool my_isnan(float f) {`
Add function to set both FZ and DefaultNaN on ARM. 2014-03-23 09:04:25 +00:00			`FP32 f2u;`
Custom implementations of isnan/isinf as it's just a mess portability-wise.. 2013-04-13 19:49:03 +00:00			`f2u.f = f;`
Fix for my_isnan 2013-04-13 21:04:53 +00:00			`// NaNs have non-zero mantissa`
			`return ((f2u.u & 0x7F800000) == 0x7F800000) && (f2u.u & 0x7FFFFF);`
Custom implementations of isnan/isinf as it's just a mess portability-wise.. 2013-04-13 19:49:03 +00:00			`}`
Rename math_util.cc to cpp, cleanup. 2012-05-08 22:33:43 +00:00
Add my_isnanorinf. 2013-11-12 12:38:14 +00:00			`inline bool my_isnanorinf(float f) {`
Add function to set both FZ and DefaultNaN on ARM. 2014-03-23 09:04:25 +00:00			`FP32 f2u;`
Add my_isnanorinf. 2013-11-12 12:38:14 +00:00			`f2u.f = f;`
			`// NaNs have non-zero mantissa, infs have zero mantissa. That is, we just ignore the mantissa here.`
			`return ((f2u.u & 0x7F800000) == 0x7F800000);`
			`}`

Eliminate inf values resulting from depth range computation. Fixes #17981 2023-12-12 17:48:22 +00:00			`inline float InfToZero(float f) {`
			`return my_isinf(f) ? 0.0f : f;`
			`}`

Move a couple of math functions heret: 2013-07-29 20:35:34 +00:00			`inline int is_even(float d) {`
			`float int_part;`
			`modff(d / 2.0f, &int_part);`
			`return 2.0f * int_part == d;`
			`}`

			`// Rounds *.5 to closest even number`
round_ieee_754 needs to use double to pass all the tests correctly 2013-08-07 20:32:55 +00:00			`inline double round_ieee_754(double d) {`
Merge base/basictypes.h into Common/Common.h (mostly). 2020-09-29 10:44:47 +00:00			`float i = (float)floor(d);`
Move a couple of math functions heret: 2013-07-29 20:35:34 +00:00			`d -= i;`
round_ieee_754 needs to use double to pass all the tests correctly 2013-08-07 20:32:55 +00:00			`if (d < 0.5f)`
Move a couple of math functions heret: 2013-07-29 20:35:34 +00:00			`return i;`
round_ieee_754 needs to use double to pass all the tests correctly 2013-08-07 20:32:55 +00:00			`if (d > 0.5f)`
Move a couple of math functions heret: 2013-07-29 20:35:34 +00:00			`return i + 1.0f;`
round_ieee_754 needs to use double to pass all the tests correctly 2013-08-07 20:32:55 +00:00			`if (is_even(i))`
Move a couple of math functions heret: 2013-07-29 20:35:34 +00:00			`return i;`
			`return i + 1.0f;`
			`}`

Move some fp16 math utils here. 2013-07-30 20:26:02 +00:00			`// magic code from ryg: http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/`
			`// See also SSE2 version: https://gist.github.com/rygorous/2144712`
			`inline FP32 half_to_float_fast5(FP16 h)`
			`{`
			`static const FP32 magic = { (127 + (127 - 15)) << 23 };`
			`static const FP32 was_infnan = { (127 + 16) << 23 };`
			`FP32 o;`
			`o.u = (h.u & 0x7fff) << 13; // exponent/mantissa bits`
			`o.f *= magic.f; // exponent adjust`
Retain NaN bits in fp16/fp32 conversions. This matches PSP behavior better. 2015-04-19 23:07:00 +00:00			`if (o.f >= was_infnan.f) // make sure Inf/NaN survive (retain the low bits)`
			`o.u = (255 << 23) \| (h.u & 0x03ff);`
Move some fp16 math utils here. 2013-07-30 20:26:02 +00:00			`o.u \|= (h.u & 0x8000) << 16; // sign bit`
			`return o;`
			`}`

			`inline float ExpandHalf(uint16_t half) {`
			`FP16 fp16;`
			`fp16.u = half;`
			`FP32 fp = half_to_float_fast5(fp16);`
			`return fp.f;`
			`}`
Move a couple of math functions heret: 2013-07-29 20:35:34 +00:00
Move some fp16 math utils here. 2013-07-30 20:26:02 +00:00			`// More magic code: https://gist.github.com/rygorous/2156668`
			`inline FP16 float_to_half_fast3(FP32 f)`
			`{`
			`static const FP32 f32infty = { 255 << 23 };`
			`static const FP32 f16infty = { 31 << 23 };`
			`static const FP32 magic = { 15 << 23 };`
			`static const uint32_t sign_mask = 0x80000000u;`
			`static const uint32_t round_mask = ~0xfffu;`
			`FP16 o = { 0 };`

			`uint32_t sign = f.u & sign_mask;`
			`f.u ^= sign;`

			`if (f.u >= f32infty.u) // Inf or NaN (all exponent bits set)`
Retain NaN bits in fp16/fp32 conversions. This matches PSP behavior better. 2015-04-19 23:07:00 +00:00			`o.u = (f.u > f32infty.u) ? (0x7e00 \| (f.u & 0x3ff)) : 0x7c00; // NaN->qNaN and Inf->Inf`
Move some fp16 math utils here. 2013-07-30 20:26:02 +00:00			`else // (De)normalized number or zero`
			`{`
			`f.u &= round_mask;`
			`f.f *= magic.f;`
			`f.u -= round_mask;`
			`if (f.u > f16infty.u) f.u = f16infty.u; // Clamp to signed infinity if overflowed`

			`o.u = f.u >> 13; // Take the bits!`
			`}`

			`o.u \|= sign >> 16;`
			`return o;`
			`}`

			`inline uint16_t ShrinkToHalf(float full) {`
			`FP32 fp32;`
			`fp32.f = full;`
			`FP16 fp = float_to_half_fast3(fp32);`
			`return fp.u;`
			`}`
Experimentally revert "Delete some misguided FPU control code, that we definitely don't want" This reverts commit d52d383c224aa60d0d4541276e80e114486eaab3. 2022-10-10 16:35:29 +00:00
			`// FPU control.`
			`void EnableFZ();`

			`// Enable both FZ and Default-NaN. Is documented to flip some ARM implementation into a "run-fast" mode`
			`// where they can schedule VFP instructions on the NEON unit (these implementations have`
			`// very slow VFP units).`
			`// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0274h/Babffifj.html`
			`void FPU_SetFastMode();`