openMSX
Math.hh
Go to the documentation of this file.
1#ifndef MATH_HH
2#define MATH_HH
3
4#include "narrow.hh"
5#include <bit>
6#include <cassert>
7#include <climits>
8#include <cmath>
9#include <concepts>
10#include <cstdint>
11#include <numbers>
12#include <span>
13
14#ifdef _MSC_VER
15#include <intrin.h>
16#pragma intrinsic(_BitScanForward)
17#endif
18
19namespace Math {
20
21inline constexpr double e = std::numbers::e_v <double>;
22inline constexpr double ln2 = std::numbers::ln2_v <double>;
23inline constexpr double ln10 = std::numbers::ln10_v<double>;
24inline constexpr double pi = std::numbers::pi_v <double>;
25
32[[nodiscard]] constexpr auto floodRight(std::unsigned_integral auto x) noexcept
33{
34 x |= x >> 1;
35 x |= x >> 2;
36 x |= x >> 4;
37 x |= x >> ((sizeof(x) >= 2) ? 8 : 0); // Written in a weird way to
38 x |= x >> ((sizeof(x) >= 4) ? 16 : 0); // suppress compiler warnings.
39 x |= x >> ((sizeof(x) >= 8) ? 32 : 0); // Generates equally efficient
40 return x; // code.
41}
42
46template<std::signed_integral T>
47[[nodiscard]] inline int16_t clipToInt16(T x)
48{
49 static_assert((T(-1) >> 1) == T(-1), "right-shift must preserve sign");
50 if (int16_t(x) == x) [[likely]] {
51 return narrow_cast<int16_t>(x);
52 } else {
53 constexpr int SHIFT = (sizeof(T) * CHAR_BIT) - 1;
54 return narrow_cast<int16_t>(0x7FFF - (x >> SHIFT));
55 }
56}
57
61[[nodiscard]] inline uint8_t clipIntToByte(int x)
62{
63 static_assert((-1 >> 1) == -1, "right-shift must preserve sign");
64 if (uint8_t(x) == x) [[likely]] {
65 return narrow_cast<uint8_t>(x);
66 } else {
67 return narrow_cast<uint8_t>(~(x >> 31));
68 }
69}
70
75[[nodiscard]] constexpr unsigned reverseNBits(unsigned x, unsigned bits)
76{
77 unsigned ret = 0;
78 while (bits--) {
79 ret = (ret << 1) | (x & 1);
80 x >>= 1;
81 }
82 return ret;
83
84 /* Just for fun I tried the asm version below (the carry-flag trick
85 * cannot be described in plain C). It's correct and generates shorter
86 * code (both less instructions and less bytes). But it doesn't
87 * actually run faster on the machine I tested on, or only a tiny bit
88 * (possibly because of dependency chains and processor stalls???).
89 * However a big disadvantage of this asm version is that when called
90 * with compile-time constant arguments, this version performs exactly
91 * the same, while the version above can be further optimized by the
92 * compiler (constant-propagation, loop unrolling, ...).
93 unsigned ret = 0;
94 if (bits) {
95 asm (
96 "1: shr %[VAL]\n"
97 " adc %[RET],%[RET]\n"
98 " dec %[BITS]\n"
99 " jne 1b\n"
100 : [VAL] "+r" (val)
101 , [BITS] "+r" (bits)
102 , [RET] "+r" (ret)
103 );
104 }
105 return ret;
106 */
107
108 /* Maarten suggested the following approach with O(lg(N)) time
109 * complexity (the version above is O(N)).
110 * - reverse full (32-bit) word: O(lg(N))
111 * - shift right over 32-N bits: O(1)
112 * Note: In some lower end CPU the shift-over-N-bits instruction itself
113 * is O(N), in that case this whole algorithm is O(N)
114 * Note2: Instead of '32' it's also possible to use a lower power of 2,
115 * as long as it's bigger than or equal to N.
116 * This algorithm may or may not be faster than the version above, I
117 * didn't try it yet. Also because this routine is _NOT_ performance
118 * critical _AT_ALL_ currently.
119 */
120}
121
125[[nodiscard]] constexpr uint8_t reverseByte(uint8_t a)
126{
127 // Classical implementation (can be extended to 16 and 32 bits)
128 // a = ((a & 0xF0) >> 4) | ((a & 0x0F) << 4);
129 // a = ((a & 0xCC) >> 2) | ((a & 0x33) << 2);
130 // a = ((a & 0xAA) >> 1) | ((a & 0x55) << 1);
131 // return a;
132
133 // The versions below are specific to reverse a single byte (can't
134 // easily be extended to wider types). Found these tricks on:
135 // http://graphics.stanford.edu/~seander/bithacks.html
136#ifdef __x86_64
137 // on 64-bit systems this is slightly faster
138 return narrow_cast<uint8_t>((((a * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL) >> 32);
139#else
140 // on 32-bit systems this is faster
141 return narrow_cast<uint8_t>((((a * 0x0802 & 0x22110) | (a * 0x8020 & 0x88440)) * 0x10101) >> 16);
142#endif
143}
144
149[[nodiscard]] inline /*constexpr*/ unsigned findFirstSet(uint32_t x)
150{
151 return x ? std::countr_zero(x) + 1 : 0;
152}
153
154// Cubic Hermite Interpolation:
155// Given 4 points: (-1, y[0]), (0, y[1]), (1, y[2]), (2, y[3])
156// Fit a polynomial: f(x) = a*x^3 + b*x^2 + c*x + d
157// which passes through the given points at x=0 and x=1
158// f(0) = y[0]
159// f(1) = y[1]
160// and which has specific derivatives at x=0 and x=1
161// f'(0) = (y[1] - y[-1]) / 2
162// f'(1) = (y[2] - y[ 0]) / 2
163// Then evaluate this polynomial at the given x-position (x in [0, 1]).
164// For more details see:
165// https://en.wikipedia.org/wiki/Cubic_Hermite_spline
166// https://www.paulinternet.nl/?page=bicubic
167[[nodiscard]] constexpr float cubicHermite(std::span<const float, 4> y, float x)
168{
169 assert(0.0f <= x); assert(x <= 1.0f);
170 float a = -0.5f*y[0] + 1.5f*y[1] - 1.5f*y[2] + 0.5f*y[3];
171 float b = y[0] - 2.5f*y[1] + 2.0f*y[2] - 0.5f*y[3];
172 float c = -0.5f*y[0] + 0.5f*y[2];
173 float d = y[1];
174 float x2 = x * x;
175 float x3 = x * x2;
176 return a*x3 + b*x2 + c*x + d;
177}
178
188constexpr QuotientRemainder div_mod_floor(int dividend, int divisor) {
189 int q = dividend / divisor;
190 int r = dividend % divisor;
191 if ((r != 0) && ((r < 0) != (divisor < 0))) {
192 --q;
193 r += divisor;
194 }
195 return {q, r};
196}
197constexpr int div_floor(int dividend, int divisor) {
198 return div_mod_floor(dividend, divisor).quotient;
199}
200constexpr int mod_floor(int dividend, int divisor) {
201 return div_mod_floor(dividend, divisor).remainder;
202}
203
204} // namespace Math
205
206#endif // MATH_HH
Definition Math.hh:19
constexpr QuotientRemainder div_mod_floor(int dividend, int divisor)
Definition Math.hh:188
constexpr unsigned reverseNBits(unsigned x, unsigned bits)
Reverse the lower N bits of a given value.
Definition Math.hh:75
unsigned findFirstSet(uint32_t x)
Find the least significant bit that is set.
Definition Math.hh:149
constexpr auto floodRight(std::unsigned_integral auto x) noexcept
Returns the smallest number of the form 2^n-1 that is greater or equal to the given number.
Definition Math.hh:32
uint8_t clipIntToByte(int x)
Clip x to range [0,255].
Definition Math.hh:61
constexpr double pi
Definition Math.hh:24
constexpr int div_floor(int dividend, int divisor)
Definition Math.hh:197
constexpr float cubicHermite(std::span< const float, 4 > y, float x)
Definition Math.hh:167
constexpr int mod_floor(int dividend, int divisor)
Definition Math.hh:200
constexpr double ln2
Definition Math.hh:22
constexpr double ln10
Definition Math.hh:23
int16_t clipToInt16(T x)
Clip x to range [-32768,32767].
Definition Math.hh:47
constexpr uint8_t reverseByte(uint8_t a)
Reverse the bits in a byte.
Definition Math.hh:125
constexpr double e
Definition Math.hh:21
Divide one integer by another, rounding towards minus infinity.
Definition Math.hh:184