openMSX
Math.cc
Go to the documentation of this file.
1 #include "Math.hh"
2 #include <cstdint>
3 #include <cstdlib>
4 
5 #if defined _MSC_VER && defined __x86_64
6 #include <emmintrin.h>
7 #endif
8 
9 namespace openmsx {
10 
11 #ifdef _MSC_VER
12 
13 // Poor man's implementations to get openmsx building with VC++
14 
15 #ifdef __x86_64
16 
17 // What follows below are C++ implementations of several C99 math functions missing
18 // from VC++'s CRT as of 2008. These implementations using SSE/SSE2 intrinsics
19 // for floating point operations. The (generally safe) assumption is that those
20 // instructions are always available on CPUs capable of running 64-bit Windows.
21 //
22 // The assembler in Visual Studio (ml64.exe) no longer allows inline assembly,
23 // so for ease of reading we've opted to use these intrinsic-based functions instead
24 // of using the hand-written versions in a separate .asm file. The resulting
25 // optimized code is pretty close to the hand-written code.
26 
27 // gcc-generated data section
28 //;.LC0:
29 //; .long 0
30 //; .long 1127219200
31 //; .section .rodata.cst16,"aM",@progbits,16
32 //; .align 16
33 //;.LC1:
34 //; .long 4294967295
35 //; .long 2147483647
36 //; .long 0
37 //; .long 0
38 //; .section .rodata.cst8
39 //; .align 8
40 //;.LC2:
41 //; .long 4294967295
42 //; .long 1071644671
43 //; .section .rodata.cst4,"aM",@progbits,4
44 //; .align 4
45 //;.LC3:
46 //; .long 1258291200
47 //; .section .rodata.cst16
48 //; .align 16
49 //;.LC4:
50 //; .long 2147483647
51 //; .long 0
52 //; .long 0
53 //; .long 0
54 //; .ident "GCC: (GNU) 4.4.0 20090130 (experimental)"
55 //; .section .note.GNU-stack,"",@progbits
56 // ML64 hand-written data section:
57 //LC0 QWORD 4330000000000000h
58 //LC1 QWORD 7FFFFFFFFFFFFFFFh
59 //LC2 QWORD 3FDFFFFFFFFFFFFFh
60 //LC3 DWORD 4B000000h
61 //LC4 DWORD 7FFFFFFFh
62 
63 // lrint(): round double according to current floating-point rounding direction
64 // gcc-generated:
65 //; cvtsd2siq %xmm0, %rax
66 //; ret
67 // ML64 hand-written:
68 //LEAF_ENTRY lrinta, Math
69 // cvtsd2si rax, xmm0
70 // ret
71 //LEAF_END lrinta, Math
72 long lrint(double x)
73 {
74  return _mm_cvtsd_si32(_mm_load_sd(&x));
75 }
76 
77 // lrintf(): round float according to current floating-point rounding direction
78 // gcc-generated:
79 //; cvtss2siq %xmm0, %rax
80 //; ret
81 // ML64 hand-written:
82 //LEAF_ENTRY lrintf, Math
83 // cvtss2si rax, xmm0
84 // ret
85 //LEAF_END lrintf, Math
86 long lrintf(float x)
87 {
88  return _mm_cvtss_si32(_mm_load_ss(&x));
89 }
90 
91 // truncf(): round float to the nearest integer not larger in absolute value
92 // gcc-generated:
93 //; movss .LC4(%rip), %xmm1
94 //; movaps %xmm0, %xmm2
95 //; andps %xmm1, %xmm2
96 //; comiss .LC3(%rip), %xmm2
97 //; jae .L6
98 //; cvttss2si %xmm0, %eax
99 //; cvtsi2ss %eax, %xmm0
100 //;.L6:
101 //; rep
102 //; ret
103 // ML64 hand-written:
104 //LEAF_ENTRY truncf, Math
105 // movss xmm1, LC4
106 // movaps xmm2, xmm0
107 // andps xmm2, xmm1
108 // comiss xmm2, LC3
109 // jae L6
110 // cvttss2si eax, xmm0
111 // cvtsi2ss xmm0, eax
112 //L6:
113 // ret
114 //LEAF_END truncf, Math
115 float truncf(float x)
116 {
117  int64_t tempi = _mm_cvttss_si64(_mm_load_ss(&x));
118  __m128 xmmx = _mm_cvtsi64_ss(_mm_setzero_ps(), tempi);
119 
120  float ret;
121  _mm_store_ss(&ret, xmmx);
122  return ret;
123 }
124 
125 // round(): round double to nearest integer, away from zero
126 // gcc-generated:
127 //; movsd .LC1(%rip), %xmm1
128 //; movapd %xmm0, %xmm2
129 //; andpd %xmm1, %xmm2
130 //; comisd .LC0(%rip), %xmm2
131 //; jae .L2
132 //; addsd .LC2(%rip), %xmm2
133 //; andnpd %xmm0, %xmm1
134 //; cvttsd2siq %xmm2, %rax
135 //; cvtsi2sdq %rax, %xmm2
136 //; movapd %xmm2, %xmm0
137 //; orpd %xmm1, %xmm0
138 //;.L2:
139 //; rep
140 //; ret
141 // ML64 hand-written:
142 //LEAF_ENTRY round, Math
143 // movsd xmm1, LC1
144 // movapd xmm2, xmm0
145 // andpd xmm2, xmm1
146 // comisd xmm2, LC0
147 // jae L2
148 // addsd xmm2, LC2
149 // andnpd xmm1, xmm0
150 // cvttsd2si rax, xmm2
151 // cvtsi2sd xmm2, rax
152 // movapd xmm0, xmm2
153 // orpd xmm0, xmm1
154 //L2:
155 // ret
156 //LEAF_END round, Math
157 double round(double x)
158 {
159  const double half = 0.5;
160  __m128d xmmhalf = _mm_load_sd(&half);
161  __m128d xmmx = _mm_load_sd(&x);
162 
163  // Add + or -0.5 depending on sign...
164  if (_mm_comige_sd(xmmx, _mm_setzero_pd())) {
165  xmmx = _mm_add_sd(xmmx, xmmhalf);
166  } else {
167  xmmx = _mm_sub_sd(xmmx, xmmhalf);
168  }
169 
170  // ... then round towards zero
171  int64_t tempi64 = _mm_cvttsd_si64(xmmx);
172  xmmx = _mm_cvtsi64_sd(_mm_setzero_pd(), tempi64);
173 
174  double ret;
175  _mm_store_sd(&ret, xmmx);
176  return ret;
177 }
178 
179 #else
180 
181 // lrint(): round double according to current floating-point rounding direction
182 long lrint(double x)
183 {
184  long retval;
185  __asm {
186  fld qword ptr [x]
187  fistp dword ptr [retval]
188  }
189  return retval;
190 }
191 
192 // lrint(): round float according to current floating-point rounding direction
193 long lrintf(float x)
194 {
195  long retval;
196  __asm {
197  fld dword ptr [x]
198  fistp dword ptr [retval]
199  }
200  return retval;
201 }
202 
203 // trunc(): round double to the nearest integer not larger in absolute value
204 double trunc(double x)
205 {
206  short ctrl1, ctrl2;
207  double retval;
208  __asm {
209  fnstcw word ptr [ctrl1]
210  movzx eax,word ptr [ctrl1]
211  or ah,0Ch
212  mov word ptr [ctrl2],ax
213  fldcw word ptr [ctrl2]
214  fld qword ptr [x]
215  frndint
216  fldcw word ptr [ctrl1]
217  fstp qword ptr [retval]
218  }
219  return retval;
220 }
221 
222 // truncf(): round float to the nearest integer not larger in absolute value
223 float truncf(float x)
224 {
225  short ctrl1, ctrl2;
226  float retval;
227  __asm {
228  fnstcw word ptr [ctrl1]
229  movzx eax,word ptr [ctrl1]
230  or ah,0Ch
231  mov word ptr [ctrl2],ax
232  fldcw word ptr [ctrl2]
233  fld dword ptr[x]
234  frndint
235  fldcw word ptr [ctrl1]
236  fstp dword ptr [retval]
237  }
238  return retval;
239 }
240 
241 // round(): round double to nearest integer, away from zero
242 double round(double x)
243 {
244  return (x > 0) ? trunc(x + 0.5) : trunc(x - 0.5);
245 }
246 
247 #endif // __x86_64
248 #endif // _MSC_VER
249 
250 namespace Math {
251 
252 unsigned powerOfTwo(unsigned a)
253 {
254  // classical implementation:
255  // unsigned res = 1;
256  // while (a > res) res <<= 1;
257  // return res;
258 
259  // optimized version
260  a += (a == 0); // can be removed if argument is never zero
261  return floodRight(a - 1) + 1;
262 }
263 
264 void gaussian2(double& r1, double& r2)
265 {
266  static const double S = 2.0 / RAND_MAX;
267  double x1, x2, w;
268  do {
269  x1 = S * rand() - 1.0;
270  x2 = S * rand() - 1.0;
271  w = x1 * x1 + x2 * x2;
272  } while (w >= 1.0);
273  w = sqrt((-2.0 * log(w)) / w);
274  r1 = x1 * w;
275  r2 = x2 * w;
276 }
277 
278 } // namespace Math
279 
280 } // namespace openmsx