20 namespace DivModByConstPrivate {
22 template<u
int32_t A, u
int32_t R = 0>
struct log2
23 :
if_c<A == 0, std::integral_constant<int, R>, log2<A / 2, R + 1>> {};
26 template<u
int64_t RH, u
int64_t RL, u
int64_t QH, u
int64_t QL, u
int64_t DH, u
int64_t DL, u
int32_t BITS>
29 static const uint64_t
QL2 = (QL << 1);
30 static const uint64_t
QH2 = (QH << 1) + (
QL2 < QL);
31 static const uint64_t
RL2 = (RL << 1) + (
QH2 < QH);
32 static const uint64_t
RH2 = (RH << 1) + (
RL2 < RL);
34 static const bool C = (
RH2 != DH) ? (
RH2 < DH) : (
RL2 < DL);
46 template<u
int64_t RH, u
int64_t RL, u
int64_t QH, u
int64_t QL, u
int64_t DH, u
int64_t DL>
54 template<u
int64_t Div
idendH, u
int64_t Div
idendL, u
int64_t Div
iderH, u
int64_t Div
iderL>
56 :
Div128_helper<0, 0, DividendH, DividendL, DividerH, DividerL, 128> {};
64 template<u
int64_t M, u
int32_t S,
bool B = M & 1>
struct DBCReduce
70 template<u
int64_t M, u
int32_t S>
struct DBCReduce<M, S, true>
72 static const uint64_t
M2 = M;
73 static const uint32_t
S2 = S;
82 template<u
int64_t AH, u
int64_t AL, u
int64_t BH, u
int64_t BL>
85 static const uint64_t
AH2 = AH / 2;
86 static const uint64_t
AL2 = AL / 2 + ((
AH2 * 2 != AH) ? (1ull << 63) : 0);
87 static const uint64_t
BH2 = BH / 2;
88 static const uint64_t
BL2 = BL / 2 + ((
BH2 * 2 != BH) ? (1ull << 63) : 0);
90 template<u
int64_t AH, u
int64_t AL, u
int64_t BH, u
int64_t BL, u
int32_t L>
96 static const bool value =
C && (L > 0);
98 template<u
int64_t AH, u
int64_t AL, u
int64_t BH, u
int64_t BL, u
int32_t LL,
bool B>
110 template<u
int64_t AH, u
int64_t AL, u
int64_t BH, u
int64_t BL, u
int32_t LL>
113 static const uint64_t
MLH = AH;
114 static const uint64_t
MLL = AL;
115 static const uint64_t
MHH = BH;
116 static const uint64_t
MHL = BL;
117 static const uint32_t
L = LL;
119 template<u
int64_t AH, u
int64_t AL, u
int64_t BH, u
int64_t BL, u
int32_t LL>
137 return dividend >> S;
141 static inline uint64_t mla64(uint64_t a, uint64_t b, uint64_t c)
145 uint64_t t1 = uint64_t(uint32_t(a)) * uint32_t(b);
146 uint64_t t2 = (a >> 32) * uint32_t(b);
147 uint64_t t3 = uint32_t(a) * (b >> 32);
148 uint64_t t4 = (a >> 32) * (b >> 32);
150 uint64_t s1 = uint64_t(uint32_t(c)) + uint32_t(t1);
151 uint64_t s2 = (s1 >> 32) + (c >> 32) + (t1 >> 32) + t2;
152 uint64_t s3 = uint64_t(uint32_t(s2)) + uint32_t(t3);
153 uint64_t s4 = (s3 >> 32) + (s2 >> 32) + (t3 >> 32) + t4;
163 #if ASM_X86_32 || defined(__arm__)
164 const uint32_t _ah_ = R::M2 >> 32;
165 const uint32_t _al_ = uint32_t((R::M2 << 32) >> 32);
166 const uint32_t _bh_ = dividend >> 32;
167 const uint32_t _bl_ = uint32_t(dividend);
172 register uint32_t result;
207 mov cl,
byte ptr [R::S2]
212 uint32_t realResult = uint32_t(mla64(dividend, R::M2, 0) >> R::S2);
213 assert(realResult == result);
217 uint32_t th, tl, ch, cl;
219 "movl %[AH],%%eax\n\t"
221 "movl %%eax,%[TL]\n\t"
222 "movl %%edx,%[TH]\n\t"
223 "movl %[AL],%%eax\n\t"
225 "addl %%edx,%[TL]\n\t"
228 "movl %[AH],%%eax\n\t"
230 "movl %%eax,%[CL]\n\t"
231 "movl %%edx,%[CH]\n\t"
232 "movl %[AL],%%eax\n\t"
234 "addl %%eax,%[TL]\n\t"
235 "adcl %%edx,%[TH]\n\t"
237 "addl %[TH],%[CL]\n\t"
251 "shrd %[SH],%[CH],%[CL]\n\t"
260 #elif defined(__arm__)
264 "umull %[TH],%[TL],%[AL],%[BL]\n\t"
265 "eors %[TH],%[TH]\n\t"
266 "umlal %[TL],%[TH],%[AH],%[BL]\n\t"
268 "umull %[BL],%[AL],%[BH],%[AL]\n\t"
269 "adds %[TL],%[TL],%[BL]\n\t"
270 "adcs %[TH],%[TH],%[AL]\n\t"
272 "adc %[TL],%[TL],%[TL]\n\t"
273 "umlal %[TH],%[TL],%[AH],%[BH]\n\t"
275 "lsr %[RES],%[TH],%[S]\n\t"
277 "lsls %[TL],%[TL],%[S32]\n\t"
278 "orrs %[RES],%[RES],%[TL]\n\t"
285 , [BL]
"[RES]" (_bl_)
287 , [S32]
"M" (32 - R::S2)
291 uint64_t h = mla64(dividend, R::M2, 0);
292 uint64_t result = h >> R::S2;
295 assert(result == uint32_t(result));
297 return uint32_t(result);
302 template<u
int32_t DIVISOR, u
int32_t N>
struct DBCAlgo3
312 #if ASM_X86_32 || defined(__arm__)
313 const uint32_t ah = R::M2 >> 32;
314 const uint32_t al = uint32_t(R::M2);
315 const uint32_t bh = dividend >> 32;
316 const uint32_t bl = dividend;
319 uint32_t th, tl, ch, cl;
321 "mov %[AH],%%eax\n\t"
323 "mov %%eax,%[TL]\n\t"
324 "mov %%edx,%[TH]\n\t"
325 "mov %[AL],%%eax\n\t"
327 "add %[AL],%%eax\n\t"
328 "adc %[AH],%%edx\n\t"
330 "add %%edx,%[TL]\n\t"
333 "mov %[AH],%%eax\n\t"
335 "mov %%eax,%[CL]\n\t"
336 "mov %%edx,%[CH]\n\t"
337 "mov %[AL],%%eax\n\t"
339 "add %%eax,%[TL]\n\t"
340 "adc %%edx,%[TH]\n\t"
342 "add %[TH],%[CL]\n\t"
356 "shrd %[SH],%[CH],%[CL]\n\t"
365 #elif defined(__arm__)
369 "umull %[TH],%[TL],%[AL],%[BL]\n\t"
370 "adds %[TH],%[TH],%[AL]\n\t"
371 "adcs %[TL],%[TL],%[AH]\n\t"
373 "adc %[TH],%[TH],%[TH]\n\t"
374 "umlal %[TL],%[TH],%[AH],%[BL]\n\t"
376 "umull %[BL],%[AL],%[BH],%[AL]\n\t"
377 "adds %[TL],%[TL],%[BL]\n\t"
378 "adcs %[TH],%[TH],%[AL]\n\t"
380 "adc %[TL],%[TL],%[TL]\n\t"
381 "umlal %[TH],%[TL],%[AH],%[BH]\n\t"
383 "lsr %[RES],%[TH],%[S]\n\t"
385 "lsls %[TL],%[TL],%[S32]\n\t"
386 "orrs %[RES],%[RES],%[TL]\n\t"
395 , [S32]
"M" (32 - R::S2)
399 uint64_t h = mla64(dividend, R::M2, R::M2);
406 template<u
int32_t DIVISOR, u
int32_t N,
typename RM>
struct DBCHelper3
407 :
if_c<RM::MHH == 0, DBCAlgo2<RM::MHL, N + RM::L>
408 , DBCAlgo3<DIVISOR, N>> {};
413 static const uint64_t
J = 0xffffffffffffffffull % DIVISOR;
414 typedef Div128<1 <<
L, 0, 0, 0xffffffffffffffffull -
J>
K;
424 return dbc(dividend);
429 :
if_c<DIVISOR == 1, DBCAlgo1<SHIFT>,
430 if_c<DIVISOR & 1, DBCHelper2<DIVISOR, SHIFT>
431 , DBCHelper1<DIVISOR / 2, SHIFT + 1>>> {};
438 uint32_t
div(uint64_t dividend)
const
443 return uint32_t(dividend / DIVISOR);
446 return dbc(dividend);
450 uint32_t
mod(uint64_t dividend)
const
454 result = dividend % DIVISOR;
456 result = dividend - DIVISOR *
div(dividend);
460 assert(result == uint32_t(result));
462 return uint32_t(result);
466 #endif // DIVMODBYCONST