1/*
2 * Copyright (c) 2008-2016 Stefan Krah. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28
29#ifndef TYPEARITH_H
30#define TYPEARITH_H
31
32
33#include "mpdecimal.h"
34
35
36/*****************************************************************************/
37/*                 Low level native arithmetic on basic types                */
38/*****************************************************************************/
39
40
41/** ------------------------------------------------------------
42 **           Double width multiplication and division
43 ** ------------------------------------------------------------
44 */
45
46#if defined(CONFIG_64)
47#if defined(ANSI)
48#if defined(HAVE_UINT128_T)
49static inline void
50_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
51{
52    __uint128_t hl;
53
54    hl = (__uint128_t)a * b;
55
56    *hi = hl >> 64;
57    *lo = (mpd_uint_t)hl;
58}
59
60static inline void
61_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
62               mpd_uint_t d)
63{
64    __uint128_t hl;
65
66    hl = ((__uint128_t)hi<<64) + lo;
67    *q = (mpd_uint_t)(hl / d); /* quotient is known to fit */
68    *r = (mpd_uint_t)(hl - (__uint128_t)(*q) * d);
69}
70#else
71static inline void
72_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
73{
74    uint32_t w[4], carry;
75    uint32_t ah, al, bh, bl;
76    uint64_t hl;
77
78    ah = (uint32_t)(a>>32); al = (uint32_t)a;
79    bh = (uint32_t)(b>>32); bl = (uint32_t)b;
80
81    hl = (uint64_t)al * bl;
82    w[0] = (uint32_t)hl;
83    carry = (uint32_t)(hl>>32);
84
85    hl = (uint64_t)ah * bl + carry;
86    w[1] = (uint32_t)hl;
87    w[2] = (uint32_t)(hl>>32);
88
89    hl = (uint64_t)al * bh + w[1];
90    w[1] = (uint32_t)hl;
91    carry = (uint32_t)(hl>>32);
92
93    hl = ((uint64_t)ah * bh + w[2]) + carry;
94    w[2] = (uint32_t)hl;
95    w[3] = (uint32_t)(hl>>32);
96
97    *hi = ((uint64_t)w[3]<<32) + w[2];
98    *lo = ((uint64_t)w[1]<<32) + w[0];
99}
100
101/*
102 * By Henry S. Warren: http://www.hackersdelight.org/HDcode/divlu.c.txt
103 * http://www.hackersdelight.org/permissions.htm:
104 * "You are free to use, copy, and distribute any of the code on this web
105 *  site, whether modified by you or not. You need not give attribution."
106 *
107 * Slightly modified, comments are mine.
108 */
109static inline int
110nlz(uint64_t x)
111{
112    int n;
113
114    if (x == 0) return(64);
115
116    n = 0;
117    if (x <= 0x00000000FFFFFFFF) {n = n +32; x = x <<32;}
118    if (x <= 0x0000FFFFFFFFFFFF) {n = n +16; x = x <<16;}
119    if (x <= 0x00FFFFFFFFFFFFFF) {n = n + 8; x = x << 8;}
120    if (x <= 0x0FFFFFFFFFFFFFFF) {n = n + 4; x = x << 4;}
121    if (x <= 0x3FFFFFFFFFFFFFFF) {n = n + 2; x = x << 2;}
122    if (x <= 0x7FFFFFFFFFFFFFFF) {n = n + 1;}
123
124    return n;
125}
126
127static inline void
128_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t u1, mpd_uint_t u0,
129               mpd_uint_t v)
130{
131    const mpd_uint_t b = 4294967296;
132    mpd_uint_t un1, un0,
133               vn1, vn0,
134               q1, q0,
135               un32, un21, un10,
136               rhat, t;
137    int s;
138
139    assert(u1 < v);
140
141    s = nlz(v);
142    v = v << s;
143    vn1 = v >> 32;
144    vn0 = v & 0xFFFFFFFF;
145
146    t = (s == 0) ? 0 : u0 >> (64 - s);
147    un32 = (u1 << s) | t;
148    un10 = u0 << s;
149
150    un1 = un10 >> 32;
151    un0 = un10 & 0xFFFFFFFF;
152
153    q1 = un32 / vn1;
154    rhat = un32 - q1*vn1;
155again1:
156    if (q1 >= b || q1*vn0 > b*rhat + un1) {
157        q1 = q1 - 1;
158        rhat = rhat + vn1;
159        if (rhat < b) goto again1;
160    }
161
162    /*
163     *  Before again1 we had:
164     *      (1) q1*vn1   + rhat         = un32
165     *      (2) q1*vn1*b + rhat*b + un1 = un32*b + un1
166     *
167     *  The statements inside the if-clause do not change the value
168     *  of the left-hand side of (2), and the loop is only exited
169     *  if q1*vn0 <= rhat*b + un1, so:
170     *
171     *      (3) q1*vn1*b + q1*vn0 <= un32*b + un1
172     *      (4)              q1*v <= un32*b + un1
173     *      (5)                 0 <= un32*b + un1 - q1*v
174     *
175     *  By (5) we are certain that the possible add-back step from
176     *  Knuth's algorithm D is never required.
177     *
178     *  Since the final quotient is less than 2**64, the following
179     *  must be true:
180     *
181     *      (6) un32*b + un1 - q1*v <= UINT64_MAX
182     *
183     *  This means that in the following line, the high words
184     *  of un32*b and q1*v can be discarded without any effect
185     *  on the result.
186     */
187    un21 = un32*b + un1 - q1*v;
188
189    q0 = un21 / vn1;
190    rhat = un21 - q0*vn1;
191again2:
192    if (q0 >= b || q0*vn0 > b*rhat + un0) {
193        q0 = q0 - 1;
194        rhat = rhat + vn1;
195        if (rhat < b) goto again2;
196    }
197
198    *q = q1*b + q0;
199    *r = (un21*b + un0 - q0*v) >> s;
200}
201#endif
202
203/* END ANSI */
204#elif defined(ASM)
205static inline void
206_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
207{
208    mpd_uint_t h, l;
209
210    __asm__ ( "mulq %3\n\t"
211              : "=d" (h), "=a" (l)
212              : "%a" (a), "rm" (b)
213              : "cc"
214    );
215
216    *hi = h;
217    *lo = l;
218}
219
220static inline void
221_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
222               mpd_uint_t d)
223{
224    mpd_uint_t qq, rr;
225
226    __asm__ ( "divq %4\n\t"
227              : "=a" (qq), "=d" (rr)
228              : "a" (lo), "d" (hi), "rm" (d)
229              : "cc"
230    );
231
232    *q = qq;
233    *r = rr;
234}
235/* END GCC ASM */
236#elif defined(MASM)
237#include <intrin.h>
238#pragma intrinsic(_umul128)
239
240static inline void
241_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
242{
243    *lo = _umul128(a, b, hi);
244}
245
246void _mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
247                    mpd_uint_t d);
248
249/* END MASM (_MSC_VER) */
250#else
251  #error "need platform specific 128 bit multiplication and division"
252#endif
253
254#define DIVMOD(q, r, v, d) *q = v / d; *r = v - *q * d
255static inline void
256_mpd_divmod_pow10(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t v, mpd_uint_t exp)
257{
258    assert(exp <= 19);
259
260    if (exp <= 9) {
261        if (exp <= 4) {
262            switch (exp) {
263            case 0: *q = v; *r = 0; break;
264            case 1: DIVMOD(q, r, v, 10UL); break;
265            case 2: DIVMOD(q, r, v, 100UL); break;
266            case 3: DIVMOD(q, r, v, 1000UL); break;
267            case 4: DIVMOD(q, r, v, 10000UL); break;
268            }
269        }
270        else {
271            switch (exp) {
272            case 5: DIVMOD(q, r, v, 100000UL); break;
273            case 6: DIVMOD(q, r, v, 1000000UL); break;
274            case 7: DIVMOD(q, r, v, 10000000UL); break;
275            case 8: DIVMOD(q, r, v, 100000000UL); break;
276            case 9: DIVMOD(q, r, v, 1000000000UL); break;
277            }
278        }
279    }
280    else {
281        if (exp <= 14) {
282            switch (exp) {
283            case 10: DIVMOD(q, r, v, 10000000000ULL); break;
284            case 11: DIVMOD(q, r, v, 100000000000ULL); break;
285            case 12: DIVMOD(q, r, v, 1000000000000ULL); break;
286            case 13: DIVMOD(q, r, v, 10000000000000ULL); break;
287            case 14: DIVMOD(q, r, v, 100000000000000ULL); break;
288            }
289        }
290        else {
291            switch (exp) {
292            case 15: DIVMOD(q, r, v, 1000000000000000ULL); break;
293            case 16: DIVMOD(q, r, v, 10000000000000000ULL); break;
294            case 17: DIVMOD(q, r, v, 100000000000000000ULL); break;
295            case 18: DIVMOD(q, r, v, 1000000000000000000ULL); break;
296            case 19: DIVMOD(q, r, v, 10000000000000000000ULL); break; /* GCOV_NOT_REACHED */
297            }
298        }
299    }
300}
301
302/* END CONFIG_64 */
303#elif defined(CONFIG_32)
304#if defined(ANSI)
305#if !defined(LEGACY_COMPILER)
306static inline void
307_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
308{
309    mpd_uuint_t hl;
310
311    hl = (mpd_uuint_t)a * b;
312
313    *hi = hl >> 32;
314    *lo = (mpd_uint_t)hl;
315}
316
317static inline void
318_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
319               mpd_uint_t d)
320{
321    mpd_uuint_t hl;
322
323    hl = ((mpd_uuint_t)hi<<32) + lo;
324    *q = (mpd_uint_t)(hl / d); /* quotient is known to fit */
325    *r = (mpd_uint_t)(hl - (mpd_uuint_t)(*q) * d);
326}
327/* END ANSI + uint64_t */
328#else
329static inline void
330_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
331{
332    uint16_t w[4], carry;
333    uint16_t ah, al, bh, bl;
334    uint32_t hl;
335
336    ah = (uint16_t)(a>>16); al = (uint16_t)a;
337    bh = (uint16_t)(b>>16); bl = (uint16_t)b;
338
339    hl = (uint32_t)al * bl;
340    w[0] = (uint16_t)hl;
341    carry = (uint16_t)(hl>>16);
342
343    hl = (uint32_t)ah * bl + carry;
344    w[1] = (uint16_t)hl;
345    w[2] = (uint16_t)(hl>>16);
346
347    hl = (uint32_t)al * bh + w[1];
348    w[1] = (uint16_t)hl;
349    carry = (uint16_t)(hl>>16);
350
351    hl = ((uint32_t)ah * bh + w[2]) + carry;
352    w[2] = (uint16_t)hl;
353    w[3] = (uint16_t)(hl>>16);
354
355    *hi = ((uint32_t)w[3]<<16) + w[2];
356    *lo = ((uint32_t)w[1]<<16) + w[0];
357}
358
359/*
360 * By Henry S. Warren: http://www.hackersdelight.org/HDcode/divlu.c.txt
361 * http://www.hackersdelight.org/permissions.htm:
362 * "You are free to use, copy, and distribute any of the code on this web
363 *  site, whether modified by you or not. You need not give attribution."
364 *
365 * Slightly modified, comments are mine.
366 */
367static inline int
368nlz(uint32_t x)
369{
370    int n;
371
372    if (x == 0) return(32);
373
374    n = 0;
375    if (x <= 0x0000FFFF) {n = n +16; x = x <<16;}
376    if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;}
377    if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;}
378    if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;}
379    if (x <= 0x7FFFFFFF) {n = n + 1;}
380
381    return n;
382}
383
384static inline void
385_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t u1, mpd_uint_t u0,
386               mpd_uint_t v)
387{
388    const mpd_uint_t b = 65536;
389    mpd_uint_t un1, un0,
390               vn1, vn0,
391               q1, q0,
392               un32, un21, un10,
393               rhat, t;
394    int s;
395
396    assert(u1 < v);
397
398    s = nlz(v);
399    v = v << s;
400    vn1 = v >> 16;
401    vn0 = v & 0xFFFF;
402
403    t = (s == 0) ? 0 : u0 >> (32 - s);
404    un32 = (u1 << s) | t;
405    un10 = u0 << s;
406
407    un1 = un10 >> 16;
408    un0 = un10 & 0xFFFF;
409
410    q1 = un32 / vn1;
411    rhat = un32 - q1*vn1;
412again1:
413    if (q1 >= b || q1*vn0 > b*rhat + un1) {
414        q1 = q1 - 1;
415        rhat = rhat + vn1;
416        if (rhat < b) goto again1;
417    }
418
419    /*
420     *  Before again1 we had:
421     *      (1) q1*vn1   + rhat         = un32
422     *      (2) q1*vn1*b + rhat*b + un1 = un32*b + un1
423     *
424     *  The statements inside the if-clause do not change the value
425     *  of the left-hand side of (2), and the loop is only exited
426     *  if q1*vn0 <= rhat*b + un1, so:
427     *
428     *      (3) q1*vn1*b + q1*vn0 <= un32*b + un1
429     *      (4)              q1*v <= un32*b + un1
430     *      (5)                 0 <= un32*b + un1 - q1*v
431     *
432     *  By (5) we are certain that the possible add-back step from
433     *  Knuth's algorithm D is never required.
434     *
435     *  Since the final quotient is less than 2**32, the following
436     *  must be true:
437     *
438     *      (6) un32*b + un1 - q1*v <= UINT32_MAX
439     *
440     *  This means that in the following line, the high words
441     *  of un32*b and q1*v can be discarded without any effect
442     *  on the result.
443     */
444    un21 = un32*b + un1 - q1*v;
445
446    q0 = un21 / vn1;
447    rhat = un21 - q0*vn1;
448again2:
449    if (q0 >= b || q0*vn0 > b*rhat + un0) {
450        q0 = q0 - 1;
451        rhat = rhat + vn1;
452        if (rhat < b) goto again2;
453    }
454
455    *q = q1*b + q0;
456    *r = (un21*b + un0 - q0*v) >> s;
457}
458#endif /* END ANSI + LEGACY_COMPILER */
459
460/* END ANSI */
461#elif defined(ASM)
462static inline void
463_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
464{
465    mpd_uint_t h, l;
466
467    __asm__ ( "mull %3\n\t"
468              : "=d" (h), "=a" (l)
469              : "%a" (a), "rm" (b)
470              : "cc"
471    );
472
473    *hi = h;
474    *lo = l;
475}
476
477static inline void
478_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
479               mpd_uint_t d)
480{
481    mpd_uint_t qq, rr;
482
483    __asm__ ( "divl %4\n\t"
484              : "=a" (qq), "=d" (rr)
485              : "a" (lo), "d" (hi), "rm" (d)
486              : "cc"
487    );
488
489    *q = qq;
490    *r = rr;
491}
492/* END GCC ASM */
493#elif defined(MASM)
494static inline void __cdecl
495_mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b)
496{
497    mpd_uint_t h, l;
498
499    __asm {
500        mov eax, a
501        mul b
502        mov h, edx
503        mov l, eax
504    }
505
506    *hi = h;
507    *lo = l;
508}
509
510static inline void __cdecl
511_mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo,
512               mpd_uint_t d)
513{
514    mpd_uint_t qq, rr;
515
516    __asm {
517        mov eax, lo
518        mov edx, hi
519        div d
520        mov qq, eax
521        mov rr, edx
522    }
523
524    *q = qq;
525    *r = rr;
526}
527/* END MASM (_MSC_VER) */
528#else
529  #error "need platform specific 64 bit multiplication and division"
530#endif
531
532#define DIVMOD(q, r, v, d) *q = v / d; *r = v - *q * d
533static inline void
534_mpd_divmod_pow10(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t v, mpd_uint_t exp)
535{
536    assert(exp <= 9);
537
538    if (exp <= 4) {
539        switch (exp) {
540        case 0: *q = v; *r = 0; break;
541        case 1: DIVMOD(q, r, v, 10UL); break;
542        case 2: DIVMOD(q, r, v, 100UL); break;
543        case 3: DIVMOD(q, r, v, 1000UL); break;
544        case 4: DIVMOD(q, r, v, 10000UL); break;
545        }
546    }
547    else {
548        switch (exp) {
549        case 5: DIVMOD(q, r, v, 100000UL); break;
550        case 6: DIVMOD(q, r, v, 1000000UL); break;
551        case 7: DIVMOD(q, r, v, 10000000UL); break;
552        case 8: DIVMOD(q, r, v, 100000000UL); break;
553        case 9: DIVMOD(q, r, v, 1000000000UL); break; /* GCOV_NOT_REACHED */
554        }
555    }
556}
557/* END CONFIG_32 */
558
559/* NO CONFIG */
560#else
561  #error "define CONFIG_64 or CONFIG_32"
562#endif /* CONFIG */
563
564
565static inline void
566_mpd_div_word(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t v, mpd_uint_t d)
567{
568    *q = v / d;
569    *r = v - *q * d;
570}
571
572static inline void
573_mpd_idiv_word(mpd_ssize_t *q, mpd_ssize_t *r, mpd_ssize_t v, mpd_ssize_t d)
574{
575    *q = v / d;
576    *r = v - *q * d;
577}
578
579
580/** ------------------------------------------------------------
581 **              Arithmetic with overflow checking
582 ** ------------------------------------------------------------
583 */
584
585/* The following macros do call exit() in case of an overflow.
586   If the library is used correctly (i.e. with valid context
587   parameters), such overflows cannot occur. The macros are used
588   as sanity checks in a couple of strategic places and should
589   be viewed as a handwritten version of gcc's -ftrapv option. */
590
591static inline mpd_size_t
592add_size_t(mpd_size_t a, mpd_size_t b)
593{
594    if (a > MPD_SIZE_MAX - b) {
595        mpd_err_fatal("add_size_t(): overflow: check the context"); /* GCOV_NOT_REACHED */
596    }
597    return a + b;
598}
599
600static inline mpd_size_t
601sub_size_t(mpd_size_t a, mpd_size_t b)
602{
603    if (b > a) {
604        mpd_err_fatal("sub_size_t(): overflow: check the context"); /* GCOV_NOT_REACHED */
605    }
606    return a - b;
607}
608
609#if MPD_SIZE_MAX != MPD_UINT_MAX
610  #error "adapt mul_size_t() and mulmod_size_t()"
611#endif
612
613static inline mpd_size_t
614mul_size_t(mpd_size_t a, mpd_size_t b)
615{
616    mpd_uint_t hi, lo;
617
618    _mpd_mul_words(&hi, &lo, (mpd_uint_t)a, (mpd_uint_t)b);
619    if (hi) {
620        mpd_err_fatal("mul_size_t(): overflow: check the context"); /* GCOV_NOT_REACHED */
621    }
622    return lo;
623}
624
625static inline mpd_size_t
626add_size_t_overflow(mpd_size_t a, mpd_size_t b, mpd_size_t *overflow)
627{
628    mpd_size_t ret;
629
630    *overflow = 0;
631    ret = a + b;
632    if (ret < a) *overflow = 1;
633    return ret;
634}
635
636static inline mpd_size_t
637mul_size_t_overflow(mpd_size_t a, mpd_size_t b, mpd_size_t *overflow)
638{
639    mpd_uint_t lo;
640
641    _mpd_mul_words((mpd_uint_t *)overflow, &lo, (mpd_uint_t)a,
642                   (mpd_uint_t)b);
643    return lo;
644}
645
646static inline mpd_ssize_t
647mod_mpd_ssize_t(mpd_ssize_t a, mpd_ssize_t m)
648{
649    mpd_ssize_t r = a % m;
650    return (r < 0) ? r + m : r;
651}
652
653static inline mpd_size_t
654mulmod_size_t(mpd_size_t a, mpd_size_t b, mpd_size_t m)
655{
656    mpd_uint_t hi, lo;
657    mpd_uint_t q, r;
658
659    _mpd_mul_words(&hi, &lo, (mpd_uint_t)a, (mpd_uint_t)b);
660    _mpd_div_words(&q, &r, hi, lo, (mpd_uint_t)m);
661
662    return r;
663}
664
665
666#endif /* TYPEARITH_H */
667
668
669
670