1/* ===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===
2 *
3 *                    The LLVM Compiler Infrastructure
4 *
5 * This file is dual licensed under the MIT and the University of Illinois Open
6 * Source Licenses. See LICENSE.TXT for details.
7 *
8 * ===----------------------------------------------------------------------===
9 *
10 * This file implements __udivmodti4 for the compiler_rt library.
11 *
12 * ===----------------------------------------------------------------------===
13 */
14
15#include "int_lib.h"
16
17#ifdef CRT_HAS_128BIT
18
19/* Effects: if rem != 0, *rem = a % b
20 * Returns: a / b
21 */
22
23/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
24
25COMPILER_RT_ABI tu_int
26__udivmodti4(tu_int a, tu_int b, tu_int* rem)
27{
28    const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
29    const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
30    utwords n;
31    n.all = a;
32    utwords d;
33    d.all = b;
34    utwords q;
35    utwords r;
36    unsigned sr;
37    /* special cases, X is unknown, K != 0 */
38    if (n.s.high == 0)
39    {
40        if (d.s.high == 0)
41        {
42            /* 0 X
43             * ---
44             * 0 X
45             */
46            if (rem)
47                *rem = n.s.low % d.s.low;
48            return n.s.low / d.s.low;
49        }
50        /* 0 X
51         * ---
52         * K X
53         */
54        if (rem)
55            *rem = n.s.low;
56        return 0;
57    }
58    /* n.s.high != 0 */
59    if (d.s.low == 0)
60    {
61        if (d.s.high == 0)
62        {
63            /* K X
64             * ---
65             * 0 0
66             */
67            if (rem)
68                *rem = n.s.high % d.s.low;
69            return n.s.high / d.s.low;
70        }
71        /* d.s.high != 0 */
72        if (n.s.low == 0)
73        {
74            /* K 0
75             * ---
76             * K 0
77             */
78            if (rem)
79            {
80                r.s.high = n.s.high % d.s.high;
81                r.s.low = 0;
82                *rem = r.all;
83            }
84            return n.s.high / d.s.high;
85        }
86        /* K K
87         * ---
88         * K 0
89         */
90        if ((d.s.high & (d.s.high - 1)) == 0)     /* if d is a power of 2 */
91        {
92            if (rem)
93            {
94                r.s.low = n.s.low;
95                r.s.high = n.s.high & (d.s.high - 1);
96                *rem = r.all;
97            }
98            return n.s.high >> __builtin_ctzll(d.s.high);
99        }
100        /* K K
101         * ---
102         * K 0
103         */
104        sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
105        /* 0 <= sr <= n_udword_bits - 2 or sr large */
106        if (sr > n_udword_bits - 2)
107        {
108           if (rem)
109                *rem = n.all;
110            return 0;
111        }
112        ++sr;
113        /* 1 <= sr <= n_udword_bits - 1 */
114        /* q.all = n.all << (n_utword_bits - sr); */
115        q.s.low = 0;
116        q.s.high = n.s.low << (n_udword_bits - sr);
117        /* r.all = n.all >> sr; */
118        r.s.high = n.s.high >> sr;
119        r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
120    }
121    else  /* d.s.low != 0 */
122    {
123        if (d.s.high == 0)
124        {
125            /* K X
126             * ---
127             * 0 K
128             */
129            if ((d.s.low & (d.s.low - 1)) == 0)     /* if d is a power of 2 */
130            {
131                if (rem)
132                    *rem = n.s.low & (d.s.low - 1);
133                if (d.s.low == 1)
134                    return n.all;
135                sr = __builtin_ctzll(d.s.low);
136                q.s.high = n.s.high >> sr;
137                q.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
138                return q.all;
139            }
140            /* K X
141             * ---
142             * 0 K
143             */
144            sr = 1 + n_udword_bits + __builtin_clzll(d.s.low)
145                                   - __builtin_clzll(n.s.high);
146            /* 2 <= sr <= n_utword_bits - 1
147             * q.all = n.all << (n_utword_bits - sr);
148             * r.all = n.all >> sr;
149             */
150            if (sr == n_udword_bits)
151            {
152                q.s.low = 0;
153                q.s.high = n.s.low;
154                r.s.high = 0;
155                r.s.low = n.s.high;
156            }
157            else if (sr < n_udword_bits)  // 2 <= sr <= n_udword_bits - 1
158            {
159                q.s.low = 0;
160                q.s.high = n.s.low << (n_udword_bits - sr);
161                r.s.high = n.s.high >> sr;
162                r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
163            }
164            else              // n_udword_bits + 1 <= sr <= n_utword_bits - 1
165            {
166                q.s.low = n.s.low << (n_utword_bits - sr);
167                q.s.high = (n.s.high << (n_utword_bits - sr)) |
168                           (n.s.low >> (sr - n_udword_bits));
169                r.s.high = 0;
170                r.s.low = n.s.high >> (sr - n_udword_bits);
171            }
172        }
173        else
174        {
175            /* K X
176             * ---
177             * K K
178             */
179            sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
180            /*0 <= sr <= n_udword_bits - 1 or sr large */
181            if (sr > n_udword_bits - 1)
182            {
183               if (rem)
184                    *rem = n.all;
185                return 0;
186            }
187            ++sr;
188            /* 1 <= sr <= n_udword_bits
189             * q.all = n.all << (n_utword_bits - sr);
190             * r.all = n.all >> sr;
191             */
192            q.s.low = 0;
193            if (sr == n_udword_bits)
194            {
195                q.s.high = n.s.low;
196                r.s.high = 0;
197                r.s.low = n.s.high;
198            }
199            else
200            {
201                r.s.high = n.s.high >> sr;
202                r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
203                q.s.high = n.s.low << (n_udword_bits - sr);
204            }
205        }
206    }
207    /* Not a special case
208     * q and r are initialized with:
209     * q.all = n.all << (n_utword_bits - sr);
210     * r.all = n.all >> sr;
211     * 1 <= sr <= n_utword_bits - 1
212     */
213    su_int carry = 0;
214    for (; sr > 0; --sr)
215    {
216        /* r:q = ((r:q)  << 1) | carry */
217        r.s.high = (r.s.high << 1) | (r.s.low  >> (n_udword_bits - 1));
218        r.s.low  = (r.s.low  << 1) | (q.s.high >> (n_udword_bits - 1));
219        q.s.high = (q.s.high << 1) | (q.s.low  >> (n_udword_bits - 1));
220        q.s.low  = (q.s.low  << 1) | carry;
221        /* carry = 0;
222         * if (r.all >= d.all)
223         * {
224         *     r.all -= d.all;
225         *      carry = 1;
226         * }
227         */
228        const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1);
229        carry = s & 1;
230        r.all -= d.all & s;
231    }
232    q.all = (q.all << 1) | carry;
233    if (rem)
234        *rem = r.all;
235    return q.all;
236}
237
238#endif /* CRT_HAS_128BIT */
239