1/* ===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===
2 *
3 *                    The LLVM Compiler Infrastructure
4 *
5 * This file is distributed under the University of Illinois Open Source
6 * License. See LICENSE.TXT for details.
7 *
8 * ===----------------------------------------------------------------------===
9 *
10 * This file implements __udivmodti4 for the compiler_rt library.
11 *
12 * ===----------------------------------------------------------------------===
13 */
14
15#if __x86_64
16
17#include "int_lib.h"
18
19/* Effects: if rem != 0, *rem = a % b
20 * Returns: a / b
21 */
22
23/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
24
25tu_int
26__udivmodti4(tu_int a, tu_int b, tu_int* rem)
27{
28    const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
29    const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
30    utwords n;
31    n.all = a;
32    utwords d;
33    d.all = b;
34    utwords q;
35    utwords r;
36    unsigned sr;
37    /* special cases, X is unknown, K != 0 */
38    if (n.s.high == 0)
39    {
40        if (d.s.high == 0)
41        {
42            /* 0 X
43             * ---
44             * 0 X
45             */
46            if (rem)
47                *rem = n.s.low % d.s.low;
48            return n.s.low / d.s.low;
49        }
50        /* 0 X
51         * ---
52         * K X
53         */
54        if (rem)
55            *rem = n.s.low;
56        return 0;
57    }
58    /* n.s.high != 0 */
59    if (d.s.low == 0)
60    {
61        if (d.s.high == 0)
62        {
63            /* K X
64             * ---
65             * 0 0
66             */
67            if (rem)
68                *rem = n.s.high % d.s.low;
69            return n.s.high / d.s.low;
70        }
71        /* d.s.high != 0 */
72        if (n.s.low == 0)
73        {
74            /* K 0
75             * ---
76             * K 0
77             */
78            if (rem)
79            {
80                r.s.high = n.s.high % d.s.high;
81                r.s.low = 0;
82                *rem = r.all;
83            }
84            return n.s.high / d.s.high;
85        }
86        /* K K
87         * ---
88         * K 0
89         */
90        if ((d.s.high & (d.s.high - 1)) == 0)     /* if d is a power of 2 */
91        {
92            if (rem)
93            {
94                r.s.low = n.s.low;
95                r.s.high = n.s.high & (d.s.high - 1);
96                *rem = r.all;
97            }
98            return n.s.high >> __builtin_ctzll(d.s.high);
99        }
100        /* K K
101         * ---
102         * K 0
103         */
104        sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
105        /* 0 <= sr <= n_udword_bits - 2 or sr large */
106        if (sr > n_udword_bits - 2)
107        {
108           if (rem)
109                *rem = n.all;
110            return 0;
111        }
112        ++sr;
113        /* 1 <= sr <= n_udword_bits - 1 */
114        /* q.all = n.all << (n_utword_bits - sr); */
115        q.s.low = 0;
116        q.s.high = n.s.low << (n_udword_bits - sr);
117        /* r.all = n.all >> sr; */
118        r.s.high = n.s.high >> sr;
119        r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
120    }
121    else  /* d.s.low != 0 */
122    {
123        if (d.s.high == 0)
124        {
125            /* K X
126             * ---
127             * 0 K
128             */
129            if ((d.s.low & (d.s.low - 1)) == 0)     /* if d is a power of 2 */
130            {
131                if (rem)
132                    *rem = n.s.low & (d.s.low - 1);
133                if (d.s.low == 1)
134                    return n.all;
135                unsigned sr = __builtin_ctzll(d.s.low);
136                q.s.high = n.s.high >> sr;
137                q.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
138                return q.all;
139            }
140            /* K X
141             * ---
142             * 0 K
143             */
144            sr = 1 + n_udword_bits + __builtin_clzll(d.s.low)
145                                   - __builtin_clzll(n.s.high);
146            /* 2 <= sr <= n_utword_bits - 1
147             * q.all = n.all << (n_utword_bits - sr);
148             * r.all = n.all >> sr;
149             * if (sr == n_udword_bits)
150             * {
151             *     q.s.low = 0;
152             *     q.s.high = n.s.low;
153             *     r.s.high = 0;
154             *     r.s.low = n.s.high;
155             * }
156             * else if (sr < n_udword_bits)  // 2 <= sr <= n_udword_bits - 1
157             * {
158             *     q.s.low = 0;
159             *     q.s.high = n.s.low << (n_udword_bits - sr);
160             *     r.s.high = n.s.high >> sr;
161             *     r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
162             * }
163             * else              // n_udword_bits + 1 <= sr <= n_utword_bits - 1
164             * {
165             *     q.s.low = n.s.low << (n_utword_bits - sr);
166             *     q.s.high = (n.s.high << (n_utword_bits - sr)) |
167             *              (n.s.low >> (sr - n_udword_bits));
168             *     r.s.high = 0;
169             *     r.s.low = n.s.high >> (sr - n_udword_bits);
170             * }
171             */
172            q.s.low =  (n.s.low << (n_utword_bits - sr)) &
173                     ((di_int)(int)(n_udword_bits - sr) >> (n_udword_bits-1));
174            q.s.high = ((n.s.low << ( n_udword_bits - sr))                        &
175                     ((di_int)(int)(sr - n_udword_bits - 1) >> (n_udword_bits-1))) |
176                     (((n.s.high << (n_utword_bits - sr))                       |
177                     (n.s.low >> (sr - n_udword_bits)))                         &
178                     ((di_int)(int)(n_udword_bits - sr) >> (n_udword_bits-1)));
179            r.s.high = (n.s.high >> sr) &
180                     ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1));
181            r.s.low =  ((n.s.high >> (sr - n_udword_bits))                        &
182                     ((di_int)(int)(n_udword_bits - sr - 1) >> (n_udword_bits-1))) |
183                     (((n.s.high << (n_udword_bits - sr))                       |
184                     (n.s.low >> sr))                                           &
185                     ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1)));
186        }
187        else
188        {
189            /* K X
190             * ---
191             * K K
192             */
193            sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
194            /*0 <= sr <= n_udword_bits - 1 or sr large */
195            if (sr > n_udword_bits - 1)
196            {
197               if (rem)
198                    *rem = n.all;
199                return 0;
200            }
201            ++sr;
202            /* 1 <= sr <= n_udword_bits */
203            /* q.all = n.all << (n_utword_bits - sr); */
204            q.s.low = 0;
205            q.s.high = n.s.low << (n_udword_bits - sr);
206            /* r.all = n.all >> sr;
207             * if (sr < n_udword_bits)
208             * {
209             *     r.s.high = n.s.high >> sr;
210             *     r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
211             * }
212             * else
213             * {
214             *     r.s.high = 0;
215             *     r.s.low = n.s.high;
216             * }
217             */
218            r.s.high = (n.s.high >> sr) &
219                     ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1));
220            r.s.low = (n.s.high << (n_udword_bits - sr)) |
221                    ((n.s.low >> sr)                   &
222                    ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1)));
223        }
224    }
225    /* Not a special case
226     * q and r are initialized with:
227     * q.all = n.all << (n_utword_bits - sr);
228     * r.all = n.all >> sr;
229     * 1 <= sr <= n_utword_bits - 1
230     */
231    su_int carry = 0;
232    for (; sr > 0; --sr)
233    {
234        /* r:q = ((r:q)  << 1) | carry */
235        r.s.high = (r.s.high << 1) | (r.s.low  >> (n_udword_bits - 1));
236        r.s.low  = (r.s.low  << 1) | (q.s.high >> (n_udword_bits - 1));
237        q.s.high = (q.s.high << 1) | (q.s.low  >> (n_udword_bits - 1));
238        q.s.low  = (q.s.low  << 1) | carry;
239        /* carry = 0;
240         * if (r.all >= d.all)
241         * {
242         *     r.all -= d.all;
243         *      carry = 1;
244         * }
245         */
246        const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1);
247        carry = s & 1;
248        r.all -= d.all & s;
249    }
250    q.all = (q.all << 1) | carry;
251    if (rem)
252        *rem = r.all;
253    return q.all;
254}
255
256#endif /* __x86_64 */
257