udivmoddi4.c revision 77ed6142daed1e068fbda64405d0de9845e40e1
1/* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===
2 *
3 *                     The LLVM Compiler Infrastructure
4 *
5 * This file is distributed under the University of Illinois Open Source
6 * License. See LICENSE.TXT for details.
7 *
8 * ===----------------------------------------------------------------------===
9 *
10 * This file implements __udivmoddi4 for the compiler_rt library.
11 *
12 * ===----------------------------------------------------------------------===
13 */
14
15#include "int_lib.h"
16
17/* Effects: if rem != 0, *rem = a % b
18 * Returns: a / b
19 */
20
21/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
22
23du_int
24__udivmoddi4(du_int a, du_int b, du_int* rem)
25{
26    const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
27    const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
28    udwords n;
29    n.all = a;
30    udwords d;
31    d.all = b;
32    udwords q;
33    udwords r;
34    unsigned sr;
35    /* special cases, X is unknown, K != 0 */
36    if (n.s.high == 0)
37    {
38        if (d.s.high == 0)
39        {
40            /* 0 X
41             * ---
42             * 0 X
43             */
44            if (rem)
45                *rem = n.s.low % d.s.low;
46            return n.s.low / d.s.low;
47        }
48        /* 0 X
49         * ---
50         * K X
51         */
52        if (rem)
53            *rem = n.s.low;
54        return 0;
55    }
56    /* n.s.high != 0 */
57    if (d.s.low == 0)
58    {
59        if (d.s.high == 0)
60        {
61            /* K X
62             * ---
63             * 0 0
64             */
65            if (rem)
66                *rem = n.s.high % d.s.low;
67            return n.s.high / d.s.low;
68        }
69        /* d.s.high != 0 */
70        if (n.s.low == 0)
71        {
72            /* K 0
73             * ---
74             * K 0
75             */
76            if (rem)
77            {
78                r.s.high = n.s.high % d.s.high;
79                r.s.low = 0;
80                *rem = r.all;
81            }
82            return n.s.high / d.s.high;
83        }
84        /* K K
85         * ---
86         * K 0
87         */
88        if ((d.s.high & (d.s.high - 1)) == 0)     /* if d is a power of 2 */
89        {
90            if (rem)
91            {
92                r.s.low = n.s.low;
93                r.s.high = n.s.high & (d.s.high - 1);
94                *rem = r.all;
95            }
96            return n.s.high >> __builtin_ctz(d.s.high);
97        }
98        /* K K
99         * ---
100         * K 0
101         */
102        sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
103        /* 0 <= sr <= n_uword_bits - 2 or sr large */
104        if (sr > n_uword_bits - 2)
105        {
106           if (rem)
107                *rem = n.all;
108            return 0;
109        }
110        ++sr;
111        /* 1 <= sr <= n_uword_bits - 1 */
112        /* q.all = n.all << (n_udword_bits - sr); */
113        q.s.low = 0;
114        q.s.high = n.s.low << (n_uword_bits - sr);
115        /* r.all = n.all >> sr; */
116        r.s.high = n.s.high >> sr;
117        r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
118    }
119    else  /* d.s.low != 0 */
120    {
121        if (d.s.high == 0)
122        {
123            /* K X
124             * ---
125             * 0 K
126             */
127            if ((d.s.low & (d.s.low - 1)) == 0)     /* if d is a power of 2 */
128            {
129                if (rem)
130                    *rem = n.s.low & (d.s.low - 1);
131                if (d.s.low == 1)
132                    return n.all;
133                unsigned sr = __builtin_ctz(d.s.low);
134                q.s.high = n.s.high >> sr;
135                q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
136                return q.all;
137            }
138            /* K X
139             * ---
140             *0 K
141             */
142            sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high);
143            /* 2 <= sr <= n_udword_bits - 1
144             * q.all = n.all << (n_udword_bits - sr);
145             * r.all = n.all >> sr;
146             * if (sr == n_uword_bits)
147             * {
148             *     q.s.low = 0;
149             *     q.s.high = n.s.low;
150             *     r.s.high = 0;
151             *     r.s.low = n.s.high;
152             * }
153             * else if (sr < n_uword_bits)  // 2 <= sr <= n_uword_bits - 1
154             * {
155             *     q.s.low = 0;
156             *     q.s.high = n.s.low << (n_uword_bits - sr);
157             *     r.s.high = n.s.high >> sr;
158             *     r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
159             * }
160             * else              // n_uword_bits + 1 <= sr <= n_udword_bits - 1
161             * {
162             *     q.s.low = n.s.low << (n_udword_bits - sr);
163             *     q.s.high = (n.s.high << (n_udword_bits - sr)) |
164             *              (n.s.low >> (sr - n_uword_bits));
165             *     r.s.high = 0;
166             *     r.s.low = n.s.high >> (sr - n_uword_bits);
167             * }
168             */
169            q.s.low =  (n.s.low << (n_udword_bits - sr)) &
170                     ((si_int)(n_uword_bits - sr) >> (n_uword_bits-1));
171            q.s.high = ((n.s.low << ( n_uword_bits - sr))                       &
172                     ((si_int)(sr - n_uword_bits - 1) >> (n_uword_bits-1))) |
173                     (((n.s.high << (n_udword_bits - sr))                     |
174                     (n.s.low >> (sr - n_uword_bits)))                        &
175                     ((si_int)(n_uword_bits - sr) >> (n_uword_bits-1)));
176            r.s.high = (n.s.high >> sr) &
177                     ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1));
178            r.s.low =  ((n.s.high >> (sr - n_uword_bits))                       &
179                     ((si_int)(n_uword_bits - sr - 1) >> (n_uword_bits-1))) |
180                     (((n.s.high << (n_uword_bits - sr))                      |
181                     (n.s.low >> sr))                                         &
182                     ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1)));
183        }
184        else
185        {
186            /* K X
187             * ---
188             * K K
189             */
190            sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
191            /* 0 <= sr <= n_uword_bits - 1 or sr large */
192            if (sr > n_uword_bits - 1)
193            {
194               if (rem)
195                    *rem = n.all;
196                return 0;
197            }
198            ++sr;
199            /* 1 <= sr <= n_uword_bits */
200            /*  q.all = n.all << (n_udword_bits - sr); */
201            q.s.low = 0;
202            q.s.high = n.s.low << (n_uword_bits - sr);
203            /* r.all = n.all >> sr;
204             * if (sr < n_uword_bits)
205             * {
206             *     r.s.high = n.s.high >> sr;
207             *     r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
208             * }
209             * else
210             * {
211             *     r.s.high = 0;
212             *     r.s.low = n.s.high;
213             * }
214             */
215            r.s.high = (n.s.high >> sr) &
216                     ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1));
217            r.s.low = (n.s.high << (n_uword_bits - sr)) |
218                    ((n.s.low >> sr)                  &
219                    ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1)));
220        }
221    }
222    /* Not a special case
223     * q and r are initialized with:
224     * q.all = n.all << (n_udword_bits - sr);
225     * r.all = n.all >> sr;
226     * 1 <= sr <= n_udword_bits - 1
227     */
228    su_int carry = 0;
229    for (; sr > 0; --sr)
230    {
231        /* r:q = ((r:q)  << 1) | carry */
232        r.s.high = (r.s.high << 1) | (r.s.low  >> (n_uword_bits - 1));
233        r.s.low  = (r.s.low  << 1) | (q.s.high >> (n_uword_bits - 1));
234        q.s.high = (q.s.high << 1) | (q.s.low  >> (n_uword_bits - 1));
235        q.s.low  = (q.s.low  << 1) | carry;
236        /* carry = 0;
237         * if (r.all >= d.all)
238         * {
239         *      r.all -= d.all;
240         *      carry = 1;
241         * }
242         */
243        const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
244        carry = s & 1;
245        r.all -= d.all & s;
246    }
247    q.all = (q.all << 1) | carry;
248    if (rem)
249        *rem = r.all;
250    return q.all;
251}
252