udivmoddi4.c revision 2d1fdb26e458c4ddc04155c1d421bced3ba90cd0
1/* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===
2 *
3 *                     The LLVM Compiler Infrastructure
4 *
5 * This file is dual licensed under the MIT and the University of Illinois Open
6 * Source Licenses. See LICENSE.TXT for details.
7 *
8 * ===----------------------------------------------------------------------===
9 *
10 * This file implements __udivmoddi4 for the compiler_rt library.
11 *
12 * ===----------------------------------------------------------------------===
13 */
14
15#include "int_lib.h"
16
17/* Effects: if rem != 0, *rem = a % b
18 * Returns: a / b
19 */
20
21/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
22
23COMPILER_RT_ABI du_int
24__udivmoddi4(du_int a, du_int b, du_int* rem)
25{
26    const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
27    const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
28    udwords n;
29    n.all = a;
30    udwords d;
31    d.all = b;
32    udwords q;
33    udwords r;
34    unsigned sr;
35    /* special cases, X is unknown, K != 0 */
36    if (n.s.high == 0)
37    {
38        if (d.s.high == 0)
39        {
40            /* 0 X
41             * ---
42             * 0 X
43             */
44            if (rem)
45                *rem = n.s.low % d.s.low;
46            return n.s.low / d.s.low;
47        }
48        /* 0 X
49         * ---
50         * K X
51         */
52        if (rem)
53            *rem = n.s.low;
54        return 0;
55    }
56    /* n.s.high != 0 */
57    if (d.s.low == 0)
58    {
59        if (d.s.high == 0)
60        {
61            /* K X
62             * ---
63             * 0 0
64             */
65            if (rem)
66                *rem = n.s.high % d.s.low;
67            return n.s.high / d.s.low;
68        }
69        /* d.s.high != 0 */
70        if (n.s.low == 0)
71        {
72            /* K 0
73             * ---
74             * K 0
75             */
76            if (rem)
77            {
78                r.s.high = n.s.high % d.s.high;
79                r.s.low = 0;
80                *rem = r.all;
81            }
82            return n.s.high / d.s.high;
83        }
84        /* K K
85         * ---
86         * K 0
87         */
88        if ((d.s.high & (d.s.high - 1)) == 0)     /* if d is a power of 2 */
89        {
90            if (rem)
91            {
92                r.s.low = n.s.low;
93                r.s.high = n.s.high & (d.s.high - 1);
94                *rem = r.all;
95            }
96            return n.s.high >> __builtin_ctz(d.s.high);
97        }
98        /* K K
99         * ---
100         * K 0
101         */
102        sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
103        /* 0 <= sr <= n_uword_bits - 2 or sr large */
104        if (sr > n_uword_bits - 2)
105        {
106           if (rem)
107                *rem = n.all;
108            return 0;
109        }
110        ++sr;
111        /* 1 <= sr <= n_uword_bits - 1 */
112        /* q.all = n.all << (n_udword_bits - sr); */
113        q.s.low = 0;
114        q.s.high = n.s.low << (n_uword_bits - sr);
115        /* r.all = n.all >> sr; */
116        r.s.high = n.s.high >> sr;
117        r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
118    }
119    else  /* d.s.low != 0 */
120    {
121        if (d.s.high == 0)
122        {
123            /* K X
124             * ---
125             * 0 K
126             */
127            if ((d.s.low & (d.s.low - 1)) == 0)     /* if d is a power of 2 */
128            {
129                if (rem)
130                    *rem = n.s.low & (d.s.low - 1);
131                if (d.s.low == 1)
132                    return n.all;
133                sr = __builtin_ctz(d.s.low);
134                q.s.high = n.s.high >> sr;
135                q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
136                return q.all;
137            }
138            /* K X
139             * ---
140             * 0 K
141             */
142            sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high);
143            /* 2 <= sr <= n_udword_bits - 1
144             * q.all = n.all << (n_udword_bits - sr);
145             * r.all = n.all >> sr;
146             */
147            if (sr == n_uword_bits)
148            {
149                q.s.low = 0;
150                q.s.high = n.s.low;
151                r.s.high = 0;
152                r.s.low = n.s.high;
153            }
154            else if (sr < n_uword_bits)  // 2 <= sr <= n_uword_bits - 1
155            {
156                q.s.low = 0;
157                q.s.high = n.s.low << (n_uword_bits - sr);
158                r.s.high = n.s.high >> sr;
159                r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
160            }
161            else              // n_uword_bits + 1 <= sr <= n_udword_bits - 1
162            {
163                q.s.low = n.s.low << (n_udword_bits - sr);
164                q.s.high = (n.s.high << (n_udword_bits - sr)) |
165                           (n.s.low >> (sr - n_uword_bits));
166                r.s.high = 0;
167                r.s.low = n.s.high >> (sr - n_uword_bits);
168            }
169        }
170        else
171        {
172            /* K X
173             * ---
174             * K K
175             */
176            sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
177            /* 0 <= sr <= n_uword_bits - 1 or sr large */
178            if (sr > n_uword_bits - 1)
179            {
180                if (rem)
181                    *rem = n.all;
182                return 0;
183            }
184            ++sr;
185            /* 1 <= sr <= n_uword_bits */
186            /*  q.all = n.all << (n_udword_bits - sr); */
187            q.s.low = 0;
188            if (sr == n_uword_bits)
189            {
190                q.s.high = n.s.low;
191                r.s.high = 0;
192                r.s.low = n.s.high;
193            }
194            else
195            {
196                q.s.high = n.s.low << (n_uword_bits - sr);
197                r.s.high = n.s.high >> sr;
198                r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
199            }
200        }
201    }
202    /* Not a special case
203     * q and r are initialized with:
204     * q.all = n.all << (n_udword_bits - sr);
205     * r.all = n.all >> sr;
206     * 1 <= sr <= n_udword_bits - 1
207     */
208    su_int carry = 0;
209    for (; sr > 0; --sr)
210    {
211        /* r:q = ((r:q)  << 1) | carry */
212        r.s.high = (r.s.high << 1) | (r.s.low  >> (n_uword_bits - 1));
213        r.s.low  = (r.s.low  << 1) | (q.s.high >> (n_uword_bits - 1));
214        q.s.high = (q.s.high << 1) | (q.s.low  >> (n_uword_bits - 1));
215        q.s.low  = (q.s.low  << 1) | carry;
216        /* carry = 0;
217         * if (r.all >= d.all)
218         * {
219         *      r.all -= d.all;
220         *      carry = 1;
221         * }
222         */
223        const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
224        carry = s & 1;
225        r.all -= d.all & s;
226    }
227    q.all = (q.all << 1) | carry;
228    if (rem)
229        *rem = r.all;
230    return q.all;
231}
232