1/* ===-- udivmodti4.c - Implement __udivmodti4 -----------------------------=== 2 * 3 * The LLVM Compiler Infrastructure 4 * 5 * This file is dual licensed under the MIT and the University of Illinois Open 6 * Source Licenses. See LICENSE.TXT for details. 7 * 8 * ===----------------------------------------------------------------------=== 9 * 10 * This file implements __udivmodti4 for the compiler_rt library. 11 * 12 * ===----------------------------------------------------------------------=== 13 */ 14 15#include "int_lib.h" 16 17#ifdef CRT_HAS_128BIT 18 19/* Effects: if rem != 0, *rem = a % b 20 * Returns: a / b 21 */ 22 23/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ 24 25COMPILER_RT_ABI tu_int 26__udivmodti4(tu_int a, tu_int b, tu_int* rem) 27{ 28 const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; 29 const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT; 30 utwords n; 31 n.all = a; 32 utwords d; 33 d.all = b; 34 utwords q; 35 utwords r; 36 unsigned sr; 37 /* special cases, X is unknown, K != 0 */ 38 if (n.s.high == 0) 39 { 40 if (d.s.high == 0) 41 { 42 /* 0 X 43 * --- 44 * 0 X 45 */ 46 if (rem) 47 *rem = n.s.low % d.s.low; 48 return n.s.low / d.s.low; 49 } 50 /* 0 X 51 * --- 52 * K X 53 */ 54 if (rem) 55 *rem = n.s.low; 56 return 0; 57 } 58 /* n.s.high != 0 */ 59 if (d.s.low == 0) 60 { 61 if (d.s.high == 0) 62 { 63 /* K X 64 * --- 65 * 0 0 66 */ 67 if (rem) 68 *rem = n.s.high % d.s.low; 69 return n.s.high / d.s.low; 70 } 71 /* d.s.high != 0 */ 72 if (n.s.low == 0) 73 { 74 /* K 0 75 * --- 76 * K 0 77 */ 78 if (rem) 79 { 80 r.s.high = n.s.high % d.s.high; 81 r.s.low = 0; 82 *rem = r.all; 83 } 84 return n.s.high / d.s.high; 85 } 86 /* K K 87 * --- 88 * K 0 89 */ 90 if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ 91 { 92 if (rem) 93 { 94 r.s.low = n.s.low; 95 r.s.high = n.s.high & (d.s.high - 1); 96 *rem = r.all; 97 } 98 return n.s.high >> __builtin_ctzll(d.s.high); 99 } 100 /* K K 101 * --- 102 * K 0 103 */ 104 sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high); 105 /* 0 <= sr <= n_udword_bits - 2 or sr large */ 106 if (sr > n_udword_bits - 2) 107 { 108 if (rem) 109 *rem = n.all; 110 return 0; 111 } 112 ++sr; 113 /* 1 <= sr <= n_udword_bits - 1 */ 114 /* q.all = n.all << (n_utword_bits - sr); */ 115 q.s.low = 0; 116 q.s.high = n.s.low << (n_udword_bits - sr); 117 /* r.all = n.all >> sr; */ 118 r.s.high = n.s.high >> sr; 119 r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); 120 } 121 else /* d.s.low != 0 */ 122 { 123 if (d.s.high == 0) 124 { 125 /* K X 126 * --- 127 * 0 K 128 */ 129 if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ 130 { 131 if (rem) 132 *rem = n.s.low & (d.s.low - 1); 133 if (d.s.low == 1) 134 return n.all; 135 sr = __builtin_ctzll(d.s.low); 136 q.s.high = n.s.high >> sr; 137 q.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); 138 return q.all; 139 } 140 /* K X 141 * --- 142 * 0 K 143 */ 144 sr = 1 + n_udword_bits + __builtin_clzll(d.s.low) 145 - __builtin_clzll(n.s.high); 146 /* 2 <= sr <= n_utword_bits - 1 147 * q.all = n.all << (n_utword_bits - sr); 148 * r.all = n.all >> sr; 149 */ 150 if (sr == n_udword_bits) 151 { 152 q.s.low = 0; 153 q.s.high = n.s.low; 154 r.s.high = 0; 155 r.s.low = n.s.high; 156 } 157 else if (sr < n_udword_bits) // 2 <= sr <= n_udword_bits - 1 158 { 159 q.s.low = 0; 160 q.s.high = n.s.low << (n_udword_bits - sr); 161 r.s.high = n.s.high >> sr; 162 r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); 163 } 164 else // n_udword_bits + 1 <= sr <= n_utword_bits - 1 165 { 166 q.s.low = n.s.low << (n_utword_bits - sr); 167 q.s.high = (n.s.high << (n_utword_bits - sr)) | 168 (n.s.low >> (sr - n_udword_bits)); 169 r.s.high = 0; 170 r.s.low = n.s.high >> (sr - n_udword_bits); 171 } 172 } 173 else 174 { 175 /* K X 176 * --- 177 * K K 178 */ 179 sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high); 180 /*0 <= sr <= n_udword_bits - 1 or sr large */ 181 if (sr > n_udword_bits - 1) 182 { 183 if (rem) 184 *rem = n.all; 185 return 0; 186 } 187 ++sr; 188 /* 1 <= sr <= n_udword_bits 189 * q.all = n.all << (n_utword_bits - sr); 190 * r.all = n.all >> sr; 191 */ 192 q.s.low = 0; 193 if (sr == n_udword_bits) 194 { 195 q.s.high = n.s.low; 196 r.s.high = 0; 197 r.s.low = n.s.high; 198 } 199 else 200 { 201 r.s.high = n.s.high >> sr; 202 r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); 203 q.s.high = n.s.low << (n_udword_bits - sr); 204 } 205 } 206 } 207 /* Not a special case 208 * q and r are initialized with: 209 * q.all = n.all << (n_utword_bits - sr); 210 * r.all = n.all >> sr; 211 * 1 <= sr <= n_utword_bits - 1 212 */ 213 su_int carry = 0; 214 for (; sr > 0; --sr) 215 { 216 /* r:q = ((r:q) << 1) | carry */ 217 r.s.high = (r.s.high << 1) | (r.s.low >> (n_udword_bits - 1)); 218 r.s.low = (r.s.low << 1) | (q.s.high >> (n_udword_bits - 1)); 219 q.s.high = (q.s.high << 1) | (q.s.low >> (n_udword_bits - 1)); 220 q.s.low = (q.s.low << 1) | carry; 221 /* carry = 0; 222 * if (r.all >= d.all) 223 * { 224 * r.all -= d.all; 225 * carry = 1; 226 * } 227 */ 228 const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1); 229 carry = s & 1; 230 r.all -= d.all & s; 231 } 232 q.all = (q.all << 1) | carry; 233 if (rem) 234 *rem = r.all; 235 return q.all; 236} 237 238#endif /* CRT_HAS_128BIT */ 239