APInt.cpp revision 4e97a0f0cb1b1b266d2653e44eb31374f2685c2b
1//===-- APInt.cpp - Implement APInt class ---------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements a class to represent arbitrary precision integer
11// constant values and provide a variety of arithmetic operations on them.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "apint"
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/FoldingSet.h"
18#include "llvm/ADT/SmallString.h"
19#include "llvm/Support/Debug.h"
20#include "llvm/Support/ErrorHandling.h"
21#include "llvm/Support/MathExtras.h"
22#include "llvm/Support/raw_ostream.h"
23#include <cmath>
24#include <limits>
25#include <cstring>
26#include <cstdlib>
27using namespace llvm;
28
29/// A utility function for allocating memory, checking for allocation failures,
30/// and ensuring the contents are zeroed.
31inline static uint64_t* getClearedMemory(unsigned numWords) {
32  uint64_t * result = new uint64_t[numWords];
33  assert(result && "APInt memory allocation fails!");
34  memset(result, 0, numWords * sizeof(uint64_t));
35  return result;
36}
37
38/// A utility function for allocating memory and checking for allocation
39/// failure.  The content is not zeroed.
40inline static uint64_t* getMemory(unsigned numWords) {
41  uint64_t * result = new uint64_t[numWords];
42  assert(result && "APInt memory allocation fails!");
43  return result;
44}
45
46void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) {
47  pVal = getClearedMemory(getNumWords());
48  pVal[0] = val;
49  if (isSigned && int64_t(val) < 0)
50    for (unsigned i = 1; i < getNumWords(); ++i)
51      pVal[i] = -1ULL;
52}
53
54void APInt::initSlowCase(const APInt& that) {
55  pVal = getMemory(getNumWords());
56  memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
57}
58
59
60APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
61  : BitWidth(numBits), VAL(0) {
62  assert(BitWidth && "bitwidth too small");
63  assert(bigVal && "Null pointer detected!");
64  if (isSingleWord())
65    VAL = bigVal[0];
66  else {
67    // Get memory, cleared to 0
68    pVal = getClearedMemory(getNumWords());
69    // Calculate the number of words to copy
70    unsigned words = std::min<unsigned>(numWords, getNumWords());
71    // Copy the words from bigVal to pVal
72    memcpy(pVal, bigVal, words * APINT_WORD_SIZE);
73  }
74  // Make sure unused high bits are cleared
75  clearUnusedBits();
76}
77
78APInt::APInt(unsigned numbits, const char StrStart[], unsigned slen,
79             uint8_t radix)
80  : BitWidth(numbits), VAL(0) {
81  assert(BitWidth && "bitwidth too small");
82  fromString(numbits, StrStart, slen, radix);
83}
84
85APInt& APInt::AssignSlowCase(const APInt& RHS) {
86  // Don't do anything for X = X
87  if (this == &RHS)
88    return *this;
89
90  if (BitWidth == RHS.getBitWidth()) {
91    // assume same bit-width single-word case is already handled
92    assert(!isSingleWord());
93    memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
94    return *this;
95  }
96
97  if (isSingleWord()) {
98    // assume case where both are single words is already handled
99    assert(!RHS.isSingleWord());
100    VAL = 0;
101    pVal = getMemory(RHS.getNumWords());
102    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
103  } else if (getNumWords() == RHS.getNumWords())
104    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
105  else if (RHS.isSingleWord()) {
106    delete [] pVal;
107    VAL = RHS.VAL;
108  } else {
109    delete [] pVal;
110    pVal = getMemory(RHS.getNumWords());
111    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
112  }
113  BitWidth = RHS.BitWidth;
114  return clearUnusedBits();
115}
116
117APInt& APInt::operator=(uint64_t RHS) {
118  if (isSingleWord())
119    VAL = RHS;
120  else {
121    pVal[0] = RHS;
122    memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
123  }
124  return clearUnusedBits();
125}
126
127/// Profile - This method 'profiles' an APInt for use with FoldingSet.
128void APInt::Profile(FoldingSetNodeID& ID) const {
129  ID.AddInteger(BitWidth);
130
131  if (isSingleWord()) {
132    ID.AddInteger(VAL);
133    return;
134  }
135
136  unsigned NumWords = getNumWords();
137  for (unsigned i = 0; i < NumWords; ++i)
138    ID.AddInteger(pVal[i]);
139}
140
141/// add_1 - This function adds a single "digit" integer, y, to the multiple
142/// "digit" integer array,  x[]. x[] is modified to reflect the addition and
143/// 1 is returned if there is a carry out, otherwise 0 is returned.
144/// @returns the carry of the addition.
145static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
146  for (unsigned i = 0; i < len; ++i) {
147    dest[i] = y + x[i];
148    if (dest[i] < y)
149      y = 1; // Carry one to next digit.
150    else {
151      y = 0; // No need to carry so exit early
152      break;
153    }
154  }
155  return y;
156}
157
158/// @brief Prefix increment operator. Increments the APInt by one.
159APInt& APInt::operator++() {
160  if (isSingleWord())
161    ++VAL;
162  else
163    add_1(pVal, pVal, getNumWords(), 1);
164  return clearUnusedBits();
165}
166
167/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
168/// the multi-digit integer array, x[], propagating the borrowed 1 value until
169/// no further borrowing is neeeded or it runs out of "digits" in x.  The result
170/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
171/// In other words, if y > x then this function returns 1, otherwise 0.
172/// @returns the borrow out of the subtraction
173static bool sub_1(uint64_t x[], unsigned len, uint64_t y) {
174  for (unsigned i = 0; i < len; ++i) {
175    uint64_t X = x[i];
176    x[i] -= y;
177    if (y > X)
178      y = 1;  // We have to "borrow 1" from next "digit"
179    else {
180      y = 0;  // No need to borrow
181      break;  // Remaining digits are unchanged so exit early
182    }
183  }
184  return bool(y);
185}
186
187/// @brief Prefix decrement operator. Decrements the APInt by one.
188APInt& APInt::operator--() {
189  if (isSingleWord())
190    --VAL;
191  else
192    sub_1(pVal, getNumWords(), 1);
193  return clearUnusedBits();
194}
195
196/// add - This function adds the integer array x to the integer array Y and
197/// places the result in dest.
198/// @returns the carry out from the addition
199/// @brief General addition of 64-bit integer arrays
200static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
201                unsigned len) {
202  bool carry = false;
203  for (unsigned i = 0; i< len; ++i) {
204    uint64_t limit = std::min(x[i],y[i]); // must come first in case dest == x
205    dest[i] = x[i] + y[i] + carry;
206    carry = dest[i] < limit || (carry && dest[i] == limit);
207  }
208  return carry;
209}
210
211/// Adds the RHS APint to this APInt.
212/// @returns this, after addition of RHS.
213/// @brief Addition assignment operator.
214APInt& APInt::operator+=(const APInt& RHS) {
215  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
216  if (isSingleWord())
217    VAL += RHS.VAL;
218  else {
219    add(pVal, pVal, RHS.pVal, getNumWords());
220  }
221  return clearUnusedBits();
222}
223
224/// Subtracts the integer array y from the integer array x
225/// @returns returns the borrow out.
226/// @brief Generalized subtraction of 64-bit integer arrays.
227static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
228                unsigned len) {
229  bool borrow = false;
230  for (unsigned i = 0; i < len; ++i) {
231    uint64_t x_tmp = borrow ? x[i] - 1 : x[i];
232    borrow = y[i] > x_tmp || (borrow && x[i] == 0);
233    dest[i] = x_tmp - y[i];
234  }
235  return borrow;
236}
237
238/// Subtracts the RHS APInt from this APInt
239/// @returns this, after subtraction
240/// @brief Subtraction assignment operator.
241APInt& APInt::operator-=(const APInt& RHS) {
242  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
243  if (isSingleWord())
244    VAL -= RHS.VAL;
245  else
246    sub(pVal, pVal, RHS.pVal, getNumWords());
247  return clearUnusedBits();
248}
249
250/// Multiplies an integer array, x by a a uint64_t integer and places the result
251/// into dest.
252/// @returns the carry out of the multiplication.
253/// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
254static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
255  // Split y into high 32-bit part (hy)  and low 32-bit part (ly)
256  uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
257  uint64_t carry = 0;
258
259  // For each digit of x.
260  for (unsigned i = 0; i < len; ++i) {
261    // Split x into high and low words
262    uint64_t lx = x[i] & 0xffffffffULL;
263    uint64_t hx = x[i] >> 32;
264    // hasCarry - A flag to indicate if there is a carry to the next digit.
265    // hasCarry == 0, no carry
266    // hasCarry == 1, has carry
267    // hasCarry == 2, no carry and the calculation result == 0.
268    uint8_t hasCarry = 0;
269    dest[i] = carry + lx * ly;
270    // Determine if the add above introduces carry.
271    hasCarry = (dest[i] < carry) ? 1 : 0;
272    carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
273    // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
274    // (2^32 - 1) + 2^32 = 2^64.
275    hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
276
277    carry += (lx * hy) & 0xffffffffULL;
278    dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
279    carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
280            (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
281  }
282  return carry;
283}
284
285/// Multiplies integer array x by integer array y and stores the result into
286/// the integer array dest. Note that dest's size must be >= xlen + ylen.
287/// @brief Generalized multiplicate of integer arrays.
288static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
289                unsigned ylen) {
290  dest[xlen] = mul_1(dest, x, xlen, y[0]);
291  for (unsigned i = 1; i < ylen; ++i) {
292    uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
293    uint64_t carry = 0, lx = 0, hx = 0;
294    for (unsigned j = 0; j < xlen; ++j) {
295      lx = x[j] & 0xffffffffULL;
296      hx = x[j] >> 32;
297      // hasCarry - A flag to indicate if has carry.
298      // hasCarry == 0, no carry
299      // hasCarry == 1, has carry
300      // hasCarry == 2, no carry and the calculation result == 0.
301      uint8_t hasCarry = 0;
302      uint64_t resul = carry + lx * ly;
303      hasCarry = (resul < carry) ? 1 : 0;
304      carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
305      hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
306
307      carry += (lx * hy) & 0xffffffffULL;
308      resul = (carry << 32) | (resul & 0xffffffffULL);
309      dest[i+j] += resul;
310      carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
311              (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
312              ((lx * hy) >> 32) + hx * hy;
313    }
314    dest[i+xlen] = carry;
315  }
316}
317
318APInt& APInt::operator*=(const APInt& RHS) {
319  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
320  if (isSingleWord()) {
321    VAL *= RHS.VAL;
322    clearUnusedBits();
323    return *this;
324  }
325
326  // Get some bit facts about LHS and check for zero
327  unsigned lhsBits = getActiveBits();
328  unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
329  if (!lhsWords)
330    // 0 * X ===> 0
331    return *this;
332
333  // Get some bit facts about RHS and check for zero
334  unsigned rhsBits = RHS.getActiveBits();
335  unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
336  if (!rhsWords) {
337    // X * 0 ===> 0
338    clear();
339    return *this;
340  }
341
342  // Allocate space for the result
343  unsigned destWords = rhsWords + lhsWords;
344  uint64_t *dest = getMemory(destWords);
345
346  // Perform the long multiply
347  mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
348
349  // Copy result back into *this
350  clear();
351  unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
352  memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
353
354  // delete dest array and return
355  delete[] dest;
356  return *this;
357}
358
359APInt& APInt::operator&=(const APInt& RHS) {
360  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
361  if (isSingleWord()) {
362    VAL &= RHS.VAL;
363    return *this;
364  }
365  unsigned numWords = getNumWords();
366  for (unsigned i = 0; i < numWords; ++i)
367    pVal[i] &= RHS.pVal[i];
368  return *this;
369}
370
371APInt& APInt::operator|=(const APInt& RHS) {
372  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
373  if (isSingleWord()) {
374    VAL |= RHS.VAL;
375    return *this;
376  }
377  unsigned numWords = getNumWords();
378  for (unsigned i = 0; i < numWords; ++i)
379    pVal[i] |= RHS.pVal[i];
380  return *this;
381}
382
383APInt& APInt::operator^=(const APInt& RHS) {
384  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
385  if (isSingleWord()) {
386    VAL ^= RHS.VAL;
387    this->clearUnusedBits();
388    return *this;
389  }
390  unsigned numWords = getNumWords();
391  for (unsigned i = 0; i < numWords; ++i)
392    pVal[i] ^= RHS.pVal[i];
393  return clearUnusedBits();
394}
395
396APInt APInt::AndSlowCase(const APInt& RHS) const {
397  unsigned numWords = getNumWords();
398  uint64_t* val = getMemory(numWords);
399  for (unsigned i = 0; i < numWords; ++i)
400    val[i] = pVal[i] & RHS.pVal[i];
401  return APInt(val, getBitWidth());
402}
403
404APInt APInt::OrSlowCase(const APInt& RHS) const {
405  unsigned numWords = getNumWords();
406  uint64_t *val = getMemory(numWords);
407  for (unsigned i = 0; i < numWords; ++i)
408    val[i] = pVal[i] | RHS.pVal[i];
409  return APInt(val, getBitWidth());
410}
411
412APInt APInt::XorSlowCase(const APInt& RHS) const {
413  unsigned numWords = getNumWords();
414  uint64_t *val = getMemory(numWords);
415  for (unsigned i = 0; i < numWords; ++i)
416    val[i] = pVal[i] ^ RHS.pVal[i];
417
418  // 0^0==1 so clear the high bits in case they got set.
419  return APInt(val, getBitWidth()).clearUnusedBits();
420}
421
422bool APInt::operator !() const {
423  if (isSingleWord())
424    return !VAL;
425
426  for (unsigned i = 0; i < getNumWords(); ++i)
427    if (pVal[i])
428      return false;
429  return true;
430}
431
432APInt APInt::operator*(const APInt& RHS) const {
433  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
434  if (isSingleWord())
435    return APInt(BitWidth, VAL * RHS.VAL);
436  APInt Result(*this);
437  Result *= RHS;
438  return Result.clearUnusedBits();
439}
440
441APInt APInt::operator+(const APInt& RHS) const {
442  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
443  if (isSingleWord())
444    return APInt(BitWidth, VAL + RHS.VAL);
445  APInt Result(BitWidth, 0);
446  add(Result.pVal, this->pVal, RHS.pVal, getNumWords());
447  return Result.clearUnusedBits();
448}
449
450APInt APInt::operator-(const APInt& RHS) const {
451  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
452  if (isSingleWord())
453    return APInt(BitWidth, VAL - RHS.VAL);
454  APInt Result(BitWidth, 0);
455  sub(Result.pVal, this->pVal, RHS.pVal, getNumWords());
456  return Result.clearUnusedBits();
457}
458
459bool APInt::operator[](unsigned bitPosition) const {
460  return (maskBit(bitPosition) &
461          (isSingleWord() ?  VAL : pVal[whichWord(bitPosition)])) != 0;
462}
463
464bool APInt::EqualSlowCase(const APInt& RHS) const {
465  // Get some facts about the number of bits used in the two operands.
466  unsigned n1 = getActiveBits();
467  unsigned n2 = RHS.getActiveBits();
468
469  // If the number of bits isn't the same, they aren't equal
470  if (n1 != n2)
471    return false;
472
473  // If the number of bits fits in a word, we only need to compare the low word.
474  if (n1 <= APINT_BITS_PER_WORD)
475    return pVal[0] == RHS.pVal[0];
476
477  // Otherwise, compare everything
478  for (int i = whichWord(n1 - 1); i >= 0; --i)
479    if (pVal[i] != RHS.pVal[i])
480      return false;
481  return true;
482}
483
484bool APInt::EqualSlowCase(uint64_t Val) const {
485  unsigned n = getActiveBits();
486  if (n <= APINT_BITS_PER_WORD)
487    return pVal[0] == Val;
488  else
489    return false;
490}
491
492bool APInt::ult(const APInt& RHS) const {
493  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
494  if (isSingleWord())
495    return VAL < RHS.VAL;
496
497  // Get active bit length of both operands
498  unsigned n1 = getActiveBits();
499  unsigned n2 = RHS.getActiveBits();
500
501  // If magnitude of LHS is less than RHS, return true.
502  if (n1 < n2)
503    return true;
504
505  // If magnitude of RHS is greather than LHS, return false.
506  if (n2 < n1)
507    return false;
508
509  // If they bot fit in a word, just compare the low order word
510  if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
511    return pVal[0] < RHS.pVal[0];
512
513  // Otherwise, compare all words
514  unsigned topWord = whichWord(std::max(n1,n2)-1);
515  for (int i = topWord; i >= 0; --i) {
516    if (pVal[i] > RHS.pVal[i])
517      return false;
518    if (pVal[i] < RHS.pVal[i])
519      return true;
520  }
521  return false;
522}
523
524bool APInt::slt(const APInt& RHS) const {
525  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
526  if (isSingleWord()) {
527    int64_t lhsSext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
528    int64_t rhsSext = (int64_t(RHS.VAL) << (64-BitWidth)) >> (64-BitWidth);
529    return lhsSext < rhsSext;
530  }
531
532  APInt lhs(*this);
533  APInt rhs(RHS);
534  bool lhsNeg = isNegative();
535  bool rhsNeg = rhs.isNegative();
536  if (lhsNeg) {
537    // Sign bit is set so perform two's complement to make it positive
538    lhs.flip();
539    lhs++;
540  }
541  if (rhsNeg) {
542    // Sign bit is set so perform two's complement to make it positive
543    rhs.flip();
544    rhs++;
545  }
546
547  // Now we have unsigned values to compare so do the comparison if necessary
548  // based on the negativeness of the values.
549  if (lhsNeg)
550    if (rhsNeg)
551      return lhs.ugt(rhs);
552    else
553      return true;
554  else if (rhsNeg)
555    return false;
556  else
557    return lhs.ult(rhs);
558}
559
560APInt& APInt::set(unsigned bitPosition) {
561  if (isSingleWord())
562    VAL |= maskBit(bitPosition);
563  else
564    pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
565  return *this;
566}
567
568/// Set the given bit to 0 whose position is given as "bitPosition".
569/// @brief Set a given bit to 0.
570APInt& APInt::clear(unsigned bitPosition) {
571  if (isSingleWord())
572    VAL &= ~maskBit(bitPosition);
573  else
574    pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
575  return *this;
576}
577
578/// @brief Toggle every bit to its opposite value.
579
580/// Toggle a given bit to its opposite value whose position is given
581/// as "bitPosition".
582/// @brief Toggles a given bit to its opposite value.
583APInt& APInt::flip(unsigned bitPosition) {
584  assert(bitPosition < BitWidth && "Out of the bit-width range!");
585  if ((*this)[bitPosition]) clear(bitPosition);
586  else set(bitPosition);
587  return *this;
588}
589
590unsigned APInt::getBitsNeeded(const char* str, unsigned slen, uint8_t radix) {
591  assert(str != 0 && "Invalid value string");
592  assert(slen > 0 && "Invalid string length");
593
594  // Each computation below needs to know if its negative
595  unsigned isNegative = str[0] == '-';
596  if (isNegative) {
597    slen--;
598    str++;
599  }
600  // For radixes of power-of-two values, the bits required is accurately and
601  // easily computed
602  if (radix == 2)
603    return slen + isNegative;
604  if (radix == 8)
605    return slen * 3 + isNegative;
606  if (radix == 16)
607    return slen * 4 + isNegative;
608
609  // Otherwise it must be radix == 10, the hard case
610  assert(radix == 10 && "Invalid radix");
611
612  // This is grossly inefficient but accurate. We could probably do something
613  // with a computation of roughly slen*64/20 and then adjust by the value of
614  // the first few digits. But, I'm not sure how accurate that could be.
615
616  // Compute a sufficient number of bits that is always large enough but might
617  // be too large. This avoids the assertion in the constructor.
618  unsigned sufficient = slen*64/18;
619
620  // Convert to the actual binary value.
621  APInt tmp(sufficient, str, slen, radix);
622
623  // Compute how many bits are required.
624  return isNegative + tmp.logBase2() + 1;
625}
626
627// From http://www.burtleburtle.net, byBob Jenkins.
628// When targeting x86, both GCC and LLVM seem to recognize this as a
629// rotate instruction.
630#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
631
632// From http://www.burtleburtle.net, by Bob Jenkins.
633#define mix(a,b,c) \
634  { \
635    a -= c;  a ^= rot(c, 4);  c += b; \
636    b -= a;  b ^= rot(a, 6);  a += c; \
637    c -= b;  c ^= rot(b, 8);  b += a; \
638    a -= c;  a ^= rot(c,16);  c += b; \
639    b -= a;  b ^= rot(a,19);  a += c; \
640    c -= b;  c ^= rot(b, 4);  b += a; \
641  }
642
643// From http://www.burtleburtle.net, by Bob Jenkins.
644#define final(a,b,c) \
645  { \
646    c ^= b; c -= rot(b,14); \
647    a ^= c; a -= rot(c,11); \
648    b ^= a; b -= rot(a,25); \
649    c ^= b; c -= rot(b,16); \
650    a ^= c; a -= rot(c,4);  \
651    b ^= a; b -= rot(a,14); \
652    c ^= b; c -= rot(b,24); \
653  }
654
655// hashword() was adapted from http://www.burtleburtle.net, by Bob
656// Jenkins.  k is a pointer to an array of uint32_t values; length is
657// the length of the key, in 32-bit chunks.  This version only handles
658// keys that are a multiple of 32 bits in size.
659static inline uint32_t hashword(const uint64_t *k64, size_t length)
660{
661  const uint32_t *k = reinterpret_cast<const uint32_t *>(k64);
662  uint32_t a,b,c;
663
664  /* Set up the internal state */
665  a = b = c = 0xdeadbeef + (((uint32_t)length)<<2);
666
667  /*------------------------------------------------- handle most of the key */
668  while (length > 3)
669    {
670      a += k[0];
671      b += k[1];
672      c += k[2];
673      mix(a,b,c);
674      length -= 3;
675      k += 3;
676    }
677
678  /*------------------------------------------- handle the last 3 uint32_t's */
679  switch (length) {                  /* all the case statements fall through */
680  case 3 : c+=k[2];
681  case 2 : b+=k[1];
682  case 1 : a+=k[0];
683    final(a,b,c);
684    case 0:     /* case 0: nothing left to add */
685      break;
686    }
687  /*------------------------------------------------------ report the result */
688  return c;
689}
690
691// hashword8() was adapted from http://www.burtleburtle.net, by Bob
692// Jenkins.  This computes a 32-bit hash from one 64-bit word.  When
693// targeting x86 (32 or 64 bit), both LLVM and GCC compile this
694// function into about 35 instructions when inlined.
695static inline uint32_t hashword8(const uint64_t k64)
696{
697  uint32_t a,b,c;
698  a = b = c = 0xdeadbeef + 4;
699  b += k64 >> 32;
700  a += k64 & 0xffffffff;
701  final(a,b,c);
702  return c;
703}
704#undef final
705#undef mix
706#undef rot
707
708uint64_t APInt::getHashValue() const {
709  uint64_t hash;
710  if (isSingleWord())
711    hash = hashword8(VAL);
712  else
713    hash = hashword(pVal, getNumWords()*2);
714  return hash;
715}
716
717/// HiBits - This function returns the high "numBits" bits of this APInt.
718APInt APInt::getHiBits(unsigned numBits) const {
719  return APIntOps::lshr(*this, BitWidth - numBits);
720}
721
722/// LoBits - This function returns the low "numBits" bits of this APInt.
723APInt APInt::getLoBits(unsigned numBits) const {
724  return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
725                        BitWidth - numBits);
726}
727
728bool APInt::isPowerOf2() const {
729  return (!!*this) && !(*this & (*this - APInt(BitWidth,1)));
730}
731
732unsigned APInt::countLeadingZerosSlowCase() const {
733  unsigned Count = 0;
734  for (unsigned i = getNumWords(); i > 0u; --i) {
735    if (pVal[i-1] == 0)
736      Count += APINT_BITS_PER_WORD;
737    else {
738      Count += CountLeadingZeros_64(pVal[i-1]);
739      break;
740    }
741  }
742  unsigned remainder = BitWidth % APINT_BITS_PER_WORD;
743  if (remainder)
744    Count -= APINT_BITS_PER_WORD - remainder;
745  return std::min(Count, BitWidth);
746}
747
748static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) {
749  unsigned Count = 0;
750  if (skip)
751    V <<= skip;
752  while (V && (V & (1ULL << 63))) {
753    Count++;
754    V <<= 1;
755  }
756  return Count;
757}
758
759unsigned APInt::countLeadingOnes() const {
760  if (isSingleWord())
761    return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth);
762
763  unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
764  unsigned shift;
765  if (!highWordBits) {
766    highWordBits = APINT_BITS_PER_WORD;
767    shift = 0;
768  } else {
769    shift = APINT_BITS_PER_WORD - highWordBits;
770  }
771  int i = getNumWords() - 1;
772  unsigned Count = countLeadingOnes_64(pVal[i], shift);
773  if (Count == highWordBits) {
774    for (i--; i >= 0; --i) {
775      if (pVal[i] == -1ULL)
776        Count += APINT_BITS_PER_WORD;
777      else {
778        Count += countLeadingOnes_64(pVal[i], 0);
779        break;
780      }
781    }
782  }
783  return Count;
784}
785
786unsigned APInt::countTrailingZeros() const {
787  if (isSingleWord())
788    return std::min(unsigned(CountTrailingZeros_64(VAL)), BitWidth);
789  unsigned Count = 0;
790  unsigned i = 0;
791  for (; i < getNumWords() && pVal[i] == 0; ++i)
792    Count += APINT_BITS_PER_WORD;
793  if (i < getNumWords())
794    Count += CountTrailingZeros_64(pVal[i]);
795  return std::min(Count, BitWidth);
796}
797
798unsigned APInt::countTrailingOnesSlowCase() const {
799  unsigned Count = 0;
800  unsigned i = 0;
801  for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
802    Count += APINT_BITS_PER_WORD;
803  if (i < getNumWords())
804    Count += CountTrailingOnes_64(pVal[i]);
805  return std::min(Count, BitWidth);
806}
807
808unsigned APInt::countPopulationSlowCase() const {
809  unsigned Count = 0;
810  for (unsigned i = 0; i < getNumWords(); ++i)
811    Count += CountPopulation_64(pVal[i]);
812  return Count;
813}
814
815APInt APInt::byteSwap() const {
816  assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
817  if (BitWidth == 16)
818    return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
819  else if (BitWidth == 32)
820    return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
821  else if (BitWidth == 48) {
822    unsigned Tmp1 = unsigned(VAL >> 16);
823    Tmp1 = ByteSwap_32(Tmp1);
824    uint16_t Tmp2 = uint16_t(VAL);
825    Tmp2 = ByteSwap_16(Tmp2);
826    return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
827  } else if (BitWidth == 64)
828    return APInt(BitWidth, ByteSwap_64(VAL));
829  else {
830    APInt Result(BitWidth, 0);
831    char *pByte = (char*)Result.pVal;
832    for (unsigned i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) {
833      char Tmp = pByte[i];
834      pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i];
835      pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp;
836    }
837    return Result;
838  }
839}
840
841APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
842                                            const APInt& API2) {
843  APInt A = API1, B = API2;
844  while (!!B) {
845    APInt T = B;
846    B = APIntOps::urem(A, B);
847    A = T;
848  }
849  return A;
850}
851
852APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
853  union {
854    double D;
855    uint64_t I;
856  } T;
857  T.D = Double;
858
859  // Get the sign bit from the highest order bit
860  bool isNeg = T.I >> 63;
861
862  // Get the 11-bit exponent and adjust for the 1023 bit bias
863  int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
864
865  // If the exponent is negative, the value is < 0 so just return 0.
866  if (exp < 0)
867    return APInt(width, 0u);
868
869  // Extract the mantissa by clearing the top 12 bits (sign + exponent).
870  uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
871
872  // If the exponent doesn't shift all bits out of the mantissa
873  if (exp < 52)
874    return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
875                    APInt(width, mantissa >> (52 - exp));
876
877  // If the client didn't provide enough bits for us to shift the mantissa into
878  // then the result is undefined, just return 0
879  if (width <= exp - 52)
880    return APInt(width, 0);
881
882  // Otherwise, we have to shift the mantissa bits up to the right location
883  APInt Tmp(width, mantissa);
884  Tmp = Tmp.shl((unsigned)exp - 52);
885  return isNeg ? -Tmp : Tmp;
886}
887
888/// RoundToDouble - This function converts this APInt to a double.
889/// The layout for double is as following (IEEE Standard 754):
890///  --------------------------------------
891/// |  Sign    Exponent    Fraction    Bias |
892/// |-------------------------------------- |
893/// |  1[63]   11[62-52]   52[51-00]   1023 |
894///  --------------------------------------
895double APInt::roundToDouble(bool isSigned) const {
896
897  // Handle the simple case where the value is contained in one uint64_t.
898  // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
899  if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
900    if (isSigned) {
901      int64_t sext = (int64_t(getWord(0)) << (64-BitWidth)) >> (64-BitWidth);
902      return double(sext);
903    } else
904      return double(getWord(0));
905  }
906
907  // Determine if the value is negative.
908  bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
909
910  // Construct the absolute value if we're negative.
911  APInt Tmp(isNeg ? -(*this) : (*this));
912
913  // Figure out how many bits we're using.
914  unsigned n = Tmp.getActiveBits();
915
916  // The exponent (without bias normalization) is just the number of bits
917  // we are using. Note that the sign bit is gone since we constructed the
918  // absolute value.
919  uint64_t exp = n;
920
921  // Return infinity for exponent overflow
922  if (exp > 1023) {
923    if (!isSigned || !isNeg)
924      return std::numeric_limits<double>::infinity();
925    else
926      return -std::numeric_limits<double>::infinity();
927  }
928  exp += 1023; // Increment for 1023 bias
929
930  // Number of bits in mantissa is 52. To obtain the mantissa value, we must
931  // extract the high 52 bits from the correct words in pVal.
932  uint64_t mantissa;
933  unsigned hiWord = whichWord(n-1);
934  if (hiWord == 0) {
935    mantissa = Tmp.pVal[0];
936    if (n > 52)
937      mantissa >>= n - 52; // shift down, we want the top 52 bits.
938  } else {
939    assert(hiWord > 0 && "huh?");
940    uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
941    uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
942    mantissa = hibits | lobits;
943  }
944
945  // The leading bit of mantissa is implicit, so get rid of it.
946  uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
947  union {
948    double D;
949    uint64_t I;
950  } T;
951  T.I = sign | (exp << 52) | mantissa;
952  return T.D;
953}
954
955// Truncate to new width.
956APInt &APInt::trunc(unsigned width) {
957  assert(width < BitWidth && "Invalid APInt Truncate request");
958  assert(width && "Can't truncate to 0 bits");
959  unsigned wordsBefore = getNumWords();
960  BitWidth = width;
961  unsigned wordsAfter = getNumWords();
962  if (wordsBefore != wordsAfter) {
963    if (wordsAfter == 1) {
964      uint64_t *tmp = pVal;
965      VAL = pVal[0];
966      delete [] tmp;
967    } else {
968      uint64_t *newVal = getClearedMemory(wordsAfter);
969      for (unsigned i = 0; i < wordsAfter; ++i)
970        newVal[i] = pVal[i];
971      delete [] pVal;
972      pVal = newVal;
973    }
974  }
975  return clearUnusedBits();
976}
977
978// Sign extend to a new width.
979APInt &APInt::sext(unsigned width) {
980  assert(width > BitWidth && "Invalid APInt SignExtend request");
981  // If the sign bit isn't set, this is the same as zext.
982  if (!isNegative()) {
983    zext(width);
984    return *this;
985  }
986
987  // The sign bit is set. First, get some facts
988  unsigned wordsBefore = getNumWords();
989  unsigned wordBits = BitWidth % APINT_BITS_PER_WORD;
990  BitWidth = width;
991  unsigned wordsAfter = getNumWords();
992
993  // Mask the high order word appropriately
994  if (wordsBefore == wordsAfter) {
995    unsigned newWordBits = width % APINT_BITS_PER_WORD;
996    // The extension is contained to the wordsBefore-1th word.
997    uint64_t mask = ~0ULL;
998    if (newWordBits)
999      mask >>= APINT_BITS_PER_WORD - newWordBits;
1000    mask <<= wordBits;
1001    if (wordsBefore == 1)
1002      VAL |= mask;
1003    else
1004      pVal[wordsBefore-1] |= mask;
1005    return clearUnusedBits();
1006  }
1007
1008  uint64_t mask = wordBits == 0 ? 0 : ~0ULL << wordBits;
1009  uint64_t *newVal = getMemory(wordsAfter);
1010  if (wordsBefore == 1)
1011    newVal[0] = VAL | mask;
1012  else {
1013    for (unsigned i = 0; i < wordsBefore; ++i)
1014      newVal[i] = pVal[i];
1015    newVal[wordsBefore-1] |= mask;
1016  }
1017  for (unsigned i = wordsBefore; i < wordsAfter; i++)
1018    newVal[i] = -1ULL;
1019  if (wordsBefore != 1)
1020    delete [] pVal;
1021  pVal = newVal;
1022  return clearUnusedBits();
1023}
1024
1025//  Zero extend to a new width.
1026APInt &APInt::zext(unsigned width) {
1027  assert(width > BitWidth && "Invalid APInt ZeroExtend request");
1028  unsigned wordsBefore = getNumWords();
1029  BitWidth = width;
1030  unsigned wordsAfter = getNumWords();
1031  if (wordsBefore != wordsAfter) {
1032    uint64_t *newVal = getClearedMemory(wordsAfter);
1033    if (wordsBefore == 1)
1034      newVal[0] = VAL;
1035    else
1036      for (unsigned i = 0; i < wordsBefore; ++i)
1037        newVal[i] = pVal[i];
1038    if (wordsBefore != 1)
1039      delete [] pVal;
1040    pVal = newVal;
1041  }
1042  return *this;
1043}
1044
1045APInt &APInt::zextOrTrunc(unsigned width) {
1046  if (BitWidth < width)
1047    return zext(width);
1048  if (BitWidth > width)
1049    return trunc(width);
1050  return *this;
1051}
1052
1053APInt &APInt::sextOrTrunc(unsigned width) {
1054  if (BitWidth < width)
1055    return sext(width);
1056  if (BitWidth > width)
1057    return trunc(width);
1058  return *this;
1059}
1060
1061/// Arithmetic right-shift this APInt by shiftAmt.
1062/// @brief Arithmetic right-shift function.
1063APInt APInt::ashr(const APInt &shiftAmt) const {
1064  return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1065}
1066
1067/// Arithmetic right-shift this APInt by shiftAmt.
1068/// @brief Arithmetic right-shift function.
1069APInt APInt::ashr(unsigned shiftAmt) const {
1070  assert(shiftAmt <= BitWidth && "Invalid shift amount");
1071  // Handle a degenerate case
1072  if (shiftAmt == 0)
1073    return *this;
1074
1075  // Handle single word shifts with built-in ashr
1076  if (isSingleWord()) {
1077    if (shiftAmt == BitWidth)
1078      return APInt(BitWidth, 0); // undefined
1079    else {
1080      unsigned SignBit = APINT_BITS_PER_WORD - BitWidth;
1081      return APInt(BitWidth,
1082        (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt));
1083    }
1084  }
1085
1086  // If all the bits were shifted out, the result is, technically, undefined.
1087  // We return -1 if it was negative, 0 otherwise. We check this early to avoid
1088  // issues in the algorithm below.
1089  if (shiftAmt == BitWidth) {
1090    if (isNegative())
1091      return APInt(BitWidth, -1ULL, true);
1092    else
1093      return APInt(BitWidth, 0);
1094  }
1095
1096  // Create some space for the result.
1097  uint64_t * val = new uint64_t[getNumWords()];
1098
1099  // Compute some values needed by the following shift algorithms
1100  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word
1101  unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift
1102  unsigned breakWord = getNumWords() - 1 - offset; // last word affected
1103  unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word?
1104  if (bitsInWord == 0)
1105    bitsInWord = APINT_BITS_PER_WORD;
1106
1107  // If we are shifting whole words, just move whole words
1108  if (wordShift == 0) {
1109    // Move the words containing significant bits
1110    for (unsigned i = 0; i <= breakWord; ++i)
1111      val[i] = pVal[i+offset]; // move whole word
1112
1113    // Adjust the top significant word for sign bit fill, if negative
1114    if (isNegative())
1115      if (bitsInWord < APINT_BITS_PER_WORD)
1116        val[breakWord] |= ~0ULL << bitsInWord; // set high bits
1117  } else {
1118    // Shift the low order words
1119    for (unsigned i = 0; i < breakWord; ++i) {
1120      // This combines the shifted corresponding word with the low bits from
1121      // the next word (shifted into this word's high bits).
1122      val[i] = (pVal[i+offset] >> wordShift) |
1123               (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1124    }
1125
1126    // Shift the break word. In this case there are no bits from the next word
1127    // to include in this word.
1128    val[breakWord] = pVal[breakWord+offset] >> wordShift;
1129
1130    // Deal with sign extenstion in the break word, and possibly the word before
1131    // it.
1132    if (isNegative()) {
1133      if (wordShift > bitsInWord) {
1134        if (breakWord > 0)
1135          val[breakWord-1] |=
1136            ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
1137        val[breakWord] |= ~0ULL;
1138      } else
1139        val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
1140    }
1141  }
1142
1143  // Remaining words are 0 or -1, just assign them.
1144  uint64_t fillValue = (isNegative() ? -1ULL : 0);
1145  for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1146    val[i] = fillValue;
1147  return APInt(val, BitWidth).clearUnusedBits();
1148}
1149
1150/// Logical right-shift this APInt by shiftAmt.
1151/// @brief Logical right-shift function.
1152APInt APInt::lshr(const APInt &shiftAmt) const {
1153  return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1154}
1155
1156/// Logical right-shift this APInt by shiftAmt.
1157/// @brief Logical right-shift function.
1158APInt APInt::lshr(unsigned shiftAmt) const {
1159  if (isSingleWord()) {
1160    if (shiftAmt == BitWidth)
1161      return APInt(BitWidth, 0);
1162    else
1163      return APInt(BitWidth, this->VAL >> shiftAmt);
1164  }
1165
1166  // If all the bits were shifted out, the result is 0. This avoids issues
1167  // with shifting by the size of the integer type, which produces undefined
1168  // results. We define these "undefined results" to always be 0.
1169  if (shiftAmt == BitWidth)
1170    return APInt(BitWidth, 0);
1171
1172  // If none of the bits are shifted out, the result is *this. This avoids
1173  // issues with shifting by the size of the integer type, which produces
1174  // undefined results in the code below. This is also an optimization.
1175  if (shiftAmt == 0)
1176    return *this;
1177
1178  // Create some space for the result.
1179  uint64_t * val = new uint64_t[getNumWords()];
1180
1181  // If we are shifting less than a word, compute the shift with a simple carry
1182  if (shiftAmt < APINT_BITS_PER_WORD) {
1183    uint64_t carry = 0;
1184    for (int i = getNumWords()-1; i >= 0; --i) {
1185      val[i] = (pVal[i] >> shiftAmt) | carry;
1186      carry = pVal[i] << (APINT_BITS_PER_WORD - shiftAmt);
1187    }
1188    return APInt(val, BitWidth).clearUnusedBits();
1189  }
1190
1191  // Compute some values needed by the remaining shift algorithms
1192  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1193  unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1194
1195  // If we are shifting whole words, just move whole words
1196  if (wordShift == 0) {
1197    for (unsigned i = 0; i < getNumWords() - offset; ++i)
1198      val[i] = pVal[i+offset];
1199    for (unsigned i = getNumWords()-offset; i < getNumWords(); i++)
1200      val[i] = 0;
1201    return APInt(val,BitWidth).clearUnusedBits();
1202  }
1203
1204  // Shift the low order words
1205  unsigned breakWord = getNumWords() - offset -1;
1206  for (unsigned i = 0; i < breakWord; ++i)
1207    val[i] = (pVal[i+offset] >> wordShift) |
1208             (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1209  // Shift the break word.
1210  val[breakWord] = pVal[breakWord+offset] >> wordShift;
1211
1212  // Remaining words are 0
1213  for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1214    val[i] = 0;
1215  return APInt(val, BitWidth).clearUnusedBits();
1216}
1217
1218/// Left-shift this APInt by shiftAmt.
1219/// @brief Left-shift function.
1220APInt APInt::shl(const APInt &shiftAmt) const {
1221  // It's undefined behavior in C to shift by BitWidth or greater.
1222  return shl((unsigned)shiftAmt.getLimitedValue(BitWidth));
1223}
1224
1225APInt APInt::shlSlowCase(unsigned shiftAmt) const {
1226  // If all the bits were shifted out, the result is 0. This avoids issues
1227  // with shifting by the size of the integer type, which produces undefined
1228  // results. We define these "undefined results" to always be 0.
1229  if (shiftAmt == BitWidth)
1230    return APInt(BitWidth, 0);
1231
1232  // If none of the bits are shifted out, the result is *this. This avoids a
1233  // lshr by the words size in the loop below which can produce incorrect
1234  // results. It also avoids the expensive computation below for a common case.
1235  if (shiftAmt == 0)
1236    return *this;
1237
1238  // Create some space for the result.
1239  uint64_t * val = new uint64_t[getNumWords()];
1240
1241  // If we are shifting less than a word, do it the easy way
1242  if (shiftAmt < APINT_BITS_PER_WORD) {
1243    uint64_t carry = 0;
1244    for (unsigned i = 0; i < getNumWords(); i++) {
1245      val[i] = pVal[i] << shiftAmt | carry;
1246      carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt);
1247    }
1248    return APInt(val, BitWidth).clearUnusedBits();
1249  }
1250
1251  // Compute some values needed by the remaining shift algorithms
1252  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1253  unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1254
1255  // If we are shifting whole words, just move whole words
1256  if (wordShift == 0) {
1257    for (unsigned i = 0; i < offset; i++)
1258      val[i] = 0;
1259    for (unsigned i = offset; i < getNumWords(); i++)
1260      val[i] = pVal[i-offset];
1261    return APInt(val,BitWidth).clearUnusedBits();
1262  }
1263
1264  // Copy whole words from this to Result.
1265  unsigned i = getNumWords() - 1;
1266  for (; i > offset; --i)
1267    val[i] = pVal[i-offset] << wordShift |
1268             pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift);
1269  val[offset] = pVal[0] << wordShift;
1270  for (i = 0; i < offset; ++i)
1271    val[i] = 0;
1272  return APInt(val, BitWidth).clearUnusedBits();
1273}
1274
1275APInt APInt::rotl(const APInt &rotateAmt) const {
1276  return rotl((unsigned)rotateAmt.getLimitedValue(BitWidth));
1277}
1278
1279APInt APInt::rotl(unsigned rotateAmt) const {
1280  if (rotateAmt == 0)
1281    return *this;
1282  // Don't get too fancy, just use existing shift/or facilities
1283  APInt hi(*this);
1284  APInt lo(*this);
1285  hi.shl(rotateAmt);
1286  lo.lshr(BitWidth - rotateAmt);
1287  return hi | lo;
1288}
1289
1290APInt APInt::rotr(const APInt &rotateAmt) const {
1291  return rotr((unsigned)rotateAmt.getLimitedValue(BitWidth));
1292}
1293
1294APInt APInt::rotr(unsigned rotateAmt) const {
1295  if (rotateAmt == 0)
1296    return *this;
1297  // Don't get too fancy, just use existing shift/or facilities
1298  APInt hi(*this);
1299  APInt lo(*this);
1300  lo.lshr(rotateAmt);
1301  hi.shl(BitWidth - rotateAmt);
1302  return hi | lo;
1303}
1304
1305// Square Root - this method computes and returns the square root of "this".
1306// Three mechanisms are used for computation. For small values (<= 5 bits),
1307// a table lookup is done. This gets some performance for common cases. For
1308// values using less than 52 bits, the value is converted to double and then
1309// the libc sqrt function is called. The result is rounded and then converted
1310// back to a uint64_t which is then used to construct the result. Finally,
1311// the Babylonian method for computing square roots is used.
1312APInt APInt::sqrt() const {
1313
1314  // Determine the magnitude of the value.
1315  unsigned magnitude = getActiveBits();
1316
1317  // Use a fast table for some small values. This also gets rid of some
1318  // rounding errors in libc sqrt for small values.
1319  if (magnitude <= 5) {
1320    static const uint8_t results[32] = {
1321      /*     0 */ 0,
1322      /*  1- 2 */ 1, 1,
1323      /*  3- 6 */ 2, 2, 2, 2,
1324      /*  7-12 */ 3, 3, 3, 3, 3, 3,
1325      /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1326      /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1327      /*    31 */ 6
1328    };
1329    return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
1330  }
1331
1332  // If the magnitude of the value fits in less than 52 bits (the precision of
1333  // an IEEE double precision floating point value), then we can use the
1334  // libc sqrt function which will probably use a hardware sqrt computation.
1335  // This should be faster than the algorithm below.
1336  if (magnitude < 52) {
1337#ifdef _MSC_VER
1338    // Amazingly, VC++ doesn't have round().
1339    return APInt(BitWidth,
1340                 uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5);
1341#else
1342    return APInt(BitWidth,
1343                 uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
1344#endif
1345  }
1346
1347  // Okay, all the short cuts are exhausted. We must compute it. The following
1348  // is a classical Babylonian method for computing the square root. This code
1349  // was adapted to APINt from a wikipedia article on such computations.
1350  // See http://www.wikipedia.org/ and go to the page named
1351  // Calculate_an_integer_square_root.
1352  unsigned nbits = BitWidth, i = 4;
1353  APInt testy(BitWidth, 16);
1354  APInt x_old(BitWidth, 1);
1355  APInt x_new(BitWidth, 0);
1356  APInt two(BitWidth, 2);
1357
1358  // Select a good starting value using binary logarithms.
1359  for (;; i += 2, testy = testy.shl(2))
1360    if (i >= nbits || this->ule(testy)) {
1361      x_old = x_old.shl(i / 2);
1362      break;
1363    }
1364
1365  // Use the Babylonian method to arrive at the integer square root:
1366  for (;;) {
1367    x_new = (this->udiv(x_old) + x_old).udiv(two);
1368    if (x_old.ule(x_new))
1369      break;
1370    x_old = x_new;
1371  }
1372
1373  // Make sure we return the closest approximation
1374  // NOTE: The rounding calculation below is correct. It will produce an
1375  // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1376  // determined to be a rounding issue with pari/gp as it begins to use a
1377  // floating point representation after 192 bits. There are no discrepancies
1378  // between this algorithm and pari/gp for bit widths < 192 bits.
1379  APInt square(x_old * x_old);
1380  APInt nextSquare((x_old + 1) * (x_old +1));
1381  if (this->ult(square))
1382    return x_old;
1383  else if (this->ule(nextSquare)) {
1384    APInt midpoint((nextSquare - square).udiv(two));
1385    APInt offset(*this - square);
1386    if (offset.ult(midpoint))
1387      return x_old;
1388    else
1389      return x_old + 1;
1390  } else
1391    llvm_unreachable("Error in APInt::sqrt computation");
1392  return x_old + 1;
1393}
1394
1395/// Computes the multiplicative inverse of this APInt for a given modulo. The
1396/// iterative extended Euclidean algorithm is used to solve for this value,
1397/// however we simplify it to speed up calculating only the inverse, and take
1398/// advantage of div+rem calculations. We also use some tricks to avoid copying
1399/// (potentially large) APInts around.
1400APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1401  assert(ult(modulo) && "This APInt must be smaller than the modulo");
1402
1403  // Using the properties listed at the following web page (accessed 06/21/08):
1404  //   http://www.numbertheory.org/php/euclid.html
1405  // (especially the properties numbered 3, 4 and 9) it can be proved that
1406  // BitWidth bits suffice for all the computations in the algorithm implemented
1407  // below. More precisely, this number of bits suffice if the multiplicative
1408  // inverse exists, but may not suffice for the general extended Euclidean
1409  // algorithm.
1410
1411  APInt r[2] = { modulo, *this };
1412  APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1413  APInt q(BitWidth, 0);
1414
1415  unsigned i;
1416  for (i = 0; r[i^1] != 0; i ^= 1) {
1417    // An overview of the math without the confusing bit-flipping:
1418    // q = r[i-2] / r[i-1]
1419    // r[i] = r[i-2] % r[i-1]
1420    // t[i] = t[i-2] - t[i-1] * q
1421    udivrem(r[i], r[i^1], q, r[i]);
1422    t[i] -= t[i^1] * q;
1423  }
1424
1425  // If this APInt and the modulo are not coprime, there is no multiplicative
1426  // inverse, so return 0. We check this by looking at the next-to-last
1427  // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1428  // algorithm.
1429  if (r[i] != 1)
1430    return APInt(BitWidth, 0);
1431
1432  // The next-to-last t is the multiplicative inverse.  However, we are
1433  // interested in a positive inverse. Calcuate a positive one from a negative
1434  // one if necessary. A simple addition of the modulo suffices because
1435  // abs(t[i]) is known to be less than *this/2 (see the link above).
1436  return t[i].isNegative() ? t[i] + modulo : t[i];
1437}
1438
1439/// Calculate the magic numbers required to implement a signed integer division
1440/// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1441/// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1442/// Warren, Jr., chapter 10.
1443APInt::ms APInt::magic() const {
1444  const APInt& d = *this;
1445  unsigned p;
1446  APInt ad, anc, delta, q1, r1, q2, r2, t;
1447  APInt allOnes = APInt::getAllOnesValue(d.getBitWidth());
1448  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1449  APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1450  struct ms mag;
1451
1452  ad = d.abs();
1453  t = signedMin + (d.lshr(d.getBitWidth() - 1));
1454  anc = t - 1 - t.urem(ad);   // absolute value of nc
1455  p = d.getBitWidth() - 1;    // initialize p
1456  q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1457  r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1458  q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1459  r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1460  do {
1461    p = p + 1;
1462    q1 = q1<<1;          // update q1 = 2p/abs(nc)
1463    r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1464    if (r1.uge(anc)) {  // must be unsigned comparison
1465      q1 = q1 + 1;
1466      r1 = r1 - anc;
1467    }
1468    q2 = q2<<1;          // update q2 = 2p/abs(d)
1469    r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1470    if (r2.uge(ad)) {   // must be unsigned comparison
1471      q2 = q2 + 1;
1472      r2 = r2 - ad;
1473    }
1474    delta = ad - r2;
1475  } while (q1.ule(delta) || (q1 == delta && r1 == 0));
1476
1477  mag.m = q2 + 1;
1478  if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1479  mag.s = p - d.getBitWidth();          // resulting shift
1480  return mag;
1481}
1482
1483/// Calculate the magic numbers required to implement an unsigned integer
1484/// division by a constant as a sequence of multiplies, adds and shifts.
1485/// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1486/// S. Warren, Jr., chapter 10.
1487APInt::mu APInt::magicu() const {
1488  const APInt& d = *this;
1489  unsigned p;
1490  APInt nc, delta, q1, r1, q2, r2;
1491  struct mu magu;
1492  magu.a = 0;               // initialize "add" indicator
1493  APInt allOnes = APInt::getAllOnesValue(d.getBitWidth());
1494  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1495  APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1496
1497  nc = allOnes - (-d).urem(d);
1498  p = d.getBitWidth() - 1;  // initialize p
1499  q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1500  r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1501  q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1502  r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1503  do {
1504    p = p + 1;
1505    if (r1.uge(nc - r1)) {
1506      q1 = q1 + q1 + 1;  // update q1
1507      r1 = r1 + r1 - nc; // update r1
1508    }
1509    else {
1510      q1 = q1+q1; // update q1
1511      r1 = r1+r1; // update r1
1512    }
1513    if ((r2 + 1).uge(d - r2)) {
1514      if (q2.uge(signedMax)) magu.a = 1;
1515      q2 = q2+q2 + 1;     // update q2
1516      r2 = r2+r2 + 1 - d; // update r2
1517    }
1518    else {
1519      if (q2.uge(signedMin)) magu.a = 1;
1520      q2 = q2+q2;     // update q2
1521      r2 = r2+r2 + 1; // update r2
1522    }
1523    delta = d - 1 - r2;
1524  } while (p < d.getBitWidth()*2 &&
1525           (q1.ult(delta) || (q1 == delta && r1 == 0)));
1526  magu.m = q2 + 1; // resulting magic number
1527  magu.s = p - d.getBitWidth();  // resulting shift
1528  return magu;
1529}
1530
1531/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1532/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1533/// variables here have the same names as in the algorithm. Comments explain
1534/// the algorithm and any deviation from it.
1535static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
1536                     unsigned m, unsigned n) {
1537  assert(u && "Must provide dividend");
1538  assert(v && "Must provide divisor");
1539  assert(q && "Must provide quotient");
1540  assert(u != v && u != q && v != q && "Must us different memory");
1541  assert(n>1 && "n must be > 1");
1542
1543  // Knuth uses the value b as the base of the number system. In our case b
1544  // is 2^31 so we just set it to -1u.
1545  uint64_t b = uint64_t(1) << 32;
1546
1547#if 0
1548  DEBUG(errs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1549  DEBUG(errs() << "KnuthDiv: original:");
1550  DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]);
1551  DEBUG(errs() << " by");
1552  DEBUG(for (int i = n; i >0; i--) errs() << " " << v[i-1]);
1553  DEBUG(errs() << '\n');
1554#endif
1555  // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1556  // u and v by d. Note that we have taken Knuth's advice here to use a power
1557  // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1558  // 2 allows us to shift instead of multiply and it is easy to determine the
1559  // shift amount from the leading zeros.  We are basically normalizing the u
1560  // and v so that its high bits are shifted to the top of v's range without
1561  // overflow. Note that this can require an extra word in u so that u must
1562  // be of length m+n+1.
1563  unsigned shift = CountLeadingZeros_32(v[n-1]);
1564  unsigned v_carry = 0;
1565  unsigned u_carry = 0;
1566  if (shift) {
1567    for (unsigned i = 0; i < m+n; ++i) {
1568      unsigned u_tmp = u[i] >> (32 - shift);
1569      u[i] = (u[i] << shift) | u_carry;
1570      u_carry = u_tmp;
1571    }
1572    for (unsigned i = 0; i < n; ++i) {
1573      unsigned v_tmp = v[i] >> (32 - shift);
1574      v[i] = (v[i] << shift) | v_carry;
1575      v_carry = v_tmp;
1576    }
1577  }
1578  u[m+n] = u_carry;
1579#if 0
1580  DEBUG(errs() << "KnuthDiv:   normal:");
1581  DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]);
1582  DEBUG(errs() << " by");
1583  DEBUG(for (int i = n; i >0; i--) errs() << " " << v[i-1]);
1584  DEBUG(errs() << '\n');
1585#endif
1586
1587  // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1588  int j = m;
1589  do {
1590    DEBUG(errs() << "KnuthDiv: quotient digit #" << j << '\n');
1591    // D3. [Calculate q'.].
1592    //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1593    //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1594    // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1595    // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test
1596    // on v[n-2] determines at high speed most of the cases in which the trial
1597    // value qp is one too large, and it eliminates all cases where qp is two
1598    // too large.
1599    uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
1600    DEBUG(errs() << "KnuthDiv: dividend == " << dividend << '\n');
1601    uint64_t qp = dividend / v[n-1];
1602    uint64_t rp = dividend % v[n-1];
1603    if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1604      qp--;
1605      rp += v[n-1];
1606      if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1607        qp--;
1608    }
1609    DEBUG(errs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1610
1611    // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1612    // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1613    // consists of a simple multiplication by a one-place number, combined with
1614    // a subtraction.
1615    bool isNeg = false;
1616    for (unsigned i = 0; i < n; ++i) {
1617      uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32);
1618      uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]);
1619      bool borrow = subtrahend > u_tmp;
1620      DEBUG(errs() << "KnuthDiv: u_tmp == " << u_tmp
1621                   << ", subtrahend == " << subtrahend
1622                   << ", borrow = " << borrow << '\n');
1623
1624      uint64_t result = u_tmp - subtrahend;
1625      unsigned k = j + i;
1626      u[k++] = (unsigned)(result & (b-1)); // subtract low word
1627      u[k++] = (unsigned)(result >> 32);   // subtract high word
1628      while (borrow && k <= m+n) { // deal with borrow to the left
1629        borrow = u[k] == 0;
1630        u[k]--;
1631        k++;
1632      }
1633      isNeg |= borrow;
1634      DEBUG(errs() << "KnuthDiv: u[j+i] == " << u[j+i] << ",  u[j+i+1] == " <<
1635                    u[j+i+1] << '\n');
1636    }
1637    DEBUG(errs() << "KnuthDiv: after subtraction:");
1638    DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]);
1639    DEBUG(errs() << '\n');
1640    // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1641    // this step is actually negative, (u[j+n]...u[j]) should be left as the
1642    // true value plus b**(n+1), namely as the b's complement of
1643    // the true value, and a "borrow" to the left should be remembered.
1644    //
1645    if (isNeg) {
1646      bool carry = true;  // true because b's complement is "complement + 1"
1647      for (unsigned i = 0; i <= m+n; ++i) {
1648        u[i] = ~u[i] + carry; // b's complement
1649        carry = carry && u[i] == 0;
1650      }
1651    }
1652    DEBUG(errs() << "KnuthDiv: after complement:");
1653    DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]);
1654    DEBUG(errs() << '\n');
1655
1656    // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1657    // negative, go to step D6; otherwise go on to step D7.
1658    q[j] = (unsigned)qp;
1659    if (isNeg) {
1660      // D6. [Add back]. The probability that this step is necessary is very
1661      // small, on the order of only 2/b. Make sure that test data accounts for
1662      // this possibility. Decrease q[j] by 1
1663      q[j]--;
1664      // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1665      // A carry will occur to the left of u[j+n], and it should be ignored
1666      // since it cancels with the borrow that occurred in D4.
1667      bool carry = false;
1668      for (unsigned i = 0; i < n; i++) {
1669        unsigned limit = std::min(u[j+i],v[i]);
1670        u[j+i] += v[i] + carry;
1671        carry = u[j+i] < limit || (carry && u[j+i] == limit);
1672      }
1673      u[j+n] += carry;
1674    }
1675    DEBUG(errs() << "KnuthDiv: after correction:");
1676    DEBUG(for (int i = m+n; i >=0; i--) errs() <<" " << u[i]);
1677    DEBUG(errs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1678
1679  // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1680  } while (--j >= 0);
1681
1682  DEBUG(errs() << "KnuthDiv: quotient:");
1683  DEBUG(for (int i = m; i >=0; i--) errs() <<" " << q[i]);
1684  DEBUG(errs() << '\n');
1685
1686  // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1687  // remainder may be obtained by dividing u[...] by d. If r is non-null we
1688  // compute the remainder (urem uses this).
1689  if (r) {
1690    // The value d is expressed by the "shift" value above since we avoided
1691    // multiplication by d by using a shift left. So, all we have to do is
1692    // shift right here. In order to mak
1693    if (shift) {
1694      unsigned carry = 0;
1695      DEBUG(errs() << "KnuthDiv: remainder:");
1696      for (int i = n-1; i >= 0; i--) {
1697        r[i] = (u[i] >> shift) | carry;
1698        carry = u[i] << (32 - shift);
1699        DEBUG(errs() << " " << r[i]);
1700      }
1701    } else {
1702      for (int i = n-1; i >= 0; i--) {
1703        r[i] = u[i];
1704        DEBUG(errs() << " " << r[i]);
1705      }
1706    }
1707    DEBUG(errs() << '\n');
1708  }
1709#if 0
1710  DEBUG(errs() << '\n');
1711#endif
1712}
1713
1714void APInt::divide(const APInt LHS, unsigned lhsWords,
1715                   const APInt &RHS, unsigned rhsWords,
1716                   APInt *Quotient, APInt *Remainder)
1717{
1718  assert(lhsWords >= rhsWords && "Fractional result");
1719
1720  // First, compose the values into an array of 32-bit words instead of
1721  // 64-bit words. This is a necessity of both the "short division" algorithm
1722  // and the the Knuth "classical algorithm" which requires there to be native
1723  // operations for +, -, and * on an m bit value with an m*2 bit result. We
1724  // can't use 64-bit operands here because we don't have native results of
1725  // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1726  // work on large-endian machines.
1727  uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
1728  unsigned n = rhsWords * 2;
1729  unsigned m = (lhsWords * 2) - n;
1730
1731  // Allocate space for the temporary values we need either on the stack, if
1732  // it will fit, or on the heap if it won't.
1733  unsigned SPACE[128];
1734  unsigned *U = 0;
1735  unsigned *V = 0;
1736  unsigned *Q = 0;
1737  unsigned *R = 0;
1738  if ((Remainder?4:3)*n+2*m+1 <= 128) {
1739    U = &SPACE[0];
1740    V = &SPACE[m+n+1];
1741    Q = &SPACE[(m+n+1) + n];
1742    if (Remainder)
1743      R = &SPACE[(m+n+1) + n + (m+n)];
1744  } else {
1745    U = new unsigned[m + n + 1];
1746    V = new unsigned[n];
1747    Q = new unsigned[m+n];
1748    if (Remainder)
1749      R = new unsigned[n];
1750  }
1751
1752  // Initialize the dividend
1753  memset(U, 0, (m+n+1)*sizeof(unsigned));
1754  for (unsigned i = 0; i < lhsWords; ++i) {
1755    uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
1756    U[i * 2] = (unsigned)(tmp & mask);
1757    U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1758  }
1759  U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1760
1761  // Initialize the divisor
1762  memset(V, 0, (n)*sizeof(unsigned));
1763  for (unsigned i = 0; i < rhsWords; ++i) {
1764    uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
1765    V[i * 2] = (unsigned)(tmp & mask);
1766    V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1767  }
1768
1769  // initialize the quotient and remainder
1770  memset(Q, 0, (m+n) * sizeof(unsigned));
1771  if (Remainder)
1772    memset(R, 0, n * sizeof(unsigned));
1773
1774  // Now, adjust m and n for the Knuth division. n is the number of words in
1775  // the divisor. m is the number of words by which the dividend exceeds the
1776  // divisor (i.e. m+n is the length of the dividend). These sizes must not
1777  // contain any zero words or the Knuth algorithm fails.
1778  for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1779    n--;
1780    m++;
1781  }
1782  for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1783    m--;
1784
1785  // If we're left with only a single word for the divisor, Knuth doesn't work
1786  // so we implement the short division algorithm here. This is much simpler
1787  // and faster because we are certain that we can divide a 64-bit quantity
1788  // by a 32-bit quantity at hardware speed and short division is simply a
1789  // series of such operations. This is just like doing short division but we
1790  // are using base 2^32 instead of base 10.
1791  assert(n != 0 && "Divide by zero?");
1792  if (n == 1) {
1793    unsigned divisor = V[0];
1794    unsigned remainder = 0;
1795    for (int i = m+n-1; i >= 0; i--) {
1796      uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
1797      if (partial_dividend == 0) {
1798        Q[i] = 0;
1799        remainder = 0;
1800      } else if (partial_dividend < divisor) {
1801        Q[i] = 0;
1802        remainder = (unsigned)partial_dividend;
1803      } else if (partial_dividend == divisor) {
1804        Q[i] = 1;
1805        remainder = 0;
1806      } else {
1807        Q[i] = (unsigned)(partial_dividend / divisor);
1808        remainder = (unsigned)(partial_dividend - (Q[i] * divisor));
1809      }
1810    }
1811    if (R)
1812      R[0] = remainder;
1813  } else {
1814    // Now we're ready to invoke the Knuth classical divide algorithm. In this
1815    // case n > 1.
1816    KnuthDiv(U, V, Q, R, m, n);
1817  }
1818
1819  // If the caller wants the quotient
1820  if (Quotient) {
1821    // Set up the Quotient value's memory.
1822    if (Quotient->BitWidth != LHS.BitWidth) {
1823      if (Quotient->isSingleWord())
1824        Quotient->VAL = 0;
1825      else
1826        delete [] Quotient->pVal;
1827      Quotient->BitWidth = LHS.BitWidth;
1828      if (!Quotient->isSingleWord())
1829        Quotient->pVal = getClearedMemory(Quotient->getNumWords());
1830    } else
1831      Quotient->clear();
1832
1833    // The quotient is in Q. Reconstitute the quotient into Quotient's low
1834    // order words.
1835    if (lhsWords == 1) {
1836      uint64_t tmp =
1837        uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
1838      if (Quotient->isSingleWord())
1839        Quotient->VAL = tmp;
1840      else
1841        Quotient->pVal[0] = tmp;
1842    } else {
1843      assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
1844      for (unsigned i = 0; i < lhsWords; ++i)
1845        Quotient->pVal[i] =
1846          uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1847    }
1848  }
1849
1850  // If the caller wants the remainder
1851  if (Remainder) {
1852    // Set up the Remainder value's memory.
1853    if (Remainder->BitWidth != RHS.BitWidth) {
1854      if (Remainder->isSingleWord())
1855        Remainder->VAL = 0;
1856      else
1857        delete [] Remainder->pVal;
1858      Remainder->BitWidth = RHS.BitWidth;
1859      if (!Remainder->isSingleWord())
1860        Remainder->pVal = getClearedMemory(Remainder->getNumWords());
1861    } else
1862      Remainder->clear();
1863
1864    // The remainder is in R. Reconstitute the remainder into Remainder's low
1865    // order words.
1866    if (rhsWords == 1) {
1867      uint64_t tmp =
1868        uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
1869      if (Remainder->isSingleWord())
1870        Remainder->VAL = tmp;
1871      else
1872        Remainder->pVal[0] = tmp;
1873    } else {
1874      assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
1875      for (unsigned i = 0; i < rhsWords; ++i)
1876        Remainder->pVal[i] =
1877          uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1878    }
1879  }
1880
1881  // Clean up the memory we allocated.
1882  if (U != &SPACE[0]) {
1883    delete [] U;
1884    delete [] V;
1885    delete [] Q;
1886    delete [] R;
1887  }
1888}
1889
1890APInt APInt::udiv(const APInt& RHS) const {
1891  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1892
1893  // First, deal with the easy case
1894  if (isSingleWord()) {
1895    assert(RHS.VAL != 0 && "Divide by zero?");
1896    return APInt(BitWidth, VAL / RHS.VAL);
1897  }
1898
1899  // Get some facts about the LHS and RHS number of bits and words
1900  unsigned rhsBits = RHS.getActiveBits();
1901  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1902  assert(rhsWords && "Divided by zero???");
1903  unsigned lhsBits = this->getActiveBits();
1904  unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1905
1906  // Deal with some degenerate cases
1907  if (!lhsWords)
1908    // 0 / X ===> 0
1909    return APInt(BitWidth, 0);
1910  else if (lhsWords < rhsWords || this->ult(RHS)) {
1911    // X / Y ===> 0, iff X < Y
1912    return APInt(BitWidth, 0);
1913  } else if (*this == RHS) {
1914    // X / X ===> 1
1915    return APInt(BitWidth, 1);
1916  } else if (lhsWords == 1 && rhsWords == 1) {
1917    // All high words are zero, just use native divide
1918    return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
1919  }
1920
1921  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1922  APInt Quotient(1,0); // to hold result.
1923  divide(*this, lhsWords, RHS, rhsWords, &Quotient, 0);
1924  return Quotient;
1925}
1926
1927APInt APInt::urem(const APInt& RHS) const {
1928  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1929  if (isSingleWord()) {
1930    assert(RHS.VAL != 0 && "Remainder by zero?");
1931    return APInt(BitWidth, VAL % RHS.VAL);
1932  }
1933
1934  // Get some facts about the LHS
1935  unsigned lhsBits = getActiveBits();
1936  unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
1937
1938  // Get some facts about the RHS
1939  unsigned rhsBits = RHS.getActiveBits();
1940  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1941  assert(rhsWords && "Performing remainder operation by zero ???");
1942
1943  // Check the degenerate cases
1944  if (lhsWords == 0) {
1945    // 0 % Y ===> 0
1946    return APInt(BitWidth, 0);
1947  } else if (lhsWords < rhsWords || this->ult(RHS)) {
1948    // X % Y ===> X, iff X < Y
1949    return *this;
1950  } else if (*this == RHS) {
1951    // X % X == 0;
1952    return APInt(BitWidth, 0);
1953  } else if (lhsWords == 1) {
1954    // All high words are zero, just use native remainder
1955    return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
1956  }
1957
1958  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1959  APInt Remainder(1,0);
1960  divide(*this, lhsWords, RHS, rhsWords, 0, &Remainder);
1961  return Remainder;
1962}
1963
1964void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1965                    APInt &Quotient, APInt &Remainder) {
1966  // Get some size facts about the dividend and divisor
1967  unsigned lhsBits  = LHS.getActiveBits();
1968  unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1969  unsigned rhsBits  = RHS.getActiveBits();
1970  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1971
1972  // Check the degenerate cases
1973  if (lhsWords == 0) {
1974    Quotient = 0;                // 0 / Y ===> 0
1975    Remainder = 0;               // 0 % Y ===> 0
1976    return;
1977  }
1978
1979  if (lhsWords < rhsWords || LHS.ult(RHS)) {
1980    Quotient = 0;               // X / Y ===> 0, iff X < Y
1981    Remainder = LHS;            // X % Y ===> X, iff X < Y
1982    return;
1983  }
1984
1985  if (LHS == RHS) {
1986    Quotient  = 1;              // X / X ===> 1
1987    Remainder = 0;              // X % X ===> 0;
1988    return;
1989  }
1990
1991  if (lhsWords == 1 && rhsWords == 1) {
1992    // There is only one word to consider so use the native versions.
1993    uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
1994    uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
1995    Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
1996    Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
1997    return;
1998  }
1999
2000  // Okay, lets do it the long way
2001  divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
2002}
2003
2004void APInt::fromString(unsigned numbits, const char *str, unsigned slen,
2005                       uint8_t radix) {
2006  // Check our assumptions here
2007  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
2008         "Radix should be 2, 8, 10, or 16!");
2009  assert(str && "String is null?");
2010  bool isNeg = str[0] == '-';
2011  if (isNeg)
2012    str++, slen--;
2013  assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2014  assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2015  assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2016  assert((((slen-1)*64)/22 <= numbits || radix != 10) && "Insufficient bit width");
2017
2018  // Allocate memory
2019  if (!isSingleWord())
2020    pVal = getClearedMemory(getNumWords());
2021
2022  // Figure out if we can shift instead of multiply
2023  unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2024
2025  // Set up an APInt for the digit to add outside the loop so we don't
2026  // constantly construct/destruct it.
2027  APInt apdigit(getBitWidth(), 0);
2028  APInt apradix(getBitWidth(), radix);
2029
2030  // Enter digit traversal loop
2031  for (unsigned i = 0; i < slen; i++) {
2032    // Get a digit
2033    unsigned digit = 0;
2034    char cdigit = str[i];
2035    if (radix == 16) {
2036      if (!isxdigit(cdigit))
2037        llvm_unreachable("Invalid hex digit in string");
2038      if (isdigit(cdigit))
2039        digit = cdigit - '0';
2040      else if (cdigit >= 'a')
2041        digit = cdigit - 'a' + 10;
2042      else if (cdigit >= 'A')
2043        digit = cdigit - 'A' + 10;
2044      else
2045        llvm_unreachable("huh? we shouldn't get here");
2046    } else if (isdigit(cdigit)) {
2047      digit = cdigit - '0';
2048      assert((radix == 10 ||
2049              (radix == 8 && digit != 8 && digit != 9) ||
2050              (radix == 2 && (digit == 0 || digit == 1))) &&
2051             "Invalid digit in string for given radix");
2052    } else {
2053      llvm_unreachable("Invalid character in digit string");
2054    }
2055
2056    // Shift or multiply the value by the radix
2057    if (slen > 1) {
2058      if (shift)
2059        *this <<= shift;
2060      else
2061        *this *= apradix;
2062    }
2063
2064    // Add in the digit we just interpreted
2065    if (apdigit.isSingleWord())
2066      apdigit.VAL = digit;
2067    else
2068      apdigit.pVal[0] = digit;
2069    *this += apdigit;
2070  }
2071  // If its negative, put it in two's complement form
2072  if (isNeg) {
2073    (*this)--;
2074    this->flip();
2075  }
2076}
2077
2078void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2079                     bool Signed) const {
2080  assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) &&
2081         "Radix should be 2, 8, 10, or 16!");
2082
2083  // First, check for a zero value and just short circuit the logic below.
2084  if (*this == 0) {
2085    Str.push_back('0');
2086    return;
2087  }
2088
2089  static const char Digits[] = "0123456789ABCDEF";
2090
2091  if (isSingleWord()) {
2092    char Buffer[65];
2093    char *BufPtr = Buffer+65;
2094
2095    uint64_t N;
2096    if (Signed) {
2097      int64_t I = getSExtValue();
2098      if (I < 0) {
2099        Str.push_back('-');
2100        I = -I;
2101      }
2102      N = I;
2103    } else {
2104      N = getZExtValue();
2105    }
2106
2107    while (N) {
2108      *--BufPtr = Digits[N % Radix];
2109      N /= Radix;
2110    }
2111    Str.append(BufPtr, Buffer+65);
2112    return;
2113  }
2114
2115  APInt Tmp(*this);
2116
2117  if (Signed && isNegative()) {
2118    // They want to print the signed version and it is a negative value
2119    // Flip the bits and add one to turn it into the equivalent positive
2120    // value and put a '-' in the result.
2121    Tmp.flip();
2122    Tmp++;
2123    Str.push_back('-');
2124  }
2125
2126  // We insert the digits backward, then reverse them to get the right order.
2127  unsigned StartDig = Str.size();
2128
2129  // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2130  // because the number of bits per digit (1, 3 and 4 respectively) divides
2131  // equaly.  We just shift until the value is zero.
2132  if (Radix != 10) {
2133    // Just shift tmp right for each digit width until it becomes zero
2134    unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2135    unsigned MaskAmt = Radix - 1;
2136
2137    while (Tmp != 0) {
2138      unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2139      Str.push_back(Digits[Digit]);
2140      Tmp = Tmp.lshr(ShiftAmt);
2141    }
2142  } else {
2143    APInt divisor(4, 10);
2144    while (Tmp != 0) {
2145      APInt APdigit(1, 0);
2146      APInt tmp2(Tmp.getBitWidth(), 0);
2147      divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
2148             &APdigit);
2149      unsigned Digit = (unsigned)APdigit.getZExtValue();
2150      assert(Digit < Radix && "divide failed");
2151      Str.push_back(Digits[Digit]);
2152      Tmp = tmp2;
2153    }
2154  }
2155
2156  // Reverse the digits before returning.
2157  std::reverse(Str.begin()+StartDig, Str.end());
2158}
2159
2160/// toString - This returns the APInt as a std::string.  Note that this is an
2161/// inefficient method.  It is better to pass in a SmallVector/SmallString
2162/// to the methods above.
2163std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2164  SmallString<40> S;
2165  toString(S, Radix, Signed);
2166  return S.c_str();
2167}
2168
2169
2170void APInt::dump() const {
2171  SmallString<40> S, U;
2172  this->toStringUnsigned(U);
2173  this->toStringSigned(S);
2174  fprintf(stderr, "APInt(%db, %su %ss)", BitWidth, U.c_str(), S.c_str());
2175}
2176
2177void APInt::print(raw_ostream &OS, bool isSigned) const {
2178  SmallString<40> S;
2179  this->toString(S, 10, isSigned);
2180  OS << S.c_str();
2181}
2182
2183std::ostream &llvm::operator<<(std::ostream &o, const APInt &I) {
2184  raw_os_ostream OS(o);
2185  OS << I;
2186  return o;
2187}
2188
2189// This implements a variety of operations on a representation of
2190// arbitrary precision, two's-complement, bignum integer values.
2191
2192/* Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2193   and unrestricting assumption.  */
2194#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
2195COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0);
2196
2197/* Some handy functions local to this file.  */
2198namespace {
2199
2200  /* Returns the integer part with the least significant BITS set.
2201     BITS cannot be zero.  */
2202  static inline integerPart
2203  lowBitMask(unsigned int bits)
2204  {
2205    assert (bits != 0 && bits <= integerPartWidth);
2206
2207    return ~(integerPart) 0 >> (integerPartWidth - bits);
2208  }
2209
2210  /* Returns the value of the lower half of PART.  */
2211  static inline integerPart
2212  lowHalf(integerPart part)
2213  {
2214    return part & lowBitMask(integerPartWidth / 2);
2215  }
2216
2217  /* Returns the value of the upper half of PART.  */
2218  static inline integerPart
2219  highHalf(integerPart part)
2220  {
2221    return part >> (integerPartWidth / 2);
2222  }
2223
2224  /* Returns the bit number of the most significant set bit of a part.
2225     If the input number has no bits set -1U is returned.  */
2226  static unsigned int
2227  partMSB(integerPart value)
2228  {
2229    unsigned int n, msb;
2230
2231    if (value == 0)
2232      return -1U;
2233
2234    n = integerPartWidth / 2;
2235
2236    msb = 0;
2237    do {
2238      if (value >> n) {
2239        value >>= n;
2240        msb += n;
2241      }
2242
2243      n >>= 1;
2244    } while (n);
2245
2246    return msb;
2247  }
2248
2249  /* Returns the bit number of the least significant set bit of a
2250     part.  If the input number has no bits set -1U is returned.  */
2251  static unsigned int
2252  partLSB(integerPart value)
2253  {
2254    unsigned int n, lsb;
2255
2256    if (value == 0)
2257      return -1U;
2258
2259    lsb = integerPartWidth - 1;
2260    n = integerPartWidth / 2;
2261
2262    do {
2263      if (value << n) {
2264        value <<= n;
2265        lsb -= n;
2266      }
2267
2268      n >>= 1;
2269    } while (n);
2270
2271    return lsb;
2272  }
2273}
2274
2275/* Sets the least significant part of a bignum to the input value, and
2276   zeroes out higher parts.  */
2277void
2278APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts)
2279{
2280  unsigned int i;
2281
2282  assert (parts > 0);
2283
2284  dst[0] = part;
2285  for(i = 1; i < parts; i++)
2286    dst[i] = 0;
2287}
2288
2289/* Assign one bignum to another.  */
2290void
2291APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts)
2292{
2293  unsigned int i;
2294
2295  for(i = 0; i < parts; i++)
2296    dst[i] = src[i];
2297}
2298
2299/* Returns true if a bignum is zero, false otherwise.  */
2300bool
2301APInt::tcIsZero(const integerPart *src, unsigned int parts)
2302{
2303  unsigned int i;
2304
2305  for(i = 0; i < parts; i++)
2306    if (src[i])
2307      return false;
2308
2309  return true;
2310}
2311
2312/* Extract the given bit of a bignum; returns 0 or 1.  */
2313int
2314APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
2315{
2316  return(parts[bit / integerPartWidth]
2317         & ((integerPart) 1 << bit % integerPartWidth)) != 0;
2318}
2319
2320/* Set the given bit of a bignum.  */
2321void
2322APInt::tcSetBit(integerPart *parts, unsigned int bit)
2323{
2324  parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth);
2325}
2326
2327/* Returns the bit number of the least significant set bit of a
2328   number.  If the input number has no bits set -1U is returned.  */
2329unsigned int
2330APInt::tcLSB(const integerPart *parts, unsigned int n)
2331{
2332  unsigned int i, lsb;
2333
2334  for(i = 0; i < n; i++) {
2335      if (parts[i] != 0) {
2336          lsb = partLSB(parts[i]);
2337
2338          return lsb + i * integerPartWidth;
2339      }
2340  }
2341
2342  return -1U;
2343}
2344
2345/* Returns the bit number of the most significant set bit of a number.
2346   If the input number has no bits set -1U is returned.  */
2347unsigned int
2348APInt::tcMSB(const integerPart *parts, unsigned int n)
2349{
2350  unsigned int msb;
2351
2352  do {
2353      --n;
2354
2355      if (parts[n] != 0) {
2356          msb = partMSB(parts[n]);
2357
2358          return msb + n * integerPartWidth;
2359      }
2360  } while (n);
2361
2362  return -1U;
2363}
2364
2365/* Copy the bit vector of width srcBITS from SRC, starting at bit
2366   srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2367   the least significant bit of DST.  All high bits above srcBITS in
2368   DST are zero-filled.  */
2369void
2370APInt::tcExtract(integerPart *dst, unsigned int dstCount,const integerPart *src,
2371                 unsigned int srcBits, unsigned int srcLSB)
2372{
2373  unsigned int firstSrcPart, dstParts, shift, n;
2374
2375  dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth;
2376  assert (dstParts <= dstCount);
2377
2378  firstSrcPart = srcLSB / integerPartWidth;
2379  tcAssign (dst, src + firstSrcPart, dstParts);
2380
2381  shift = srcLSB % integerPartWidth;
2382  tcShiftRight (dst, dstParts, shift);
2383
2384  /* We now have (dstParts * integerPartWidth - shift) bits from SRC
2385     in DST.  If this is less that srcBits, append the rest, else
2386     clear the high bits.  */
2387  n = dstParts * integerPartWidth - shift;
2388  if (n < srcBits) {
2389    integerPart mask = lowBitMask (srcBits - n);
2390    dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2391                          << n % integerPartWidth);
2392  } else if (n > srcBits) {
2393    if (srcBits % integerPartWidth)
2394      dst[dstParts - 1] &= lowBitMask (srcBits % integerPartWidth);
2395  }
2396
2397  /* Clear high parts.  */
2398  while (dstParts < dstCount)
2399    dst[dstParts++] = 0;
2400}
2401
2402/* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2403integerPart
2404APInt::tcAdd(integerPart *dst, const integerPart *rhs,
2405             integerPart c, unsigned int parts)
2406{
2407  unsigned int i;
2408
2409  assert(c <= 1);
2410
2411  for(i = 0; i < parts; i++) {
2412    integerPart l;
2413
2414    l = dst[i];
2415    if (c) {
2416      dst[i] += rhs[i] + 1;
2417      c = (dst[i] <= l);
2418    } else {
2419      dst[i] += rhs[i];
2420      c = (dst[i] < l);
2421    }
2422  }
2423
2424  return c;
2425}
2426
2427/* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2428integerPart
2429APInt::tcSubtract(integerPart *dst, const integerPart *rhs,
2430                  integerPart c, unsigned int parts)
2431{
2432  unsigned int i;
2433
2434  assert(c <= 1);
2435
2436  for(i = 0; i < parts; i++) {
2437    integerPart l;
2438
2439    l = dst[i];
2440    if (c) {
2441      dst[i] -= rhs[i] + 1;
2442      c = (dst[i] >= l);
2443    } else {
2444      dst[i] -= rhs[i];
2445      c = (dst[i] > l);
2446    }
2447  }
2448
2449  return c;
2450}
2451
2452/* Negate a bignum in-place.  */
2453void
2454APInt::tcNegate(integerPart *dst, unsigned int parts)
2455{
2456  tcComplement(dst, parts);
2457  tcIncrement(dst, parts);
2458}
2459
2460/*  DST += SRC * MULTIPLIER + CARRY   if add is true
2461    DST  = SRC * MULTIPLIER + CARRY   if add is false
2462
2463    Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2464    they must start at the same point, i.e. DST == SRC.
2465
2466    If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2467    returned.  Otherwise DST is filled with the least significant
2468    DSTPARTS parts of the result, and if all of the omitted higher
2469    parts were zero return zero, otherwise overflow occurred and
2470    return one.  */
2471int
2472APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
2473                      integerPart multiplier, integerPart carry,
2474                      unsigned int srcParts, unsigned int dstParts,
2475                      bool add)
2476{
2477  unsigned int i, n;
2478
2479  /* Otherwise our writes of DST kill our later reads of SRC.  */
2480  assert(dst <= src || dst >= src + srcParts);
2481  assert(dstParts <= srcParts + 1);
2482
2483  /* N loops; minimum of dstParts and srcParts.  */
2484  n = dstParts < srcParts ? dstParts: srcParts;
2485
2486  for(i = 0; i < n; i++) {
2487    integerPart low, mid, high, srcPart;
2488
2489      /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2490
2491         This cannot overflow, because
2492
2493         (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2494
2495         which is less than n^2.  */
2496
2497    srcPart = src[i];
2498
2499    if (multiplier == 0 || srcPart == 0)        {
2500      low = carry;
2501      high = 0;
2502    } else {
2503      low = lowHalf(srcPart) * lowHalf(multiplier);
2504      high = highHalf(srcPart) * highHalf(multiplier);
2505
2506      mid = lowHalf(srcPart) * highHalf(multiplier);
2507      high += highHalf(mid);
2508      mid <<= integerPartWidth / 2;
2509      if (low + mid < low)
2510        high++;
2511      low += mid;
2512
2513      mid = highHalf(srcPart) * lowHalf(multiplier);
2514      high += highHalf(mid);
2515      mid <<= integerPartWidth / 2;
2516      if (low + mid < low)
2517        high++;
2518      low += mid;
2519
2520      /* Now add carry.  */
2521      if (low + carry < low)
2522        high++;
2523      low += carry;
2524    }
2525
2526    if (add) {
2527      /* And now DST[i], and store the new low part there.  */
2528      if (low + dst[i] < low)
2529        high++;
2530      dst[i] += low;
2531    } else
2532      dst[i] = low;
2533
2534    carry = high;
2535  }
2536
2537  if (i < dstParts) {
2538    /* Full multiplication, there is no overflow.  */
2539    assert(i + 1 == dstParts);
2540    dst[i] = carry;
2541    return 0;
2542  } else {
2543    /* We overflowed if there is carry.  */
2544    if (carry)
2545      return 1;
2546
2547    /* We would overflow if any significant unwritten parts would be
2548       non-zero.  This is true if any remaining src parts are non-zero
2549       and the multiplier is non-zero.  */
2550    if (multiplier)
2551      for(; i < srcParts; i++)
2552        if (src[i])
2553          return 1;
2554
2555    /* We fitted in the narrow destination.  */
2556    return 0;
2557  }
2558}
2559
2560/* DST = LHS * RHS, where DST has the same width as the operands and
2561   is filled with the least significant parts of the result.  Returns
2562   one if overflow occurred, otherwise zero.  DST must be disjoint
2563   from both operands.  */
2564int
2565APInt::tcMultiply(integerPart *dst, const integerPart *lhs,
2566                  const integerPart *rhs, unsigned int parts)
2567{
2568  unsigned int i;
2569  int overflow;
2570
2571  assert(dst != lhs && dst != rhs);
2572
2573  overflow = 0;
2574  tcSet(dst, 0, parts);
2575
2576  for(i = 0; i < parts; i++)
2577    overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2578                               parts - i, true);
2579
2580  return overflow;
2581}
2582
2583/* DST = LHS * RHS, where DST has width the sum of the widths of the
2584   operands.  No overflow occurs.  DST must be disjoint from both
2585   operands.  Returns the number of parts required to hold the
2586   result.  */
2587unsigned int
2588APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs,
2589                      const integerPart *rhs, unsigned int lhsParts,
2590                      unsigned int rhsParts)
2591{
2592  /* Put the narrower number on the LHS for less loops below.  */
2593  if (lhsParts > rhsParts) {
2594    return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2595  } else {
2596    unsigned int n;
2597
2598    assert(dst != lhs && dst != rhs);
2599
2600    tcSet(dst, 0, rhsParts);
2601
2602    for(n = 0; n < lhsParts; n++)
2603      tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true);
2604
2605    n = lhsParts + rhsParts;
2606
2607    return n - (dst[n - 1] == 0);
2608  }
2609}
2610
2611/* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2612   Otherwise set LHS to LHS / RHS with the fractional part discarded,
2613   set REMAINDER to the remainder, return zero.  i.e.
2614
2615   OLD_LHS = RHS * LHS + REMAINDER
2616
2617   SCRATCH is a bignum of the same size as the operands and result for
2618   use by the routine; its contents need not be initialized and are
2619   destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2620*/
2621int
2622APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
2623                integerPart *remainder, integerPart *srhs,
2624                unsigned int parts)
2625{
2626  unsigned int n, shiftCount;
2627  integerPart mask;
2628
2629  assert(lhs != remainder && lhs != srhs && remainder != srhs);
2630
2631  shiftCount = tcMSB(rhs, parts) + 1;
2632  if (shiftCount == 0)
2633    return true;
2634
2635  shiftCount = parts * integerPartWidth - shiftCount;
2636  n = shiftCount / integerPartWidth;
2637  mask = (integerPart) 1 << (shiftCount % integerPartWidth);
2638
2639  tcAssign(srhs, rhs, parts);
2640  tcShiftLeft(srhs, parts, shiftCount);
2641  tcAssign(remainder, lhs, parts);
2642  tcSet(lhs, 0, parts);
2643
2644  /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2645     the total.  */
2646  for(;;) {
2647      int compare;
2648
2649      compare = tcCompare(remainder, srhs, parts);
2650      if (compare >= 0) {
2651        tcSubtract(remainder, srhs, 0, parts);
2652        lhs[n] |= mask;
2653      }
2654
2655      if (shiftCount == 0)
2656        break;
2657      shiftCount--;
2658      tcShiftRight(srhs, parts, 1);
2659      if ((mask >>= 1) == 0)
2660        mask = (integerPart) 1 << (integerPartWidth - 1), n--;
2661  }
2662
2663  return false;
2664}
2665
2666/* Shift a bignum left COUNT bits in-place.  Shifted in bits are zero.
2667   There are no restrictions on COUNT.  */
2668void
2669APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count)
2670{
2671  if (count) {
2672    unsigned int jump, shift;
2673
2674    /* Jump is the inter-part jump; shift is is intra-part shift.  */
2675    jump = count / integerPartWidth;
2676    shift = count % integerPartWidth;
2677
2678    while (parts > jump) {
2679      integerPart part;
2680
2681      parts--;
2682
2683      /* dst[i] comes from the two parts src[i - jump] and, if we have
2684         an intra-part shift, src[i - jump - 1].  */
2685      part = dst[parts - jump];
2686      if (shift) {
2687        part <<= shift;
2688        if (parts >= jump + 1)
2689          part |= dst[parts - jump - 1] >> (integerPartWidth - shift);
2690      }
2691
2692      dst[parts] = part;
2693    }
2694
2695    while (parts > 0)
2696      dst[--parts] = 0;
2697  }
2698}
2699
2700/* Shift a bignum right COUNT bits in-place.  Shifted in bits are
2701   zero.  There are no restrictions on COUNT.  */
2702void
2703APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
2704{
2705  if (count) {
2706    unsigned int i, jump, shift;
2707
2708    /* Jump is the inter-part jump; shift is is intra-part shift.  */
2709    jump = count / integerPartWidth;
2710    shift = count % integerPartWidth;
2711
2712    /* Perform the shift.  This leaves the most significant COUNT bits
2713       of the result at zero.  */
2714    for(i = 0; i < parts; i++) {
2715      integerPart part;
2716
2717      if (i + jump >= parts) {
2718        part = 0;
2719      } else {
2720        part = dst[i + jump];
2721        if (shift) {
2722          part >>= shift;
2723          if (i + jump + 1 < parts)
2724            part |= dst[i + jump + 1] << (integerPartWidth - shift);
2725        }
2726      }
2727
2728      dst[i] = part;
2729    }
2730  }
2731}
2732
2733/* Bitwise and of two bignums.  */
2734void
2735APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts)
2736{
2737  unsigned int i;
2738
2739  for(i = 0; i < parts; i++)
2740    dst[i] &= rhs[i];
2741}
2742
2743/* Bitwise inclusive or of two bignums.  */
2744void
2745APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts)
2746{
2747  unsigned int i;
2748
2749  for(i = 0; i < parts; i++)
2750    dst[i] |= rhs[i];
2751}
2752
2753/* Bitwise exclusive or of two bignums.  */
2754void
2755APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts)
2756{
2757  unsigned int i;
2758
2759  for(i = 0; i < parts; i++)
2760    dst[i] ^= rhs[i];
2761}
2762
2763/* Complement a bignum in-place.  */
2764void
2765APInt::tcComplement(integerPart *dst, unsigned int parts)
2766{
2767  unsigned int i;
2768
2769  for(i = 0; i < parts; i++)
2770    dst[i] = ~dst[i];
2771}
2772
2773/* Comparison (unsigned) of two bignums.  */
2774int
2775APInt::tcCompare(const integerPart *lhs, const integerPart *rhs,
2776                 unsigned int parts)
2777{
2778  while (parts) {
2779      parts--;
2780      if (lhs[parts] == rhs[parts])
2781        continue;
2782
2783      if (lhs[parts] > rhs[parts])
2784        return 1;
2785      else
2786        return -1;
2787    }
2788
2789  return 0;
2790}
2791
2792/* Increment a bignum in-place, return the carry flag.  */
2793integerPart
2794APInt::tcIncrement(integerPart *dst, unsigned int parts)
2795{
2796  unsigned int i;
2797
2798  for(i = 0; i < parts; i++)
2799    if (++dst[i] != 0)
2800      break;
2801
2802  return i == parts;
2803}
2804
2805/* Set the least significant BITS bits of a bignum, clear the
2806   rest.  */
2807void
2808APInt::tcSetLeastSignificantBits(integerPart *dst, unsigned int parts,
2809                                 unsigned int bits)
2810{
2811  unsigned int i;
2812
2813  i = 0;
2814  while (bits > integerPartWidth) {
2815    dst[i++] = ~(integerPart) 0;
2816    bits -= integerPartWidth;
2817  }
2818
2819  if (bits)
2820    dst[i++] = ~(integerPart) 0 >> (integerPartWidth - bits);
2821
2822  while (i < parts)
2823    dst[i++] = 0;
2824}
2825