APInt.cpp revision c25e7581b9b8088910da31702d4ca21c4734c6d7
1//===-- APInt.cpp - Implement APInt class ---------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements a class to represent arbitrary precision integer
11// constant values and provide a variety of arithmetic operations on them.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "apint"
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/FoldingSet.h"
18#include "llvm/ADT/SmallString.h"
19#include "llvm/Support/Debug.h"
20#include "llvm/Support/ErrorHandling.h"
21#include "llvm/Support/MathExtras.h"
22#include "llvm/Support/raw_ostream.h"
23#include <cmath>
24#include <limits>
25#include <cstring>
26#include <cstdlib>
27using namespace llvm;
28
29/// A utility function for allocating memory, checking for allocation failures,
30/// and ensuring the contents are zeroed.
31inline static uint64_t* getClearedMemory(unsigned numWords) {
32  uint64_t * result = new uint64_t[numWords];
33  assert(result && "APInt memory allocation fails!");
34  memset(result, 0, numWords * sizeof(uint64_t));
35  return result;
36}
37
38/// A utility function for allocating memory and checking for allocation
39/// failure.  The content is not zeroed.
40inline static uint64_t* getMemory(unsigned numWords) {
41  uint64_t * result = new uint64_t[numWords];
42  assert(result && "APInt memory allocation fails!");
43  return result;
44}
45
46void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) {
47  pVal = getClearedMemory(getNumWords());
48  pVal[0] = val;
49  if (isSigned && int64_t(val) < 0)
50    for (unsigned i = 1; i < getNumWords(); ++i)
51      pVal[i] = -1ULL;
52}
53
54void APInt::initSlowCase(const APInt& that) {
55  pVal = getMemory(getNumWords());
56  memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
57}
58
59
60APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
61  : BitWidth(numBits), VAL(0) {
62  assert(BitWidth && "bitwidth too small");
63  assert(bigVal && "Null pointer detected!");
64  if (isSingleWord())
65    VAL = bigVal[0];
66  else {
67    // Get memory, cleared to 0
68    pVal = getClearedMemory(getNumWords());
69    // Calculate the number of words to copy
70    unsigned words = std::min<unsigned>(numWords, getNumWords());
71    // Copy the words from bigVal to pVal
72    memcpy(pVal, bigVal, words * APINT_WORD_SIZE);
73  }
74  // Make sure unused high bits are cleared
75  clearUnusedBits();
76}
77
78APInt::APInt(unsigned numbits, const char StrStart[], unsigned slen,
79             uint8_t radix)
80  : BitWidth(numbits), VAL(0) {
81  assert(BitWidth && "bitwidth too small");
82  fromString(numbits, StrStart, slen, radix);
83}
84
85APInt& APInt::AssignSlowCase(const APInt& RHS) {
86  // Don't do anything for X = X
87  if (this == &RHS)
88    return *this;
89
90  if (BitWidth == RHS.getBitWidth()) {
91    // assume same bit-width single-word case is already handled
92    assert(!isSingleWord());
93    memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
94    return *this;
95  }
96
97  if (isSingleWord()) {
98    // assume case where both are single words is already handled
99    assert(!RHS.isSingleWord());
100    VAL = 0;
101    pVal = getMemory(RHS.getNumWords());
102    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
103  } else if (getNumWords() == RHS.getNumWords())
104    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
105  else if (RHS.isSingleWord()) {
106    delete [] pVal;
107    VAL = RHS.VAL;
108  } else {
109    delete [] pVal;
110    pVal = getMemory(RHS.getNumWords());
111    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
112  }
113  BitWidth = RHS.BitWidth;
114  return clearUnusedBits();
115}
116
117APInt& APInt::operator=(uint64_t RHS) {
118  if (isSingleWord())
119    VAL = RHS;
120  else {
121    pVal[0] = RHS;
122    memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
123  }
124  return clearUnusedBits();
125}
126
127/// Profile - This method 'profiles' an APInt for use with FoldingSet.
128void APInt::Profile(FoldingSetNodeID& ID) const {
129  ID.AddInteger(BitWidth);
130
131  if (isSingleWord()) {
132    ID.AddInteger(VAL);
133    return;
134  }
135
136  unsigned NumWords = getNumWords();
137  for (unsigned i = 0; i < NumWords; ++i)
138    ID.AddInteger(pVal[i]);
139}
140
141/// add_1 - This function adds a single "digit" integer, y, to the multiple
142/// "digit" integer array,  x[]. x[] is modified to reflect the addition and
143/// 1 is returned if there is a carry out, otherwise 0 is returned.
144/// @returns the carry of the addition.
145static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
146  for (unsigned i = 0; i < len; ++i) {
147    dest[i] = y + x[i];
148    if (dest[i] < y)
149      y = 1; // Carry one to next digit.
150    else {
151      y = 0; // No need to carry so exit early
152      break;
153    }
154  }
155  return y;
156}
157
158/// @brief Prefix increment operator. Increments the APInt by one.
159APInt& APInt::operator++() {
160  if (isSingleWord())
161    ++VAL;
162  else
163    add_1(pVal, pVal, getNumWords(), 1);
164  return clearUnusedBits();
165}
166
167/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
168/// the multi-digit integer array, x[], propagating the borrowed 1 value until
169/// no further borrowing is neeeded or it runs out of "digits" in x.  The result
170/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
171/// In other words, if y > x then this function returns 1, otherwise 0.
172/// @returns the borrow out of the subtraction
173static bool sub_1(uint64_t x[], unsigned len, uint64_t y) {
174  for (unsigned i = 0; i < len; ++i) {
175    uint64_t X = x[i];
176    x[i] -= y;
177    if (y > X)
178      y = 1;  // We have to "borrow 1" from next "digit"
179    else {
180      y = 0;  // No need to borrow
181      break;  // Remaining digits are unchanged so exit early
182    }
183  }
184  return bool(y);
185}
186
187/// @brief Prefix decrement operator. Decrements the APInt by one.
188APInt& APInt::operator--() {
189  if (isSingleWord())
190    --VAL;
191  else
192    sub_1(pVal, getNumWords(), 1);
193  return clearUnusedBits();
194}
195
196/// add - This function adds the integer array x to the integer array Y and
197/// places the result in dest.
198/// @returns the carry out from the addition
199/// @brief General addition of 64-bit integer arrays
200static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
201                unsigned len) {
202  bool carry = false;
203  for (unsigned i = 0; i< len; ++i) {
204    uint64_t limit = std::min(x[i],y[i]); // must come first in case dest == x
205    dest[i] = x[i] + y[i] + carry;
206    carry = dest[i] < limit || (carry && dest[i] == limit);
207  }
208  return carry;
209}
210
211/// Adds the RHS APint to this APInt.
212/// @returns this, after addition of RHS.
213/// @brief Addition assignment operator.
214APInt& APInt::operator+=(const APInt& RHS) {
215  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
216  if (isSingleWord())
217    VAL += RHS.VAL;
218  else {
219    add(pVal, pVal, RHS.pVal, getNumWords());
220  }
221  return clearUnusedBits();
222}
223
224/// Subtracts the integer array y from the integer array x
225/// @returns returns the borrow out.
226/// @brief Generalized subtraction of 64-bit integer arrays.
227static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
228                unsigned len) {
229  bool borrow = false;
230  for (unsigned i = 0; i < len; ++i) {
231    uint64_t x_tmp = borrow ? x[i] - 1 : x[i];
232    borrow = y[i] > x_tmp || (borrow && x[i] == 0);
233    dest[i] = x_tmp - y[i];
234  }
235  return borrow;
236}
237
238/// Subtracts the RHS APInt from this APInt
239/// @returns this, after subtraction
240/// @brief Subtraction assignment operator.
241APInt& APInt::operator-=(const APInt& RHS) {
242  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
243  if (isSingleWord())
244    VAL -= RHS.VAL;
245  else
246    sub(pVal, pVal, RHS.pVal, getNumWords());
247  return clearUnusedBits();
248}
249
250/// Multiplies an integer array, x by a a uint64_t integer and places the result
251/// into dest.
252/// @returns the carry out of the multiplication.
253/// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
254static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
255  // Split y into high 32-bit part (hy)  and low 32-bit part (ly)
256  uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
257  uint64_t carry = 0;
258
259  // For each digit of x.
260  for (unsigned i = 0; i < len; ++i) {
261    // Split x into high and low words
262    uint64_t lx = x[i] & 0xffffffffULL;
263    uint64_t hx = x[i] >> 32;
264    // hasCarry - A flag to indicate if there is a carry to the next digit.
265    // hasCarry == 0, no carry
266    // hasCarry == 1, has carry
267    // hasCarry == 2, no carry and the calculation result == 0.
268    uint8_t hasCarry = 0;
269    dest[i] = carry + lx * ly;
270    // Determine if the add above introduces carry.
271    hasCarry = (dest[i] < carry) ? 1 : 0;
272    carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
273    // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
274    // (2^32 - 1) + 2^32 = 2^64.
275    hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
276
277    carry += (lx * hy) & 0xffffffffULL;
278    dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
279    carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
280            (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
281  }
282  return carry;
283}
284
285/// Multiplies integer array x by integer array y and stores the result into
286/// the integer array dest. Note that dest's size must be >= xlen + ylen.
287/// @brief Generalized multiplicate of integer arrays.
288static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
289                unsigned ylen) {
290  dest[xlen] = mul_1(dest, x, xlen, y[0]);
291  for (unsigned i = 1; i < ylen; ++i) {
292    uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
293    uint64_t carry = 0, lx = 0, hx = 0;
294    for (unsigned j = 0; j < xlen; ++j) {
295      lx = x[j] & 0xffffffffULL;
296      hx = x[j] >> 32;
297      // hasCarry - A flag to indicate if has carry.
298      // hasCarry == 0, no carry
299      // hasCarry == 1, has carry
300      // hasCarry == 2, no carry and the calculation result == 0.
301      uint8_t hasCarry = 0;
302      uint64_t resul = carry + lx * ly;
303      hasCarry = (resul < carry) ? 1 : 0;
304      carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
305      hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
306
307      carry += (lx * hy) & 0xffffffffULL;
308      resul = (carry << 32) | (resul & 0xffffffffULL);
309      dest[i+j] += resul;
310      carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
311              (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
312              ((lx * hy) >> 32) + hx * hy;
313    }
314    dest[i+xlen] = carry;
315  }
316}
317
318APInt& APInt::operator*=(const APInt& RHS) {
319  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
320  if (isSingleWord()) {
321    VAL *= RHS.VAL;
322    clearUnusedBits();
323    return *this;
324  }
325
326  // Get some bit facts about LHS and check for zero
327  unsigned lhsBits = getActiveBits();
328  unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
329  if (!lhsWords)
330    // 0 * X ===> 0
331    return *this;
332
333  // Get some bit facts about RHS and check for zero
334  unsigned rhsBits = RHS.getActiveBits();
335  unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
336  if (!rhsWords) {
337    // X * 0 ===> 0
338    clear();
339    return *this;
340  }
341
342  // Allocate space for the result
343  unsigned destWords = rhsWords + lhsWords;
344  uint64_t *dest = getMemory(destWords);
345
346  // Perform the long multiply
347  mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
348
349  // Copy result back into *this
350  clear();
351  unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
352  memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
353
354  // delete dest array and return
355  delete[] dest;
356  return *this;
357}
358
359APInt& APInt::operator&=(const APInt& RHS) {
360  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
361  if (isSingleWord()) {
362    VAL &= RHS.VAL;
363    return *this;
364  }
365  unsigned numWords = getNumWords();
366  for (unsigned i = 0; i < numWords; ++i)
367    pVal[i] &= RHS.pVal[i];
368  return *this;
369}
370
371APInt& APInt::operator|=(const APInt& RHS) {
372  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
373  if (isSingleWord()) {
374    VAL |= RHS.VAL;
375    return *this;
376  }
377  unsigned numWords = getNumWords();
378  for (unsigned i = 0; i < numWords; ++i)
379    pVal[i] |= RHS.pVal[i];
380  return *this;
381}
382
383APInt& APInt::operator^=(const APInt& RHS) {
384  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
385  if (isSingleWord()) {
386    VAL ^= RHS.VAL;
387    this->clearUnusedBits();
388    return *this;
389  }
390  unsigned numWords = getNumWords();
391  for (unsigned i = 0; i < numWords; ++i)
392    pVal[i] ^= RHS.pVal[i];
393  return clearUnusedBits();
394}
395
396APInt APInt::AndSlowCase(const APInt& RHS) const {
397  unsigned numWords = getNumWords();
398  uint64_t* val = getMemory(numWords);
399  for (unsigned i = 0; i < numWords; ++i)
400    val[i] = pVal[i] & RHS.pVal[i];
401  return APInt(val, getBitWidth());
402}
403
404APInt APInt::OrSlowCase(const APInt& RHS) const {
405  unsigned numWords = getNumWords();
406  uint64_t *val = getMemory(numWords);
407  for (unsigned i = 0; i < numWords; ++i)
408    val[i] = pVal[i] | RHS.pVal[i];
409  return APInt(val, getBitWidth());
410}
411
412APInt APInt::XorSlowCase(const APInt& RHS) const {
413  unsigned numWords = getNumWords();
414  uint64_t *val = getMemory(numWords);
415  for (unsigned i = 0; i < numWords; ++i)
416    val[i] = pVal[i] ^ RHS.pVal[i];
417
418  // 0^0==1 so clear the high bits in case they got set.
419  return APInt(val, getBitWidth()).clearUnusedBits();
420}
421
422bool APInt::operator !() const {
423  if (isSingleWord())
424    return !VAL;
425
426  for (unsigned i = 0; i < getNumWords(); ++i)
427    if (pVal[i])
428      return false;
429  return true;
430}
431
432APInt APInt::operator*(const APInt& RHS) const {
433  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
434  if (isSingleWord())
435    return APInt(BitWidth, VAL * RHS.VAL);
436  APInt Result(*this);
437  Result *= RHS;
438  return Result.clearUnusedBits();
439}
440
441APInt APInt::operator+(const APInt& RHS) const {
442  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
443  if (isSingleWord())
444    return APInt(BitWidth, VAL + RHS.VAL);
445  APInt Result(BitWidth, 0);
446  add(Result.pVal, this->pVal, RHS.pVal, getNumWords());
447  return Result.clearUnusedBits();
448}
449
450APInt APInt::operator-(const APInt& RHS) const {
451  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
452  if (isSingleWord())
453    return APInt(BitWidth, VAL - RHS.VAL);
454  APInt Result(BitWidth, 0);
455  sub(Result.pVal, this->pVal, RHS.pVal, getNumWords());
456  return Result.clearUnusedBits();
457}
458
459bool APInt::operator[](unsigned bitPosition) const {
460  return (maskBit(bitPosition) &
461          (isSingleWord() ?  VAL : pVal[whichWord(bitPosition)])) != 0;
462}
463
464bool APInt::EqualSlowCase(const APInt& RHS) const {
465  // Get some facts about the number of bits used in the two operands.
466  unsigned n1 = getActiveBits();
467  unsigned n2 = RHS.getActiveBits();
468
469  // If the number of bits isn't the same, they aren't equal
470  if (n1 != n2)
471    return false;
472
473  // If the number of bits fits in a word, we only need to compare the low word.
474  if (n1 <= APINT_BITS_PER_WORD)
475    return pVal[0] == RHS.pVal[0];
476
477  // Otherwise, compare everything
478  for (int i = whichWord(n1 - 1); i >= 0; --i)
479    if (pVal[i] != RHS.pVal[i])
480      return false;
481  return true;
482}
483
484bool APInt::EqualSlowCase(uint64_t Val) const {
485  unsigned n = getActiveBits();
486  if (n <= APINT_BITS_PER_WORD)
487    return pVal[0] == Val;
488  else
489    return false;
490}
491
492bool APInt::ult(const APInt& RHS) const {
493  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
494  if (isSingleWord())
495    return VAL < RHS.VAL;
496
497  // Get active bit length of both operands
498  unsigned n1 = getActiveBits();
499  unsigned n2 = RHS.getActiveBits();
500
501  // If magnitude of LHS is less than RHS, return true.
502  if (n1 < n2)
503    return true;
504
505  // If magnitude of RHS is greather than LHS, return false.
506  if (n2 < n1)
507    return false;
508
509  // If they bot fit in a word, just compare the low order word
510  if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
511    return pVal[0] < RHS.pVal[0];
512
513  // Otherwise, compare all words
514  unsigned topWord = whichWord(std::max(n1,n2)-1);
515  for (int i = topWord; i >= 0; --i) {
516    if (pVal[i] > RHS.pVal[i])
517      return false;
518    if (pVal[i] < RHS.pVal[i])
519      return true;
520  }
521  return false;
522}
523
524bool APInt::slt(const APInt& RHS) const {
525  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
526  if (isSingleWord()) {
527    int64_t lhsSext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
528    int64_t rhsSext = (int64_t(RHS.VAL) << (64-BitWidth)) >> (64-BitWidth);
529    return lhsSext < rhsSext;
530  }
531
532  APInt lhs(*this);
533  APInt rhs(RHS);
534  bool lhsNeg = isNegative();
535  bool rhsNeg = rhs.isNegative();
536  if (lhsNeg) {
537    // Sign bit is set so perform two's complement to make it positive
538    lhs.flip();
539    lhs++;
540  }
541  if (rhsNeg) {
542    // Sign bit is set so perform two's complement to make it positive
543    rhs.flip();
544    rhs++;
545  }
546
547  // Now we have unsigned values to compare so do the comparison if necessary
548  // based on the negativeness of the values.
549  if (lhsNeg)
550    if (rhsNeg)
551      return lhs.ugt(rhs);
552    else
553      return true;
554  else if (rhsNeg)
555    return false;
556  else
557    return lhs.ult(rhs);
558}
559
560APInt& APInt::set(unsigned bitPosition) {
561  if (isSingleWord())
562    VAL |= maskBit(bitPosition);
563  else
564    pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
565  return *this;
566}
567
568/// Set the given bit to 0 whose position is given as "bitPosition".
569/// @brief Set a given bit to 0.
570APInt& APInt::clear(unsigned bitPosition) {
571  if (isSingleWord())
572    VAL &= ~maskBit(bitPosition);
573  else
574    pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
575  return *this;
576}
577
578/// @brief Toggle every bit to its opposite value.
579
580/// Toggle a given bit to its opposite value whose position is given
581/// as "bitPosition".
582/// @brief Toggles a given bit to its opposite value.
583APInt& APInt::flip(unsigned bitPosition) {
584  assert(bitPosition < BitWidth && "Out of the bit-width range!");
585  if ((*this)[bitPosition]) clear(bitPosition);
586  else set(bitPosition);
587  return *this;
588}
589
590unsigned APInt::getBitsNeeded(const char* str, unsigned slen, uint8_t radix) {
591  assert(str != 0 && "Invalid value string");
592  assert(slen > 0 && "Invalid string length");
593
594  // Each computation below needs to know if its negative
595  unsigned isNegative = str[0] == '-';
596  if (isNegative) {
597    slen--;
598    str++;
599  }
600  // For radixes of power-of-two values, the bits required is accurately and
601  // easily computed
602  if (radix == 2)
603    return slen + isNegative;
604  if (radix == 8)
605    return slen * 3 + isNegative;
606  if (radix == 16)
607    return slen * 4 + isNegative;
608
609  // Otherwise it must be radix == 10, the hard case
610  assert(radix == 10 && "Invalid radix");
611
612  // This is grossly inefficient but accurate. We could probably do something
613  // with a computation of roughly slen*64/20 and then adjust by the value of
614  // the first few digits. But, I'm not sure how accurate that could be.
615
616  // Compute a sufficient number of bits that is always large enough but might
617  // be too large. This avoids the assertion in the constructor.
618  unsigned sufficient = slen*64/18;
619
620  // Convert to the actual binary value.
621  APInt tmp(sufficient, str, slen, radix);
622
623  // Compute how many bits are required.
624  return isNegative + tmp.logBase2() + 1;
625}
626
627// From http://www.burtleburtle.net, byBob Jenkins.
628// When targeting x86, both GCC and LLVM seem to recognize this as a
629// rotate instruction.
630#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
631
632// From http://www.burtleburtle.net, by Bob Jenkins.
633#define mix(a,b,c) \
634  { \
635    a -= c;  a ^= rot(c, 4);  c += b; \
636    b -= a;  b ^= rot(a, 6);  a += c; \
637    c -= b;  c ^= rot(b, 8);  b += a; \
638    a -= c;  a ^= rot(c,16);  c += b; \
639    b -= a;  b ^= rot(a,19);  a += c; \
640    c -= b;  c ^= rot(b, 4);  b += a; \
641  }
642
643// From http://www.burtleburtle.net, by Bob Jenkins.
644#define final(a,b,c) \
645  { \
646    c ^= b; c -= rot(b,14); \
647    a ^= c; a -= rot(c,11); \
648    b ^= a; b -= rot(a,25); \
649    c ^= b; c -= rot(b,16); \
650    a ^= c; a -= rot(c,4);  \
651    b ^= a; b -= rot(a,14); \
652    c ^= b; c -= rot(b,24); \
653  }
654
655// hashword() was adapted from http://www.burtleburtle.net, by Bob
656// Jenkins.  k is a pointer to an array of uint32_t values; length is
657// the length of the key, in 32-bit chunks.  This version only handles
658// keys that are a multiple of 32 bits in size.
659static inline uint32_t hashword(const uint64_t *k64, size_t length)
660{
661  const uint32_t *k = reinterpret_cast<const uint32_t *>(k64);
662  uint32_t a,b,c;
663
664  /* Set up the internal state */
665  a = b = c = 0xdeadbeef + (((uint32_t)length)<<2);
666
667  /*------------------------------------------------- handle most of the key */
668  while (length > 3)
669    {
670      a += k[0];
671      b += k[1];
672      c += k[2];
673      mix(a,b,c);
674      length -= 3;
675      k += 3;
676    }
677
678  /*------------------------------------------- handle the last 3 uint32_t's */
679  switch (length) {                  /* all the case statements fall through */
680  case 3 : c+=k[2];
681  case 2 : b+=k[1];
682  case 1 : a+=k[0];
683    final(a,b,c);
684    case 0:     /* case 0: nothing left to add */
685      break;
686    }
687  /*------------------------------------------------------ report the result */
688  return c;
689}
690
691// hashword8() was adapted from http://www.burtleburtle.net, by Bob
692// Jenkins.  This computes a 32-bit hash from one 64-bit word.  When
693// targeting x86 (32 or 64 bit), both LLVM and GCC compile this
694// function into about 35 instructions when inlined.
695static inline uint32_t hashword8(const uint64_t k64)
696{
697  uint32_t a,b,c;
698  a = b = c = 0xdeadbeef + 4;
699  b += k64 >> 32;
700  a += k64 & 0xffffffff;
701  final(a,b,c);
702  return c;
703}
704#undef final
705#undef mix
706#undef rot
707
708uint64_t APInt::getHashValue() const {
709  uint64_t hash;
710  if (isSingleWord())
711    hash = hashword8(VAL);
712  else
713    hash = hashword(pVal, getNumWords()*2);
714  return hash;
715}
716
717/// HiBits - This function returns the high "numBits" bits of this APInt.
718APInt APInt::getHiBits(unsigned numBits) const {
719  return APIntOps::lshr(*this, BitWidth - numBits);
720}
721
722/// LoBits - This function returns the low "numBits" bits of this APInt.
723APInt APInt::getLoBits(unsigned numBits) const {
724  return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
725                        BitWidth - numBits);
726}
727
728bool APInt::isPowerOf2() const {
729  return (!!*this) && !(*this & (*this - APInt(BitWidth,1)));
730}
731
732unsigned APInt::countLeadingZerosSlowCase() const {
733  unsigned Count = 0;
734  for (unsigned i = getNumWords(); i > 0u; --i) {
735    if (pVal[i-1] == 0)
736      Count += APINT_BITS_PER_WORD;
737    else {
738      Count += CountLeadingZeros_64(pVal[i-1]);
739      break;
740    }
741  }
742  unsigned remainder = BitWidth % APINT_BITS_PER_WORD;
743  if (remainder)
744    Count -= APINT_BITS_PER_WORD - remainder;
745  return std::min(Count, BitWidth);
746}
747
748static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) {
749  unsigned Count = 0;
750  if (skip)
751    V <<= skip;
752  while (V && (V & (1ULL << 63))) {
753    Count++;
754    V <<= 1;
755  }
756  return Count;
757}
758
759unsigned APInt::countLeadingOnes() const {
760  if (isSingleWord())
761    return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth);
762
763  unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
764  unsigned shift;
765  if (!highWordBits) {
766    highWordBits = APINT_BITS_PER_WORD;
767    shift = 0;
768  } else {
769    shift = APINT_BITS_PER_WORD - highWordBits;
770  }
771  int i = getNumWords() - 1;
772  unsigned Count = countLeadingOnes_64(pVal[i], shift);
773  if (Count == highWordBits) {
774    for (i--; i >= 0; --i) {
775      if (pVal[i] == -1ULL)
776        Count += APINT_BITS_PER_WORD;
777      else {
778        Count += countLeadingOnes_64(pVal[i], 0);
779        break;
780      }
781    }
782  }
783  return Count;
784}
785
786unsigned APInt::countTrailingZeros() const {
787  if (isSingleWord())
788    return std::min(unsigned(CountTrailingZeros_64(VAL)), BitWidth);
789  unsigned Count = 0;
790  unsigned i = 0;
791  for (; i < getNumWords() && pVal[i] == 0; ++i)
792    Count += APINT_BITS_PER_WORD;
793  if (i < getNumWords())
794    Count += CountTrailingZeros_64(pVal[i]);
795  return std::min(Count, BitWidth);
796}
797
798unsigned APInt::countTrailingOnesSlowCase() const {
799  unsigned Count = 0;
800  unsigned i = 0;
801  for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
802    Count += APINT_BITS_PER_WORD;
803  if (i < getNumWords())
804    Count += CountTrailingOnes_64(pVal[i]);
805  return std::min(Count, BitWidth);
806}
807
808unsigned APInt::countPopulationSlowCase() const {
809  unsigned Count = 0;
810  for (unsigned i = 0; i < getNumWords(); ++i)
811    Count += CountPopulation_64(pVal[i]);
812  return Count;
813}
814
815APInt APInt::byteSwap() const {
816  assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
817  if (BitWidth == 16)
818    return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
819  else if (BitWidth == 32)
820    return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
821  else if (BitWidth == 48) {
822    unsigned Tmp1 = unsigned(VAL >> 16);
823    Tmp1 = ByteSwap_32(Tmp1);
824    uint16_t Tmp2 = uint16_t(VAL);
825    Tmp2 = ByteSwap_16(Tmp2);
826    return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
827  } else if (BitWidth == 64)
828    return APInt(BitWidth, ByteSwap_64(VAL));
829  else {
830    APInt Result(BitWidth, 0);
831    char *pByte = (char*)Result.pVal;
832    for (unsigned i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) {
833      char Tmp = pByte[i];
834      pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i];
835      pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp;
836    }
837    return Result;
838  }
839}
840
841APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
842                                            const APInt& API2) {
843  APInt A = API1, B = API2;
844  while (!!B) {
845    APInt T = B;
846    B = APIntOps::urem(A, B);
847    A = T;
848  }
849  return A;
850}
851
852APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
853  union {
854    double D;
855    uint64_t I;
856  } T;
857  T.D = Double;
858
859  // Get the sign bit from the highest order bit
860  bool isNeg = T.I >> 63;
861
862  // Get the 11-bit exponent and adjust for the 1023 bit bias
863  int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
864
865  // If the exponent is negative, the value is < 0 so just return 0.
866  if (exp < 0)
867    return APInt(width, 0u);
868
869  // Extract the mantissa by clearing the top 12 bits (sign + exponent).
870  uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
871
872  // If the exponent doesn't shift all bits out of the mantissa
873  if (exp < 52)
874    return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
875                    APInt(width, mantissa >> (52 - exp));
876
877  // If the client didn't provide enough bits for us to shift the mantissa into
878  // then the result is undefined, just return 0
879  if (width <= exp - 52)
880    return APInt(width, 0);
881
882  // Otherwise, we have to shift the mantissa bits up to the right location
883  APInt Tmp(width, mantissa);
884  Tmp = Tmp.shl((unsigned)exp - 52);
885  return isNeg ? -Tmp : Tmp;
886}
887
888/// RoundToDouble - This function convert this APInt to a double.
889/// The layout for double is as following (IEEE Standard 754):
890///  --------------------------------------
891/// |  Sign    Exponent    Fraction    Bias |
892/// |-------------------------------------- |
893/// |  1[63]   11[62-52]   52[51-00]   1023 |
894///  --------------------------------------
895double APInt::roundToDouble(bool isSigned) const {
896
897  // Handle the simple case where the value is contained in one uint64_t.
898  if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
899    if (isSigned) {
900      int64_t sext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
901      return double(sext);
902    } else
903      return double(VAL);
904  }
905
906  // Determine if the value is negative.
907  bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
908
909  // Construct the absolute value if we're negative.
910  APInt Tmp(isNeg ? -(*this) : (*this));
911
912  // Figure out how many bits we're using.
913  unsigned n = Tmp.getActiveBits();
914
915  // The exponent (without bias normalization) is just the number of bits
916  // we are using. Note that the sign bit is gone since we constructed the
917  // absolute value.
918  uint64_t exp = n;
919
920  // Return infinity for exponent overflow
921  if (exp > 1023) {
922    if (!isSigned || !isNeg)
923      return std::numeric_limits<double>::infinity();
924    else
925      return -std::numeric_limits<double>::infinity();
926  }
927  exp += 1023; // Increment for 1023 bias
928
929  // Number of bits in mantissa is 52. To obtain the mantissa value, we must
930  // extract the high 52 bits from the correct words in pVal.
931  uint64_t mantissa;
932  unsigned hiWord = whichWord(n-1);
933  if (hiWord == 0) {
934    mantissa = Tmp.pVal[0];
935    if (n > 52)
936      mantissa >>= n - 52; // shift down, we want the top 52 bits.
937  } else {
938    assert(hiWord > 0 && "huh?");
939    uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
940    uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
941    mantissa = hibits | lobits;
942  }
943
944  // The leading bit of mantissa is implicit, so get rid of it.
945  uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
946  union {
947    double D;
948    uint64_t I;
949  } T;
950  T.I = sign | (exp << 52) | mantissa;
951  return T.D;
952}
953
954// Truncate to new width.
955APInt &APInt::trunc(unsigned width) {
956  assert(width < BitWidth && "Invalid APInt Truncate request");
957  assert(width && "Can't truncate to 0 bits");
958  unsigned wordsBefore = getNumWords();
959  BitWidth = width;
960  unsigned wordsAfter = getNumWords();
961  if (wordsBefore != wordsAfter) {
962    if (wordsAfter == 1) {
963      uint64_t *tmp = pVal;
964      VAL = pVal[0];
965      delete [] tmp;
966    } else {
967      uint64_t *newVal = getClearedMemory(wordsAfter);
968      for (unsigned i = 0; i < wordsAfter; ++i)
969        newVal[i] = pVal[i];
970      delete [] pVal;
971      pVal = newVal;
972    }
973  }
974  return clearUnusedBits();
975}
976
977// Sign extend to a new width.
978APInt &APInt::sext(unsigned width) {
979  assert(width > BitWidth && "Invalid APInt SignExtend request");
980  // If the sign bit isn't set, this is the same as zext.
981  if (!isNegative()) {
982    zext(width);
983    return *this;
984  }
985
986  // The sign bit is set. First, get some facts
987  unsigned wordsBefore = getNumWords();
988  unsigned wordBits = BitWidth % APINT_BITS_PER_WORD;
989  BitWidth = width;
990  unsigned wordsAfter = getNumWords();
991
992  // Mask the high order word appropriately
993  if (wordsBefore == wordsAfter) {
994    unsigned newWordBits = width % APINT_BITS_PER_WORD;
995    // The extension is contained to the wordsBefore-1th word.
996    uint64_t mask = ~0ULL;
997    if (newWordBits)
998      mask >>= APINT_BITS_PER_WORD - newWordBits;
999    mask <<= wordBits;
1000    if (wordsBefore == 1)
1001      VAL |= mask;
1002    else
1003      pVal[wordsBefore-1] |= mask;
1004    return clearUnusedBits();
1005  }
1006
1007  uint64_t mask = wordBits == 0 ? 0 : ~0ULL << wordBits;
1008  uint64_t *newVal = getMemory(wordsAfter);
1009  if (wordsBefore == 1)
1010    newVal[0] = VAL | mask;
1011  else {
1012    for (unsigned i = 0; i < wordsBefore; ++i)
1013      newVal[i] = pVal[i];
1014    newVal[wordsBefore-1] |= mask;
1015  }
1016  for (unsigned i = wordsBefore; i < wordsAfter; i++)
1017    newVal[i] = -1ULL;
1018  if (wordsBefore != 1)
1019    delete [] pVal;
1020  pVal = newVal;
1021  return clearUnusedBits();
1022}
1023
1024//  Zero extend to a new width.
1025APInt &APInt::zext(unsigned width) {
1026  assert(width > BitWidth && "Invalid APInt ZeroExtend request");
1027  unsigned wordsBefore = getNumWords();
1028  BitWidth = width;
1029  unsigned wordsAfter = getNumWords();
1030  if (wordsBefore != wordsAfter) {
1031    uint64_t *newVal = getClearedMemory(wordsAfter);
1032    if (wordsBefore == 1)
1033      newVal[0] = VAL;
1034    else
1035      for (unsigned i = 0; i < wordsBefore; ++i)
1036        newVal[i] = pVal[i];
1037    if (wordsBefore != 1)
1038      delete [] pVal;
1039    pVal = newVal;
1040  }
1041  return *this;
1042}
1043
1044APInt &APInt::zextOrTrunc(unsigned width) {
1045  if (BitWidth < width)
1046    return zext(width);
1047  if (BitWidth > width)
1048    return trunc(width);
1049  return *this;
1050}
1051
1052APInt &APInt::sextOrTrunc(unsigned width) {
1053  if (BitWidth < width)
1054    return sext(width);
1055  if (BitWidth > width)
1056    return trunc(width);
1057  return *this;
1058}
1059
1060/// Arithmetic right-shift this APInt by shiftAmt.
1061/// @brief Arithmetic right-shift function.
1062APInt APInt::ashr(const APInt &shiftAmt) const {
1063  return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1064}
1065
1066/// Arithmetic right-shift this APInt by shiftAmt.
1067/// @brief Arithmetic right-shift function.
1068APInt APInt::ashr(unsigned shiftAmt) const {
1069  assert(shiftAmt <= BitWidth && "Invalid shift amount");
1070  // Handle a degenerate case
1071  if (shiftAmt == 0)
1072    return *this;
1073
1074  // Handle single word shifts with built-in ashr
1075  if (isSingleWord()) {
1076    if (shiftAmt == BitWidth)
1077      return APInt(BitWidth, 0); // undefined
1078    else {
1079      unsigned SignBit = APINT_BITS_PER_WORD - BitWidth;
1080      return APInt(BitWidth,
1081        (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt));
1082    }
1083  }
1084
1085  // If all the bits were shifted out, the result is, technically, undefined.
1086  // We return -1 if it was negative, 0 otherwise. We check this early to avoid
1087  // issues in the algorithm below.
1088  if (shiftAmt == BitWidth) {
1089    if (isNegative())
1090      return APInt(BitWidth, -1ULL, true);
1091    else
1092      return APInt(BitWidth, 0);
1093  }
1094
1095  // Create some space for the result.
1096  uint64_t * val = new uint64_t[getNumWords()];
1097
1098  // Compute some values needed by the following shift algorithms
1099  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word
1100  unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift
1101  unsigned breakWord = getNumWords() - 1 - offset; // last word affected
1102  unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word?
1103  if (bitsInWord == 0)
1104    bitsInWord = APINT_BITS_PER_WORD;
1105
1106  // If we are shifting whole words, just move whole words
1107  if (wordShift == 0) {
1108    // Move the words containing significant bits
1109    for (unsigned i = 0; i <= breakWord; ++i)
1110      val[i] = pVal[i+offset]; // move whole word
1111
1112    // Adjust the top significant word for sign bit fill, if negative
1113    if (isNegative())
1114      if (bitsInWord < APINT_BITS_PER_WORD)
1115        val[breakWord] |= ~0ULL << bitsInWord; // set high bits
1116  } else {
1117    // Shift the low order words
1118    for (unsigned i = 0; i < breakWord; ++i) {
1119      // This combines the shifted corresponding word with the low bits from
1120      // the next word (shifted into this word's high bits).
1121      val[i] = (pVal[i+offset] >> wordShift) |
1122               (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1123    }
1124
1125    // Shift the break word. In this case there are no bits from the next word
1126    // to include in this word.
1127    val[breakWord] = pVal[breakWord+offset] >> wordShift;
1128
1129    // Deal with sign extenstion in the break word, and possibly the word before
1130    // it.
1131    if (isNegative()) {
1132      if (wordShift > bitsInWord) {
1133        if (breakWord > 0)
1134          val[breakWord-1] |=
1135            ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
1136        val[breakWord] |= ~0ULL;
1137      } else
1138        val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
1139    }
1140  }
1141
1142  // Remaining words are 0 or -1, just assign them.
1143  uint64_t fillValue = (isNegative() ? -1ULL : 0);
1144  for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1145    val[i] = fillValue;
1146  return APInt(val, BitWidth).clearUnusedBits();
1147}
1148
1149/// Logical right-shift this APInt by shiftAmt.
1150/// @brief Logical right-shift function.
1151APInt APInt::lshr(const APInt &shiftAmt) const {
1152  return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1153}
1154
1155/// Logical right-shift this APInt by shiftAmt.
1156/// @brief Logical right-shift function.
1157APInt APInt::lshr(unsigned shiftAmt) const {
1158  if (isSingleWord()) {
1159    if (shiftAmt == BitWidth)
1160      return APInt(BitWidth, 0);
1161    else
1162      return APInt(BitWidth, this->VAL >> shiftAmt);
1163  }
1164
1165  // If all the bits were shifted out, the result is 0. This avoids issues
1166  // with shifting by the size of the integer type, which produces undefined
1167  // results. We define these "undefined results" to always be 0.
1168  if (shiftAmt == BitWidth)
1169    return APInt(BitWidth, 0);
1170
1171  // If none of the bits are shifted out, the result is *this. This avoids
1172  // issues with shifting by the size of the integer type, which produces
1173  // undefined results in the code below. This is also an optimization.
1174  if (shiftAmt == 0)
1175    return *this;
1176
1177  // Create some space for the result.
1178  uint64_t * val = new uint64_t[getNumWords()];
1179
1180  // If we are shifting less than a word, compute the shift with a simple carry
1181  if (shiftAmt < APINT_BITS_PER_WORD) {
1182    uint64_t carry = 0;
1183    for (int i = getNumWords()-1; i >= 0; --i) {
1184      val[i] = (pVal[i] >> shiftAmt) | carry;
1185      carry = pVal[i] << (APINT_BITS_PER_WORD - shiftAmt);
1186    }
1187    return APInt(val, BitWidth).clearUnusedBits();
1188  }
1189
1190  // Compute some values needed by the remaining shift algorithms
1191  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1192  unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1193
1194  // If we are shifting whole words, just move whole words
1195  if (wordShift == 0) {
1196    for (unsigned i = 0; i < getNumWords() - offset; ++i)
1197      val[i] = pVal[i+offset];
1198    for (unsigned i = getNumWords()-offset; i < getNumWords(); i++)
1199      val[i] = 0;
1200    return APInt(val,BitWidth).clearUnusedBits();
1201  }
1202
1203  // Shift the low order words
1204  unsigned breakWord = getNumWords() - offset -1;
1205  for (unsigned i = 0; i < breakWord; ++i)
1206    val[i] = (pVal[i+offset] >> wordShift) |
1207             (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1208  // Shift the break word.
1209  val[breakWord] = pVal[breakWord+offset] >> wordShift;
1210
1211  // Remaining words are 0
1212  for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1213    val[i] = 0;
1214  return APInt(val, BitWidth).clearUnusedBits();
1215}
1216
1217/// Left-shift this APInt by shiftAmt.
1218/// @brief Left-shift function.
1219APInt APInt::shl(const APInt &shiftAmt) const {
1220  // It's undefined behavior in C to shift by BitWidth or greater.
1221  return shl((unsigned)shiftAmt.getLimitedValue(BitWidth));
1222}
1223
1224APInt APInt::shlSlowCase(unsigned shiftAmt) const {
1225  // If all the bits were shifted out, the result is 0. This avoids issues
1226  // with shifting by the size of the integer type, which produces undefined
1227  // results. We define these "undefined results" to always be 0.
1228  if (shiftAmt == BitWidth)
1229    return APInt(BitWidth, 0);
1230
1231  // If none of the bits are shifted out, the result is *this. This avoids a
1232  // lshr by the words size in the loop below which can produce incorrect
1233  // results. It also avoids the expensive computation below for a common case.
1234  if (shiftAmt == 0)
1235    return *this;
1236
1237  // Create some space for the result.
1238  uint64_t * val = new uint64_t[getNumWords()];
1239
1240  // If we are shifting less than a word, do it the easy way
1241  if (shiftAmt < APINT_BITS_PER_WORD) {
1242    uint64_t carry = 0;
1243    for (unsigned i = 0; i < getNumWords(); i++) {
1244      val[i] = pVal[i] << shiftAmt | carry;
1245      carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt);
1246    }
1247    return APInt(val, BitWidth).clearUnusedBits();
1248  }
1249
1250  // Compute some values needed by the remaining shift algorithms
1251  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1252  unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1253
1254  // If we are shifting whole words, just move whole words
1255  if (wordShift == 0) {
1256    for (unsigned i = 0; i < offset; i++)
1257      val[i] = 0;
1258    for (unsigned i = offset; i < getNumWords(); i++)
1259      val[i] = pVal[i-offset];
1260    return APInt(val,BitWidth).clearUnusedBits();
1261  }
1262
1263  // Copy whole words from this to Result.
1264  unsigned i = getNumWords() - 1;
1265  for (; i > offset; --i)
1266    val[i] = pVal[i-offset] << wordShift |
1267             pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift);
1268  val[offset] = pVal[0] << wordShift;
1269  for (i = 0; i < offset; ++i)
1270    val[i] = 0;
1271  return APInt(val, BitWidth).clearUnusedBits();
1272}
1273
1274APInt APInt::rotl(const APInt &rotateAmt) const {
1275  return rotl((unsigned)rotateAmt.getLimitedValue(BitWidth));
1276}
1277
1278APInt APInt::rotl(unsigned rotateAmt) const {
1279  if (rotateAmt == 0)
1280    return *this;
1281  // Don't get too fancy, just use existing shift/or facilities
1282  APInt hi(*this);
1283  APInt lo(*this);
1284  hi.shl(rotateAmt);
1285  lo.lshr(BitWidth - rotateAmt);
1286  return hi | lo;
1287}
1288
1289APInt APInt::rotr(const APInt &rotateAmt) const {
1290  return rotr((unsigned)rotateAmt.getLimitedValue(BitWidth));
1291}
1292
1293APInt APInt::rotr(unsigned rotateAmt) const {
1294  if (rotateAmt == 0)
1295    return *this;
1296  // Don't get too fancy, just use existing shift/or facilities
1297  APInt hi(*this);
1298  APInt lo(*this);
1299  lo.lshr(rotateAmt);
1300  hi.shl(BitWidth - rotateAmt);
1301  return hi | lo;
1302}
1303
1304// Square Root - this method computes and returns the square root of "this".
1305// Three mechanisms are used for computation. For small values (<= 5 bits),
1306// a table lookup is done. This gets some performance for common cases. For
1307// values using less than 52 bits, the value is converted to double and then
1308// the libc sqrt function is called. The result is rounded and then converted
1309// back to a uint64_t which is then used to construct the result. Finally,
1310// the Babylonian method for computing square roots is used.
1311APInt APInt::sqrt() const {
1312
1313  // Determine the magnitude of the value.
1314  unsigned magnitude = getActiveBits();
1315
1316  // Use a fast table for some small values. This also gets rid of some
1317  // rounding errors in libc sqrt for small values.
1318  if (magnitude <= 5) {
1319    static const uint8_t results[32] = {
1320      /*     0 */ 0,
1321      /*  1- 2 */ 1, 1,
1322      /*  3- 6 */ 2, 2, 2, 2,
1323      /*  7-12 */ 3, 3, 3, 3, 3, 3,
1324      /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1325      /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1326      /*    31 */ 6
1327    };
1328    return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
1329  }
1330
1331  // If the magnitude of the value fits in less than 52 bits (the precision of
1332  // an IEEE double precision floating point value), then we can use the
1333  // libc sqrt function which will probably use a hardware sqrt computation.
1334  // This should be faster than the algorithm below.
1335  if (magnitude < 52) {
1336#ifdef _MSC_VER
1337    // Amazingly, VC++ doesn't have round().
1338    return APInt(BitWidth,
1339                 uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5);
1340#else
1341    return APInt(BitWidth,
1342                 uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
1343#endif
1344  }
1345
1346  // Okay, all the short cuts are exhausted. We must compute it. The following
1347  // is a classical Babylonian method for computing the square root. This code
1348  // was adapted to APINt from a wikipedia article on such computations.
1349  // See http://www.wikipedia.org/ and go to the page named
1350  // Calculate_an_integer_square_root.
1351  unsigned nbits = BitWidth, i = 4;
1352  APInt testy(BitWidth, 16);
1353  APInt x_old(BitWidth, 1);
1354  APInt x_new(BitWidth, 0);
1355  APInt two(BitWidth, 2);
1356
1357  // Select a good starting value using binary logarithms.
1358  for (;; i += 2, testy = testy.shl(2))
1359    if (i >= nbits || this->ule(testy)) {
1360      x_old = x_old.shl(i / 2);
1361      break;
1362    }
1363
1364  // Use the Babylonian method to arrive at the integer square root:
1365  for (;;) {
1366    x_new = (this->udiv(x_old) + x_old).udiv(two);
1367    if (x_old.ule(x_new))
1368      break;
1369    x_old = x_new;
1370  }
1371
1372  // Make sure we return the closest approximation
1373  // NOTE: The rounding calculation below is correct. It will produce an
1374  // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1375  // determined to be a rounding issue with pari/gp as it begins to use a
1376  // floating point representation after 192 bits. There are no discrepancies
1377  // between this algorithm and pari/gp for bit widths < 192 bits.
1378  APInt square(x_old * x_old);
1379  APInt nextSquare((x_old + 1) * (x_old +1));
1380  if (this->ult(square))
1381    return x_old;
1382  else if (this->ule(nextSquare)) {
1383    APInt midpoint((nextSquare - square).udiv(two));
1384    APInt offset(*this - square);
1385    if (offset.ult(midpoint))
1386      return x_old;
1387    else
1388      return x_old + 1;
1389  } else
1390    LLVM_UNREACHABLE("Error in APInt::sqrt computation");
1391  return x_old + 1;
1392}
1393
1394/// Computes the multiplicative inverse of this APInt for a given modulo. The
1395/// iterative extended Euclidean algorithm is used to solve for this value,
1396/// however we simplify it to speed up calculating only the inverse, and take
1397/// advantage of div+rem calculations. We also use some tricks to avoid copying
1398/// (potentially large) APInts around.
1399APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1400  assert(ult(modulo) && "This APInt must be smaller than the modulo");
1401
1402  // Using the properties listed at the following web page (accessed 06/21/08):
1403  //   http://www.numbertheory.org/php/euclid.html
1404  // (especially the properties numbered 3, 4 and 9) it can be proved that
1405  // BitWidth bits suffice for all the computations in the algorithm implemented
1406  // below. More precisely, this number of bits suffice if the multiplicative
1407  // inverse exists, but may not suffice for the general extended Euclidean
1408  // algorithm.
1409
1410  APInt r[2] = { modulo, *this };
1411  APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1412  APInt q(BitWidth, 0);
1413
1414  unsigned i;
1415  for (i = 0; r[i^1] != 0; i ^= 1) {
1416    // An overview of the math without the confusing bit-flipping:
1417    // q = r[i-2] / r[i-1]
1418    // r[i] = r[i-2] % r[i-1]
1419    // t[i] = t[i-2] - t[i-1] * q
1420    udivrem(r[i], r[i^1], q, r[i]);
1421    t[i] -= t[i^1] * q;
1422  }
1423
1424  // If this APInt and the modulo are not coprime, there is no multiplicative
1425  // inverse, so return 0. We check this by looking at the next-to-last
1426  // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1427  // algorithm.
1428  if (r[i] != 1)
1429    return APInt(BitWidth, 0);
1430
1431  // The next-to-last t is the multiplicative inverse.  However, we are
1432  // interested in a positive inverse. Calcuate a positive one from a negative
1433  // one if necessary. A simple addition of the modulo suffices because
1434  // abs(t[i]) is known to be less than *this/2 (see the link above).
1435  return t[i].isNegative() ? t[i] + modulo : t[i];
1436}
1437
1438/// Calculate the magic numbers required to implement a signed integer division
1439/// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1440/// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1441/// Warren, Jr., chapter 10.
1442APInt::ms APInt::magic() const {
1443  const APInt& d = *this;
1444  unsigned p;
1445  APInt ad, anc, delta, q1, r1, q2, r2, t;
1446  APInt allOnes = APInt::getAllOnesValue(d.getBitWidth());
1447  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1448  APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1449  struct ms mag;
1450
1451  ad = d.abs();
1452  t = signedMin + (d.lshr(d.getBitWidth() - 1));
1453  anc = t - 1 - t.urem(ad);   // absolute value of nc
1454  p = d.getBitWidth() - 1;    // initialize p
1455  q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1456  r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1457  q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1458  r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1459  do {
1460    p = p + 1;
1461    q1 = q1<<1;          // update q1 = 2p/abs(nc)
1462    r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1463    if (r1.uge(anc)) {  // must be unsigned comparison
1464      q1 = q1 + 1;
1465      r1 = r1 - anc;
1466    }
1467    q2 = q2<<1;          // update q2 = 2p/abs(d)
1468    r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1469    if (r2.uge(ad)) {   // must be unsigned comparison
1470      q2 = q2 + 1;
1471      r2 = r2 - ad;
1472    }
1473    delta = ad - r2;
1474  } while (q1.ule(delta) || (q1 == delta && r1 == 0));
1475
1476  mag.m = q2 + 1;
1477  if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1478  mag.s = p - d.getBitWidth();          // resulting shift
1479  return mag;
1480}
1481
1482/// Calculate the magic numbers required to implement an unsigned integer
1483/// division by a constant as a sequence of multiplies, adds and shifts.
1484/// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1485/// S. Warren, Jr., chapter 10.
1486APInt::mu APInt::magicu() const {
1487  const APInt& d = *this;
1488  unsigned p;
1489  APInt nc, delta, q1, r1, q2, r2;
1490  struct mu magu;
1491  magu.a = 0;               // initialize "add" indicator
1492  APInt allOnes = APInt::getAllOnesValue(d.getBitWidth());
1493  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1494  APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1495
1496  nc = allOnes - (-d).urem(d);
1497  p = d.getBitWidth() - 1;  // initialize p
1498  q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1499  r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1500  q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1501  r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1502  do {
1503    p = p + 1;
1504    if (r1.uge(nc - r1)) {
1505      q1 = q1 + q1 + 1;  // update q1
1506      r1 = r1 + r1 - nc; // update r1
1507    }
1508    else {
1509      q1 = q1+q1; // update q1
1510      r1 = r1+r1; // update r1
1511    }
1512    if ((r2 + 1).uge(d - r2)) {
1513      if (q2.uge(signedMax)) magu.a = 1;
1514      q2 = q2+q2 + 1;     // update q2
1515      r2 = r2+r2 + 1 - d; // update r2
1516    }
1517    else {
1518      if (q2.uge(signedMin)) magu.a = 1;
1519      q2 = q2+q2;     // update q2
1520      r2 = r2+r2 + 1; // update r2
1521    }
1522    delta = d - 1 - r2;
1523  } while (p < d.getBitWidth()*2 &&
1524           (q1.ult(delta) || (q1 == delta && r1 == 0)));
1525  magu.m = q2 + 1; // resulting magic number
1526  magu.s = p - d.getBitWidth();  // resulting shift
1527  return magu;
1528}
1529
1530/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1531/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1532/// variables here have the same names as in the algorithm. Comments explain
1533/// the algorithm and any deviation from it.
1534static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
1535                     unsigned m, unsigned n) {
1536  assert(u && "Must provide dividend");
1537  assert(v && "Must provide divisor");
1538  assert(q && "Must provide quotient");
1539  assert(u != v && u != q && v != q && "Must us different memory");
1540  assert(n>1 && "n must be > 1");
1541
1542  // Knuth uses the value b as the base of the number system. In our case b
1543  // is 2^31 so we just set it to -1u.
1544  uint64_t b = uint64_t(1) << 32;
1545
1546#if 0
1547  DEBUG(cerr << "KnuthDiv: m=" << m << " n=" << n << '\n');
1548  DEBUG(cerr << "KnuthDiv: original:");
1549  DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << std::setbase(16) << u[i]);
1550  DEBUG(cerr << " by");
1551  DEBUG(for (int i = n; i >0; i--) cerr << " " << std::setbase(16) << v[i-1]);
1552  DEBUG(cerr << '\n');
1553#endif
1554  // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1555  // u and v by d. Note that we have taken Knuth's advice here to use a power
1556  // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1557  // 2 allows us to shift instead of multiply and it is easy to determine the
1558  // shift amount from the leading zeros.  We are basically normalizing the u
1559  // and v so that its high bits are shifted to the top of v's range without
1560  // overflow. Note that this can require an extra word in u so that u must
1561  // be of length m+n+1.
1562  unsigned shift = CountLeadingZeros_32(v[n-1]);
1563  unsigned v_carry = 0;
1564  unsigned u_carry = 0;
1565  if (shift) {
1566    for (unsigned i = 0; i < m+n; ++i) {
1567      unsigned u_tmp = u[i] >> (32 - shift);
1568      u[i] = (u[i] << shift) | u_carry;
1569      u_carry = u_tmp;
1570    }
1571    for (unsigned i = 0; i < n; ++i) {
1572      unsigned v_tmp = v[i] >> (32 - shift);
1573      v[i] = (v[i] << shift) | v_carry;
1574      v_carry = v_tmp;
1575    }
1576  }
1577  u[m+n] = u_carry;
1578#if 0
1579  DEBUG(cerr << "KnuthDiv:   normal:");
1580  DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << std::setbase(16) << u[i]);
1581  DEBUG(cerr << " by");
1582  DEBUG(for (int i = n; i >0; i--) cerr << " " << std::setbase(16) << v[i-1]);
1583  DEBUG(cerr << '\n');
1584#endif
1585
1586  // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1587  int j = m;
1588  do {
1589    DEBUG(cerr << "KnuthDiv: quotient digit #" << j << '\n');
1590    // D3. [Calculate q'.].
1591    //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1592    //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1593    // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1594    // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test
1595    // on v[n-2] determines at high speed most of the cases in which the trial
1596    // value qp is one too large, and it eliminates all cases where qp is two
1597    // too large.
1598    uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
1599    DEBUG(cerr << "KnuthDiv: dividend == " << dividend << '\n');
1600    uint64_t qp = dividend / v[n-1];
1601    uint64_t rp = dividend % v[n-1];
1602    if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1603      qp--;
1604      rp += v[n-1];
1605      if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1606        qp--;
1607    }
1608    DEBUG(cerr << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1609
1610    // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1611    // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1612    // consists of a simple multiplication by a one-place number, combined with
1613    // a subtraction.
1614    bool isNeg = false;
1615    for (unsigned i = 0; i < n; ++i) {
1616      uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32);
1617      uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]);
1618      bool borrow = subtrahend > u_tmp;
1619      DEBUG(cerr << "KnuthDiv: u_tmp == " << u_tmp
1620                 << ", subtrahend == " << subtrahend
1621                 << ", borrow = " << borrow << '\n');
1622
1623      uint64_t result = u_tmp - subtrahend;
1624      unsigned k = j + i;
1625      u[k++] = (unsigned)(result & (b-1)); // subtract low word
1626      u[k++] = (unsigned)(result >> 32);   // subtract high word
1627      while (borrow && k <= m+n) { // deal with borrow to the left
1628        borrow = u[k] == 0;
1629        u[k]--;
1630        k++;
1631      }
1632      isNeg |= borrow;
1633      DEBUG(cerr << "KnuthDiv: u[j+i] == " << u[j+i] << ",  u[j+i+1] == " <<
1634                    u[j+i+1] << '\n');
1635    }
1636    DEBUG(cerr << "KnuthDiv: after subtraction:");
1637    DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << u[i]);
1638    DEBUG(cerr << '\n');
1639    // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1640    // this step is actually negative, (u[j+n]...u[j]) should be left as the
1641    // true value plus b**(n+1), namely as the b's complement of
1642    // the true value, and a "borrow" to the left should be remembered.
1643    //
1644    if (isNeg) {
1645      bool carry = true;  // true because b's complement is "complement + 1"
1646      for (unsigned i = 0; i <= m+n; ++i) {
1647        u[i] = ~u[i] + carry; // b's complement
1648        carry = carry && u[i] == 0;
1649      }
1650    }
1651    DEBUG(cerr << "KnuthDiv: after complement:");
1652    DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << u[i]);
1653    DEBUG(cerr << '\n');
1654
1655    // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1656    // negative, go to step D6; otherwise go on to step D7.
1657    q[j] = (unsigned)qp;
1658    if (isNeg) {
1659      // D6. [Add back]. The probability that this step is necessary is very
1660      // small, on the order of only 2/b. Make sure that test data accounts for
1661      // this possibility. Decrease q[j] by 1
1662      q[j]--;
1663      // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1664      // A carry will occur to the left of u[j+n], and it should be ignored
1665      // since it cancels with the borrow that occurred in D4.
1666      bool carry = false;
1667      for (unsigned i = 0; i < n; i++) {
1668        unsigned limit = std::min(u[j+i],v[i]);
1669        u[j+i] += v[i] + carry;
1670        carry = u[j+i] < limit || (carry && u[j+i] == limit);
1671      }
1672      u[j+n] += carry;
1673    }
1674    DEBUG(cerr << "KnuthDiv: after correction:");
1675    DEBUG(for (int i = m+n; i >=0; i--) cerr <<" " << u[i]);
1676    DEBUG(cerr << "\nKnuthDiv: digit result = " << q[j] << '\n');
1677
1678  // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1679  } while (--j >= 0);
1680
1681  DEBUG(cerr << "KnuthDiv: quotient:");
1682  DEBUG(for (int i = m; i >=0; i--) cerr <<" " << q[i]);
1683  DEBUG(cerr << '\n');
1684
1685  // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1686  // remainder may be obtained by dividing u[...] by d. If r is non-null we
1687  // compute the remainder (urem uses this).
1688  if (r) {
1689    // The value d is expressed by the "shift" value above since we avoided
1690    // multiplication by d by using a shift left. So, all we have to do is
1691    // shift right here. In order to mak
1692    if (shift) {
1693      unsigned carry = 0;
1694      DEBUG(cerr << "KnuthDiv: remainder:");
1695      for (int i = n-1; i >= 0; i--) {
1696        r[i] = (u[i] >> shift) | carry;
1697        carry = u[i] << (32 - shift);
1698        DEBUG(cerr << " " << r[i]);
1699      }
1700    } else {
1701      for (int i = n-1; i >= 0; i--) {
1702        r[i] = u[i];
1703        DEBUG(cerr << " " << r[i]);
1704      }
1705    }
1706    DEBUG(cerr << '\n');
1707  }
1708#if 0
1709  DEBUG(cerr << std::setbase(10) << '\n');
1710#endif
1711}
1712
1713void APInt::divide(const APInt LHS, unsigned lhsWords,
1714                   const APInt &RHS, unsigned rhsWords,
1715                   APInt *Quotient, APInt *Remainder)
1716{
1717  assert(lhsWords >= rhsWords && "Fractional result");
1718
1719  // First, compose the values into an array of 32-bit words instead of
1720  // 64-bit words. This is a necessity of both the "short division" algorithm
1721  // and the the Knuth "classical algorithm" which requires there to be native
1722  // operations for +, -, and * on an m bit value with an m*2 bit result. We
1723  // can't use 64-bit operands here because we don't have native results of
1724  // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1725  // work on large-endian machines.
1726  uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
1727  unsigned n = rhsWords * 2;
1728  unsigned m = (lhsWords * 2) - n;
1729
1730  // Allocate space for the temporary values we need either on the stack, if
1731  // it will fit, or on the heap if it won't.
1732  unsigned SPACE[128];
1733  unsigned *U = 0;
1734  unsigned *V = 0;
1735  unsigned *Q = 0;
1736  unsigned *R = 0;
1737  if ((Remainder?4:3)*n+2*m+1 <= 128) {
1738    U = &SPACE[0];
1739    V = &SPACE[m+n+1];
1740    Q = &SPACE[(m+n+1) + n];
1741    if (Remainder)
1742      R = &SPACE[(m+n+1) + n + (m+n)];
1743  } else {
1744    U = new unsigned[m + n + 1];
1745    V = new unsigned[n];
1746    Q = new unsigned[m+n];
1747    if (Remainder)
1748      R = new unsigned[n];
1749  }
1750
1751  // Initialize the dividend
1752  memset(U, 0, (m+n+1)*sizeof(unsigned));
1753  for (unsigned i = 0; i < lhsWords; ++i) {
1754    uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
1755    U[i * 2] = (unsigned)(tmp & mask);
1756    U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1757  }
1758  U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1759
1760  // Initialize the divisor
1761  memset(V, 0, (n)*sizeof(unsigned));
1762  for (unsigned i = 0; i < rhsWords; ++i) {
1763    uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
1764    V[i * 2] = (unsigned)(tmp & mask);
1765    V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1766  }
1767
1768  // initialize the quotient and remainder
1769  memset(Q, 0, (m+n) * sizeof(unsigned));
1770  if (Remainder)
1771    memset(R, 0, n * sizeof(unsigned));
1772
1773  // Now, adjust m and n for the Knuth division. n is the number of words in
1774  // the divisor. m is the number of words by which the dividend exceeds the
1775  // divisor (i.e. m+n is the length of the dividend). These sizes must not
1776  // contain any zero words or the Knuth algorithm fails.
1777  for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1778    n--;
1779    m++;
1780  }
1781  for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1782    m--;
1783
1784  // If we're left with only a single word for the divisor, Knuth doesn't work
1785  // so we implement the short division algorithm here. This is much simpler
1786  // and faster because we are certain that we can divide a 64-bit quantity
1787  // by a 32-bit quantity at hardware speed and short division is simply a
1788  // series of such operations. This is just like doing short division but we
1789  // are using base 2^32 instead of base 10.
1790  assert(n != 0 && "Divide by zero?");
1791  if (n == 1) {
1792    unsigned divisor = V[0];
1793    unsigned remainder = 0;
1794    for (int i = m+n-1; i >= 0; i--) {
1795      uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
1796      if (partial_dividend == 0) {
1797        Q[i] = 0;
1798        remainder = 0;
1799      } else if (partial_dividend < divisor) {
1800        Q[i] = 0;
1801        remainder = (unsigned)partial_dividend;
1802      } else if (partial_dividend == divisor) {
1803        Q[i] = 1;
1804        remainder = 0;
1805      } else {
1806        Q[i] = (unsigned)(partial_dividend / divisor);
1807        remainder = (unsigned)(partial_dividend - (Q[i] * divisor));
1808      }
1809    }
1810    if (R)
1811      R[0] = remainder;
1812  } else {
1813    // Now we're ready to invoke the Knuth classical divide algorithm. In this
1814    // case n > 1.
1815    KnuthDiv(U, V, Q, R, m, n);
1816  }
1817
1818  // If the caller wants the quotient
1819  if (Quotient) {
1820    // Set up the Quotient value's memory.
1821    if (Quotient->BitWidth != LHS.BitWidth) {
1822      if (Quotient->isSingleWord())
1823        Quotient->VAL = 0;
1824      else
1825        delete [] Quotient->pVal;
1826      Quotient->BitWidth = LHS.BitWidth;
1827      if (!Quotient->isSingleWord())
1828        Quotient->pVal = getClearedMemory(Quotient->getNumWords());
1829    } else
1830      Quotient->clear();
1831
1832    // The quotient is in Q. Reconstitute the quotient into Quotient's low
1833    // order words.
1834    if (lhsWords == 1) {
1835      uint64_t tmp =
1836        uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
1837      if (Quotient->isSingleWord())
1838        Quotient->VAL = tmp;
1839      else
1840        Quotient->pVal[0] = tmp;
1841    } else {
1842      assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
1843      for (unsigned i = 0; i < lhsWords; ++i)
1844        Quotient->pVal[i] =
1845          uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1846    }
1847  }
1848
1849  // If the caller wants the remainder
1850  if (Remainder) {
1851    // Set up the Remainder value's memory.
1852    if (Remainder->BitWidth != RHS.BitWidth) {
1853      if (Remainder->isSingleWord())
1854        Remainder->VAL = 0;
1855      else
1856        delete [] Remainder->pVal;
1857      Remainder->BitWidth = RHS.BitWidth;
1858      if (!Remainder->isSingleWord())
1859        Remainder->pVal = getClearedMemory(Remainder->getNumWords());
1860    } else
1861      Remainder->clear();
1862
1863    // The remainder is in R. Reconstitute the remainder into Remainder's low
1864    // order words.
1865    if (rhsWords == 1) {
1866      uint64_t tmp =
1867        uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
1868      if (Remainder->isSingleWord())
1869        Remainder->VAL = tmp;
1870      else
1871        Remainder->pVal[0] = tmp;
1872    } else {
1873      assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
1874      for (unsigned i = 0; i < rhsWords; ++i)
1875        Remainder->pVal[i] =
1876          uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1877    }
1878  }
1879
1880  // Clean up the memory we allocated.
1881  if (U != &SPACE[0]) {
1882    delete [] U;
1883    delete [] V;
1884    delete [] Q;
1885    delete [] R;
1886  }
1887}
1888
1889APInt APInt::udiv(const APInt& RHS) const {
1890  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1891
1892  // First, deal with the easy case
1893  if (isSingleWord()) {
1894    assert(RHS.VAL != 0 && "Divide by zero?");
1895    return APInt(BitWidth, VAL / RHS.VAL);
1896  }
1897
1898  // Get some facts about the LHS and RHS number of bits and words
1899  unsigned rhsBits = RHS.getActiveBits();
1900  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1901  assert(rhsWords && "Divided by zero???");
1902  unsigned lhsBits = this->getActiveBits();
1903  unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1904
1905  // Deal with some degenerate cases
1906  if (!lhsWords)
1907    // 0 / X ===> 0
1908    return APInt(BitWidth, 0);
1909  else if (lhsWords < rhsWords || this->ult(RHS)) {
1910    // X / Y ===> 0, iff X < Y
1911    return APInt(BitWidth, 0);
1912  } else if (*this == RHS) {
1913    // X / X ===> 1
1914    return APInt(BitWidth, 1);
1915  } else if (lhsWords == 1 && rhsWords == 1) {
1916    // All high words are zero, just use native divide
1917    return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
1918  }
1919
1920  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1921  APInt Quotient(1,0); // to hold result.
1922  divide(*this, lhsWords, RHS, rhsWords, &Quotient, 0);
1923  return Quotient;
1924}
1925
1926APInt APInt::urem(const APInt& RHS) const {
1927  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1928  if (isSingleWord()) {
1929    assert(RHS.VAL != 0 && "Remainder by zero?");
1930    return APInt(BitWidth, VAL % RHS.VAL);
1931  }
1932
1933  // Get some facts about the LHS
1934  unsigned lhsBits = getActiveBits();
1935  unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
1936
1937  // Get some facts about the RHS
1938  unsigned rhsBits = RHS.getActiveBits();
1939  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1940  assert(rhsWords && "Performing remainder operation by zero ???");
1941
1942  // Check the degenerate cases
1943  if (lhsWords == 0) {
1944    // 0 % Y ===> 0
1945    return APInt(BitWidth, 0);
1946  } else if (lhsWords < rhsWords || this->ult(RHS)) {
1947    // X % Y ===> X, iff X < Y
1948    return *this;
1949  } else if (*this == RHS) {
1950    // X % X == 0;
1951    return APInt(BitWidth, 0);
1952  } else if (lhsWords == 1) {
1953    // All high words are zero, just use native remainder
1954    return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
1955  }
1956
1957  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1958  APInt Remainder(1,0);
1959  divide(*this, lhsWords, RHS, rhsWords, 0, &Remainder);
1960  return Remainder;
1961}
1962
1963void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1964                    APInt &Quotient, APInt &Remainder) {
1965  // Get some size facts about the dividend and divisor
1966  unsigned lhsBits  = LHS.getActiveBits();
1967  unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1968  unsigned rhsBits  = RHS.getActiveBits();
1969  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1970
1971  // Check the degenerate cases
1972  if (lhsWords == 0) {
1973    Quotient = 0;                // 0 / Y ===> 0
1974    Remainder = 0;               // 0 % Y ===> 0
1975    return;
1976  }
1977
1978  if (lhsWords < rhsWords || LHS.ult(RHS)) {
1979    Quotient = 0;               // X / Y ===> 0, iff X < Y
1980    Remainder = LHS;            // X % Y ===> X, iff X < Y
1981    return;
1982  }
1983
1984  if (LHS == RHS) {
1985    Quotient  = 1;              // X / X ===> 1
1986    Remainder = 0;              // X % X ===> 0;
1987    return;
1988  }
1989
1990  if (lhsWords == 1 && rhsWords == 1) {
1991    // There is only one word to consider so use the native versions.
1992    uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
1993    uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
1994    Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
1995    Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
1996    return;
1997  }
1998
1999  // Okay, lets do it the long way
2000  divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
2001}
2002
2003void APInt::fromString(unsigned numbits, const char *str, unsigned slen,
2004                       uint8_t radix) {
2005  // Check our assumptions here
2006  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
2007         "Radix should be 2, 8, 10, or 16!");
2008  assert(str && "String is null?");
2009  bool isNeg = str[0] == '-';
2010  if (isNeg)
2011    str++, slen--;
2012  assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2013  assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2014  assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2015  assert((((slen-1)*64)/22 <= numbits || radix != 10) && "Insufficient bit width");
2016
2017  // Allocate memory
2018  if (!isSingleWord())
2019    pVal = getClearedMemory(getNumWords());
2020
2021  // Figure out if we can shift instead of multiply
2022  unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2023
2024  // Set up an APInt for the digit to add outside the loop so we don't
2025  // constantly construct/destruct it.
2026  APInt apdigit(getBitWidth(), 0);
2027  APInt apradix(getBitWidth(), radix);
2028
2029  // Enter digit traversal loop
2030  for (unsigned i = 0; i < slen; i++) {
2031    // Get a digit
2032    unsigned digit = 0;
2033    char cdigit = str[i];
2034    if (radix == 16) {
2035      if (!isxdigit(cdigit))
2036        LLVM_UNREACHABLE("Invalid hex digit in string");
2037      if (isdigit(cdigit))
2038        digit = cdigit - '0';
2039      else if (cdigit >= 'a')
2040        digit = cdigit - 'a' + 10;
2041      else if (cdigit >= 'A')
2042        digit = cdigit - 'A' + 10;
2043      else
2044        LLVM_UNREACHABLE("huh? we shouldn't get here");
2045    } else if (isdigit(cdigit)) {
2046      digit = cdigit - '0';
2047      assert((radix == 10 ||
2048              (radix == 8 && digit != 8 && digit != 9) ||
2049              (radix == 2 && (digit == 0 || digit == 1))) &&
2050             "Invalid digit in string for given radix");
2051    } else {
2052      LLVM_UNREACHABLE("Invalid character in digit string");
2053    }
2054
2055    // Shift or multiply the value by the radix
2056    if (slen > 1) {
2057      if (shift)
2058        *this <<= shift;
2059      else
2060        *this *= apradix;
2061    }
2062
2063    // Add in the digit we just interpreted
2064    if (apdigit.isSingleWord())
2065      apdigit.VAL = digit;
2066    else
2067      apdigit.pVal[0] = digit;
2068    *this += apdigit;
2069  }
2070  // If its negative, put it in two's complement form
2071  if (isNeg) {
2072    (*this)--;
2073    this->flip();
2074  }
2075}
2076
2077void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2078                     bool Signed) const {
2079  assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) &&
2080         "Radix should be 2, 8, 10, or 16!");
2081
2082  // First, check for a zero value and just short circuit the logic below.
2083  if (*this == 0) {
2084    Str.push_back('0');
2085    return;
2086  }
2087
2088  static const char Digits[] = "0123456789ABCDEF";
2089
2090  if (isSingleWord()) {
2091    char Buffer[65];
2092    char *BufPtr = Buffer+65;
2093
2094    uint64_t N;
2095    if (Signed) {
2096      int64_t I = getSExtValue();
2097      if (I < 0) {
2098        Str.push_back('-');
2099        I = -I;
2100      }
2101      N = I;
2102    } else {
2103      N = getZExtValue();
2104    }
2105
2106    while (N) {
2107      *--BufPtr = Digits[N % Radix];
2108      N /= Radix;
2109    }
2110    Str.append(BufPtr, Buffer+65);
2111    return;
2112  }
2113
2114  APInt Tmp(*this);
2115
2116  if (Signed && isNegative()) {
2117    // They want to print the signed version and it is a negative value
2118    // Flip the bits and add one to turn it into the equivalent positive
2119    // value and put a '-' in the result.
2120    Tmp.flip();
2121    Tmp++;
2122    Str.push_back('-');
2123  }
2124
2125  // We insert the digits backward, then reverse them to get the right order.
2126  unsigned StartDig = Str.size();
2127
2128  // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2129  // because the number of bits per digit (1, 3 and 4 respectively) divides
2130  // equaly.  We just shift until the value is zero.
2131  if (Radix != 10) {
2132    // Just shift tmp right for each digit width until it becomes zero
2133    unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2134    unsigned MaskAmt = Radix - 1;
2135
2136    while (Tmp != 0) {
2137      unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2138      Str.push_back(Digits[Digit]);
2139      Tmp = Tmp.lshr(ShiftAmt);
2140    }
2141  } else {
2142    APInt divisor(4, 10);
2143    while (Tmp != 0) {
2144      APInt APdigit(1, 0);
2145      APInt tmp2(Tmp.getBitWidth(), 0);
2146      divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
2147             &APdigit);
2148      unsigned Digit = (unsigned)APdigit.getZExtValue();
2149      assert(Digit < Radix && "divide failed");
2150      Str.push_back(Digits[Digit]);
2151      Tmp = tmp2;
2152    }
2153  }
2154
2155  // Reverse the digits before returning.
2156  std::reverse(Str.begin()+StartDig, Str.end());
2157}
2158
2159/// toString - This returns the APInt as a std::string.  Note that this is an
2160/// inefficient method.  It is better to pass in a SmallVector/SmallString
2161/// to the methods above.
2162std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2163  SmallString<40> S;
2164  toString(S, Radix, Signed);
2165  return S.c_str();
2166}
2167
2168
2169void APInt::dump() const {
2170  SmallString<40> S, U;
2171  this->toStringUnsigned(U);
2172  this->toStringSigned(S);
2173  fprintf(stderr, "APInt(%db, %su %ss)", BitWidth, U.c_str(), S.c_str());
2174}
2175
2176void APInt::print(raw_ostream &OS, bool isSigned) const {
2177  SmallString<40> S;
2178  this->toString(S, 10, isSigned);
2179  OS << S.c_str();
2180}
2181
2182std::ostream &llvm::operator<<(std::ostream &o, const APInt &I) {
2183  raw_os_ostream OS(o);
2184  OS << I;
2185  return o;
2186}
2187
2188// This implements a variety of operations on a representation of
2189// arbitrary precision, two's-complement, bignum integer values.
2190
2191/* Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2192   and unrestricting assumption.  */
2193#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
2194COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0);
2195
2196/* Some handy functions local to this file.  */
2197namespace {
2198
2199  /* Returns the integer part with the least significant BITS set.
2200     BITS cannot be zero.  */
2201  static inline integerPart
2202  lowBitMask(unsigned int bits)
2203  {
2204    assert (bits != 0 && bits <= integerPartWidth);
2205
2206    return ~(integerPart) 0 >> (integerPartWidth - bits);
2207  }
2208
2209  /* Returns the value of the lower half of PART.  */
2210  static inline integerPart
2211  lowHalf(integerPart part)
2212  {
2213    return part & lowBitMask(integerPartWidth / 2);
2214  }
2215
2216  /* Returns the value of the upper half of PART.  */
2217  static inline integerPart
2218  highHalf(integerPart part)
2219  {
2220    return part >> (integerPartWidth / 2);
2221  }
2222
2223  /* Returns the bit number of the most significant set bit of a part.
2224     If the input number has no bits set -1U is returned.  */
2225  static unsigned int
2226  partMSB(integerPart value)
2227  {
2228    unsigned int n, msb;
2229
2230    if (value == 0)
2231      return -1U;
2232
2233    n = integerPartWidth / 2;
2234
2235    msb = 0;
2236    do {
2237      if (value >> n) {
2238        value >>= n;
2239        msb += n;
2240      }
2241
2242      n >>= 1;
2243    } while (n);
2244
2245    return msb;
2246  }
2247
2248  /* Returns the bit number of the least significant set bit of a
2249     part.  If the input number has no bits set -1U is returned.  */
2250  static unsigned int
2251  partLSB(integerPart value)
2252  {
2253    unsigned int n, lsb;
2254
2255    if (value == 0)
2256      return -1U;
2257
2258    lsb = integerPartWidth - 1;
2259    n = integerPartWidth / 2;
2260
2261    do {
2262      if (value << n) {
2263        value <<= n;
2264        lsb -= n;
2265      }
2266
2267      n >>= 1;
2268    } while (n);
2269
2270    return lsb;
2271  }
2272}
2273
2274/* Sets the least significant part of a bignum to the input value, and
2275   zeroes out higher parts.  */
2276void
2277APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts)
2278{
2279  unsigned int i;
2280
2281  assert (parts > 0);
2282
2283  dst[0] = part;
2284  for(i = 1; i < parts; i++)
2285    dst[i] = 0;
2286}
2287
2288/* Assign one bignum to another.  */
2289void
2290APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts)
2291{
2292  unsigned int i;
2293
2294  for(i = 0; i < parts; i++)
2295    dst[i] = src[i];
2296}
2297
2298/* Returns true if a bignum is zero, false otherwise.  */
2299bool
2300APInt::tcIsZero(const integerPart *src, unsigned int parts)
2301{
2302  unsigned int i;
2303
2304  for(i = 0; i < parts; i++)
2305    if (src[i])
2306      return false;
2307
2308  return true;
2309}
2310
2311/* Extract the given bit of a bignum; returns 0 or 1.  */
2312int
2313APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
2314{
2315  return(parts[bit / integerPartWidth]
2316         & ((integerPart) 1 << bit % integerPartWidth)) != 0;
2317}
2318
2319/* Set the given bit of a bignum.  */
2320void
2321APInt::tcSetBit(integerPart *parts, unsigned int bit)
2322{
2323  parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth);
2324}
2325
2326/* Returns the bit number of the least significant set bit of a
2327   number.  If the input number has no bits set -1U is returned.  */
2328unsigned int
2329APInt::tcLSB(const integerPart *parts, unsigned int n)
2330{
2331  unsigned int i, lsb;
2332
2333  for(i = 0; i < n; i++) {
2334      if (parts[i] != 0) {
2335          lsb = partLSB(parts[i]);
2336
2337          return lsb + i * integerPartWidth;
2338      }
2339  }
2340
2341  return -1U;
2342}
2343
2344/* Returns the bit number of the most significant set bit of a number.
2345   If the input number has no bits set -1U is returned.  */
2346unsigned int
2347APInt::tcMSB(const integerPart *parts, unsigned int n)
2348{
2349  unsigned int msb;
2350
2351  do {
2352      --n;
2353
2354      if (parts[n] != 0) {
2355          msb = partMSB(parts[n]);
2356
2357          return msb + n * integerPartWidth;
2358      }
2359  } while (n);
2360
2361  return -1U;
2362}
2363
2364/* Copy the bit vector of width srcBITS from SRC, starting at bit
2365   srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2366   the least significant bit of DST.  All high bits above srcBITS in
2367   DST are zero-filled.  */
2368void
2369APInt::tcExtract(integerPart *dst, unsigned int dstCount,const integerPart *src,
2370                 unsigned int srcBits, unsigned int srcLSB)
2371{
2372  unsigned int firstSrcPart, dstParts, shift, n;
2373
2374  dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth;
2375  assert (dstParts <= dstCount);
2376
2377  firstSrcPart = srcLSB / integerPartWidth;
2378  tcAssign (dst, src + firstSrcPart, dstParts);
2379
2380  shift = srcLSB % integerPartWidth;
2381  tcShiftRight (dst, dstParts, shift);
2382
2383  /* We now have (dstParts * integerPartWidth - shift) bits from SRC
2384     in DST.  If this is less that srcBits, append the rest, else
2385     clear the high bits.  */
2386  n = dstParts * integerPartWidth - shift;
2387  if (n < srcBits) {
2388    integerPart mask = lowBitMask (srcBits - n);
2389    dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2390                          << n % integerPartWidth);
2391  } else if (n > srcBits) {
2392    if (srcBits % integerPartWidth)
2393      dst[dstParts - 1] &= lowBitMask (srcBits % integerPartWidth);
2394  }
2395
2396  /* Clear high parts.  */
2397  while (dstParts < dstCount)
2398    dst[dstParts++] = 0;
2399}
2400
2401/* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2402integerPart
2403APInt::tcAdd(integerPart *dst, const integerPart *rhs,
2404             integerPart c, unsigned int parts)
2405{
2406  unsigned int i;
2407
2408  assert(c <= 1);
2409
2410  for(i = 0; i < parts; i++) {
2411    integerPart l;
2412
2413    l = dst[i];
2414    if (c) {
2415      dst[i] += rhs[i] + 1;
2416      c = (dst[i] <= l);
2417    } else {
2418      dst[i] += rhs[i];
2419      c = (dst[i] < l);
2420    }
2421  }
2422
2423  return c;
2424}
2425
2426/* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2427integerPart
2428APInt::tcSubtract(integerPart *dst, const integerPart *rhs,
2429                  integerPart c, unsigned int parts)
2430{
2431  unsigned int i;
2432
2433  assert(c <= 1);
2434
2435  for(i = 0; i < parts; i++) {
2436    integerPart l;
2437
2438    l = dst[i];
2439    if (c) {
2440      dst[i] -= rhs[i] + 1;
2441      c = (dst[i] >= l);
2442    } else {
2443      dst[i] -= rhs[i];
2444      c = (dst[i] > l);
2445    }
2446  }
2447
2448  return c;
2449}
2450
2451/* Negate a bignum in-place.  */
2452void
2453APInt::tcNegate(integerPart *dst, unsigned int parts)
2454{
2455  tcComplement(dst, parts);
2456  tcIncrement(dst, parts);
2457}
2458
2459/*  DST += SRC * MULTIPLIER + CARRY   if add is true
2460    DST  = SRC * MULTIPLIER + CARRY   if add is false
2461
2462    Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2463    they must start at the same point, i.e. DST == SRC.
2464
2465    If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2466    returned.  Otherwise DST is filled with the least significant
2467    DSTPARTS parts of the result, and if all of the omitted higher
2468    parts were zero return zero, otherwise overflow occurred and
2469    return one.  */
2470int
2471APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
2472                      integerPart multiplier, integerPart carry,
2473                      unsigned int srcParts, unsigned int dstParts,
2474                      bool add)
2475{
2476  unsigned int i, n;
2477
2478  /* Otherwise our writes of DST kill our later reads of SRC.  */
2479  assert(dst <= src || dst >= src + srcParts);
2480  assert(dstParts <= srcParts + 1);
2481
2482  /* N loops; minimum of dstParts and srcParts.  */
2483  n = dstParts < srcParts ? dstParts: srcParts;
2484
2485  for(i = 0; i < n; i++) {
2486    integerPart low, mid, high, srcPart;
2487
2488      /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2489
2490         This cannot overflow, because
2491
2492         (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2493
2494         which is less than n^2.  */
2495
2496    srcPart = src[i];
2497
2498    if (multiplier == 0 || srcPart == 0)        {
2499      low = carry;
2500      high = 0;
2501    } else {
2502      low = lowHalf(srcPart) * lowHalf(multiplier);
2503      high = highHalf(srcPart) * highHalf(multiplier);
2504
2505      mid = lowHalf(srcPart) * highHalf(multiplier);
2506      high += highHalf(mid);
2507      mid <<= integerPartWidth / 2;
2508      if (low + mid < low)
2509        high++;
2510      low += mid;
2511
2512      mid = highHalf(srcPart) * lowHalf(multiplier);
2513      high += highHalf(mid);
2514      mid <<= integerPartWidth / 2;
2515      if (low + mid < low)
2516        high++;
2517      low += mid;
2518
2519      /* Now add carry.  */
2520      if (low + carry < low)
2521        high++;
2522      low += carry;
2523    }
2524
2525    if (add) {
2526      /* And now DST[i], and store the new low part there.  */
2527      if (low + dst[i] < low)
2528        high++;
2529      dst[i] += low;
2530    } else
2531      dst[i] = low;
2532
2533    carry = high;
2534  }
2535
2536  if (i < dstParts) {
2537    /* Full multiplication, there is no overflow.  */
2538    assert(i + 1 == dstParts);
2539    dst[i] = carry;
2540    return 0;
2541  } else {
2542    /* We overflowed if there is carry.  */
2543    if (carry)
2544      return 1;
2545
2546    /* We would overflow if any significant unwritten parts would be
2547       non-zero.  This is true if any remaining src parts are non-zero
2548       and the multiplier is non-zero.  */
2549    if (multiplier)
2550      for(; i < srcParts; i++)
2551        if (src[i])
2552          return 1;
2553
2554    /* We fitted in the narrow destination.  */
2555    return 0;
2556  }
2557}
2558
2559/* DST = LHS * RHS, where DST has the same width as the operands and
2560   is filled with the least significant parts of the result.  Returns
2561   one if overflow occurred, otherwise zero.  DST must be disjoint
2562   from both operands.  */
2563int
2564APInt::tcMultiply(integerPart *dst, const integerPart *lhs,
2565                  const integerPart *rhs, unsigned int parts)
2566{
2567  unsigned int i;
2568  int overflow;
2569
2570  assert(dst != lhs && dst != rhs);
2571
2572  overflow = 0;
2573  tcSet(dst, 0, parts);
2574
2575  for(i = 0; i < parts; i++)
2576    overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2577                               parts - i, true);
2578
2579  return overflow;
2580}
2581
2582/* DST = LHS * RHS, where DST has width the sum of the widths of the
2583   operands.  No overflow occurs.  DST must be disjoint from both
2584   operands.  Returns the number of parts required to hold the
2585   result.  */
2586unsigned int
2587APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs,
2588                      const integerPart *rhs, unsigned int lhsParts,
2589                      unsigned int rhsParts)
2590{
2591  /* Put the narrower number on the LHS for less loops below.  */
2592  if (lhsParts > rhsParts) {
2593    return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2594  } else {
2595    unsigned int n;
2596
2597    assert(dst != lhs && dst != rhs);
2598
2599    tcSet(dst, 0, rhsParts);
2600
2601    for(n = 0; n < lhsParts; n++)
2602      tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true);
2603
2604    n = lhsParts + rhsParts;
2605
2606    return n - (dst[n - 1] == 0);
2607  }
2608}
2609
2610/* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2611   Otherwise set LHS to LHS / RHS with the fractional part discarded,
2612   set REMAINDER to the remainder, return zero.  i.e.
2613
2614   OLD_LHS = RHS * LHS + REMAINDER
2615
2616   SCRATCH is a bignum of the same size as the operands and result for
2617   use by the routine; its contents need not be initialized and are
2618   destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2619*/
2620int
2621APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
2622                integerPart *remainder, integerPart *srhs,
2623                unsigned int parts)
2624{
2625  unsigned int n, shiftCount;
2626  integerPart mask;
2627
2628  assert(lhs != remainder && lhs != srhs && remainder != srhs);
2629
2630  shiftCount = tcMSB(rhs, parts) + 1;
2631  if (shiftCount == 0)
2632    return true;
2633
2634  shiftCount = parts * integerPartWidth - shiftCount;
2635  n = shiftCount / integerPartWidth;
2636  mask = (integerPart) 1 << (shiftCount % integerPartWidth);
2637
2638  tcAssign(srhs, rhs, parts);
2639  tcShiftLeft(srhs, parts, shiftCount);
2640  tcAssign(remainder, lhs, parts);
2641  tcSet(lhs, 0, parts);
2642
2643  /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2644     the total.  */
2645  for(;;) {
2646      int compare;
2647
2648      compare = tcCompare(remainder, srhs, parts);
2649      if (compare >= 0) {
2650        tcSubtract(remainder, srhs, 0, parts);
2651        lhs[n] |= mask;
2652      }
2653
2654      if (shiftCount == 0)
2655        break;
2656      shiftCount--;
2657      tcShiftRight(srhs, parts, 1);
2658      if ((mask >>= 1) == 0)
2659        mask = (integerPart) 1 << (integerPartWidth - 1), n--;
2660  }
2661
2662  return false;
2663}
2664
2665/* Shift a bignum left COUNT bits in-place.  Shifted in bits are zero.
2666   There are no restrictions on COUNT.  */
2667void
2668APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count)
2669{
2670  if (count) {
2671    unsigned int jump, shift;
2672
2673    /* Jump is the inter-part jump; shift is is intra-part shift.  */
2674    jump = count / integerPartWidth;
2675    shift = count % integerPartWidth;
2676
2677    while (parts > jump) {
2678      integerPart part;
2679
2680      parts--;
2681
2682      /* dst[i] comes from the two parts src[i - jump] and, if we have
2683         an intra-part shift, src[i - jump - 1].  */
2684      part = dst[parts - jump];
2685      if (shift) {
2686        part <<= shift;
2687        if (parts >= jump + 1)
2688          part |= dst[parts - jump - 1] >> (integerPartWidth - shift);
2689      }
2690
2691      dst[parts] = part;
2692    }
2693
2694    while (parts > 0)
2695      dst[--parts] = 0;
2696  }
2697}
2698
2699/* Shift a bignum right COUNT bits in-place.  Shifted in bits are
2700   zero.  There are no restrictions on COUNT.  */
2701void
2702APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
2703{
2704  if (count) {
2705    unsigned int i, jump, shift;
2706
2707    /* Jump is the inter-part jump; shift is is intra-part shift.  */
2708    jump = count / integerPartWidth;
2709    shift = count % integerPartWidth;
2710
2711    /* Perform the shift.  This leaves the most significant COUNT bits
2712       of the result at zero.  */
2713    for(i = 0; i < parts; i++) {
2714      integerPart part;
2715
2716      if (i + jump >= parts) {
2717        part = 0;
2718      } else {
2719        part = dst[i + jump];
2720        if (shift) {
2721          part >>= shift;
2722          if (i + jump + 1 < parts)
2723            part |= dst[i + jump + 1] << (integerPartWidth - shift);
2724        }
2725      }
2726
2727      dst[i] = part;
2728    }
2729  }
2730}
2731
2732/* Bitwise and of two bignums.  */
2733void
2734APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts)
2735{
2736  unsigned int i;
2737
2738  for(i = 0; i < parts; i++)
2739    dst[i] &= rhs[i];
2740}
2741
2742/* Bitwise inclusive or of two bignums.  */
2743void
2744APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts)
2745{
2746  unsigned int i;
2747
2748  for(i = 0; i < parts; i++)
2749    dst[i] |= rhs[i];
2750}
2751
2752/* Bitwise exclusive or of two bignums.  */
2753void
2754APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts)
2755{
2756  unsigned int i;
2757
2758  for(i = 0; i < parts; i++)
2759    dst[i] ^= rhs[i];
2760}
2761
2762/* Complement a bignum in-place.  */
2763void
2764APInt::tcComplement(integerPart *dst, unsigned int parts)
2765{
2766  unsigned int i;
2767
2768  for(i = 0; i < parts; i++)
2769    dst[i] = ~dst[i];
2770}
2771
2772/* Comparison (unsigned) of two bignums.  */
2773int
2774APInt::tcCompare(const integerPart *lhs, const integerPart *rhs,
2775                 unsigned int parts)
2776{
2777  while (parts) {
2778      parts--;
2779      if (lhs[parts] == rhs[parts])
2780        continue;
2781
2782      if (lhs[parts] > rhs[parts])
2783        return 1;
2784      else
2785        return -1;
2786    }
2787
2788  return 0;
2789}
2790
2791/* Increment a bignum in-place, return the carry flag.  */
2792integerPart
2793APInt::tcIncrement(integerPart *dst, unsigned int parts)
2794{
2795  unsigned int i;
2796
2797  for(i = 0; i < parts; i++)
2798    if (++dst[i] != 0)
2799      break;
2800
2801  return i == parts;
2802}
2803
2804/* Set the least significant BITS bits of a bignum, clear the
2805   rest.  */
2806void
2807APInt::tcSetLeastSignificantBits(integerPart *dst, unsigned int parts,
2808                                 unsigned int bits)
2809{
2810  unsigned int i;
2811
2812  i = 0;
2813  while (bits > integerPartWidth) {
2814    dst[i++] = ~(integerPart) 0;
2815    bits -= integerPartWidth;
2816  }
2817
2818  if (bits)
2819    dst[i++] = ~(integerPart) 0 >> (integerPartWidth - bits);
2820
2821  while (i < parts)
2822    dst[i++] = 0;
2823}
2824