APInt.cpp revision 3ba292dbc2acee2d1052fb7ffe332e2164147b47
1//===-- APInt.cpp - Implement APInt class ---------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements a class to represent arbitrary precision integer
11// constant values and provide a variety of arithmetic operations on them.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "apint"
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/StringRef.h"
18#include "llvm/ADT/FoldingSet.h"
19#include "llvm/ADT/SmallString.h"
20#include "llvm/Support/Debug.h"
21#include "llvm/Support/ErrorHandling.h"
22#include "llvm/Support/MathExtras.h"
23#include "llvm/Support/raw_ostream.h"
24#include <cmath>
25#include <limits>
26#include <cstring>
27#include <cstdlib>
28using namespace llvm;
29
30/// A utility function for allocating memory, checking for allocation failures,
31/// and ensuring the contents are zeroed.
32inline static uint64_t* getClearedMemory(unsigned numWords) {
33  uint64_t * result = new uint64_t[numWords];
34  assert(result && "APInt memory allocation fails!");
35  memset(result, 0, numWords * sizeof(uint64_t));
36  return result;
37}
38
39/// A utility function for allocating memory and checking for allocation
40/// failure.  The content is not zeroed.
41inline static uint64_t* getMemory(unsigned numWords) {
42  uint64_t * result = new uint64_t[numWords];
43  assert(result && "APInt memory allocation fails!");
44  return result;
45}
46
47/// A utility function that converts a character to a digit.
48inline static unsigned getDigit(char cdigit, uint8_t radix) {
49  unsigned r;
50
51  if (radix == 16) {
52    r = cdigit - '0';
53    if (r <= 9)
54      return r;
55
56    r = cdigit - 'A';
57    if (r <= 5)
58      return r + 10;
59
60    r = cdigit - 'a';
61    if (r <= 5)
62      return r + 10;
63  }
64
65  r = cdigit - '0';
66  if (r < radix)
67    return r;
68
69  return -1U;
70}
71
72
73void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) {
74  pVal = getClearedMemory(getNumWords());
75  pVal[0] = val;
76  if (isSigned && int64_t(val) < 0)
77    for (unsigned i = 1; i < getNumWords(); ++i)
78      pVal[i] = -1ULL;
79}
80
81void APInt::initSlowCase(const APInt& that) {
82  pVal = getMemory(getNumWords());
83  memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
84}
85
86void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
87  assert(BitWidth && "Bitwidth too small");
88  assert(bigVal.data() && "Null pointer detected!");
89  if (isSingleWord())
90    VAL = bigVal[0];
91  else {
92    // Get memory, cleared to 0
93    pVal = getClearedMemory(getNumWords());
94    // Calculate the number of words to copy
95    unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
96    // Copy the words from bigVal to pVal
97    memcpy(pVal, bigVal.data(), words * APINT_WORD_SIZE);
98  }
99  // Make sure unused high bits are cleared
100  clearUnusedBits();
101}
102
103APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
104  : BitWidth(numBits), VAL(0) {
105  initFromArray(bigVal);
106}
107
108APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
109  : BitWidth(numBits), VAL(0) {
110  initFromArray(makeArrayRef(bigVal, numWords));
111}
112
113APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
114  : BitWidth(numbits), VAL(0) {
115  assert(BitWidth && "Bitwidth too small");
116  fromString(numbits, Str, radix);
117}
118
119APInt& APInt::AssignSlowCase(const APInt& RHS) {
120  // Don't do anything for X = X
121  if (this == &RHS)
122    return *this;
123
124  if (BitWidth == RHS.getBitWidth()) {
125    // assume same bit-width single-word case is already handled
126    assert(!isSingleWord());
127    memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
128    return *this;
129  }
130
131  if (isSingleWord()) {
132    // assume case where both are single words is already handled
133    assert(!RHS.isSingleWord());
134    VAL = 0;
135    pVal = getMemory(RHS.getNumWords());
136    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
137  } else if (getNumWords() == RHS.getNumWords())
138    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
139  else if (RHS.isSingleWord()) {
140    delete [] pVal;
141    VAL = RHS.VAL;
142  } else {
143    delete [] pVal;
144    pVal = getMemory(RHS.getNumWords());
145    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
146  }
147  BitWidth = RHS.BitWidth;
148  return clearUnusedBits();
149}
150
151APInt& APInt::operator=(uint64_t RHS) {
152  if (isSingleWord())
153    VAL = RHS;
154  else {
155    pVal[0] = RHS;
156    memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
157  }
158  return clearUnusedBits();
159}
160
161/// Profile - This method 'profiles' an APInt for use with FoldingSet.
162void APInt::Profile(FoldingSetNodeID& ID) const {
163  ID.AddInteger(BitWidth);
164
165  if (isSingleWord()) {
166    ID.AddInteger(VAL);
167    return;
168  }
169
170  unsigned NumWords = getNumWords();
171  for (unsigned i = 0; i < NumWords; ++i)
172    ID.AddInteger(pVal[i]);
173}
174
175/// add_1 - This function adds a single "digit" integer, y, to the multiple
176/// "digit" integer array,  x[]. x[] is modified to reflect the addition and
177/// 1 is returned if there is a carry out, otherwise 0 is returned.
178/// @returns the carry of the addition.
179static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
180  for (unsigned i = 0; i < len; ++i) {
181    dest[i] = y + x[i];
182    if (dest[i] < y)
183      y = 1; // Carry one to next digit.
184    else {
185      y = 0; // No need to carry so exit early
186      break;
187    }
188  }
189  return y;
190}
191
192/// @brief Prefix increment operator. Increments the APInt by one.
193APInt& APInt::operator++() {
194  if (isSingleWord())
195    ++VAL;
196  else
197    add_1(pVal, pVal, getNumWords(), 1);
198  return clearUnusedBits();
199}
200
201/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
202/// the multi-digit integer array, x[], propagating the borrowed 1 value until
203/// no further borrowing is neeeded or it runs out of "digits" in x.  The result
204/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
205/// In other words, if y > x then this function returns 1, otherwise 0.
206/// @returns the borrow out of the subtraction
207static bool sub_1(uint64_t x[], unsigned len, uint64_t y) {
208  for (unsigned i = 0; i < len; ++i) {
209    uint64_t X = x[i];
210    x[i] -= y;
211    if (y > X)
212      y = 1;  // We have to "borrow 1" from next "digit"
213    else {
214      y = 0;  // No need to borrow
215      break;  // Remaining digits are unchanged so exit early
216    }
217  }
218  return bool(y);
219}
220
221/// @brief Prefix decrement operator. Decrements the APInt by one.
222APInt& APInt::operator--() {
223  if (isSingleWord())
224    --VAL;
225  else
226    sub_1(pVal, getNumWords(), 1);
227  return clearUnusedBits();
228}
229
230/// add - This function adds the integer array x to the integer array Y and
231/// places the result in dest.
232/// @returns the carry out from the addition
233/// @brief General addition of 64-bit integer arrays
234static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
235                unsigned len) {
236  bool carry = false;
237  for (unsigned i = 0; i< len; ++i) {
238    uint64_t limit = std::min(x[i],y[i]); // must come first in case dest == x
239    dest[i] = x[i] + y[i] + carry;
240    carry = dest[i] < limit || (carry && dest[i] == limit);
241  }
242  return carry;
243}
244
245/// Adds the RHS APint to this APInt.
246/// @returns this, after addition of RHS.
247/// @brief Addition assignment operator.
248APInt& APInt::operator+=(const APInt& RHS) {
249  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
250  if (isSingleWord())
251    VAL += RHS.VAL;
252  else {
253    add(pVal, pVal, RHS.pVal, getNumWords());
254  }
255  return clearUnusedBits();
256}
257
258/// Subtracts the integer array y from the integer array x
259/// @returns returns the borrow out.
260/// @brief Generalized subtraction of 64-bit integer arrays.
261static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
262                unsigned len) {
263  bool borrow = false;
264  for (unsigned i = 0; i < len; ++i) {
265    uint64_t x_tmp = borrow ? x[i] - 1 : x[i];
266    borrow = y[i] > x_tmp || (borrow && x[i] == 0);
267    dest[i] = x_tmp - y[i];
268  }
269  return borrow;
270}
271
272/// Subtracts the RHS APInt from this APInt
273/// @returns this, after subtraction
274/// @brief Subtraction assignment operator.
275APInt& APInt::operator-=(const APInt& RHS) {
276  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
277  if (isSingleWord())
278    VAL -= RHS.VAL;
279  else
280    sub(pVal, pVal, RHS.pVal, getNumWords());
281  return clearUnusedBits();
282}
283
284/// Multiplies an integer array, x, by a uint64_t integer and places the result
285/// into dest.
286/// @returns the carry out of the multiplication.
287/// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
288static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
289  // Split y into high 32-bit part (hy)  and low 32-bit part (ly)
290  uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
291  uint64_t carry = 0;
292
293  // For each digit of x.
294  for (unsigned i = 0; i < len; ++i) {
295    // Split x into high and low words
296    uint64_t lx = x[i] & 0xffffffffULL;
297    uint64_t hx = x[i] >> 32;
298    // hasCarry - A flag to indicate if there is a carry to the next digit.
299    // hasCarry == 0, no carry
300    // hasCarry == 1, has carry
301    // hasCarry == 2, no carry and the calculation result == 0.
302    uint8_t hasCarry = 0;
303    dest[i] = carry + lx * ly;
304    // Determine if the add above introduces carry.
305    hasCarry = (dest[i] < carry) ? 1 : 0;
306    carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
307    // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
308    // (2^32 - 1) + 2^32 = 2^64.
309    hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
310
311    carry += (lx * hy) & 0xffffffffULL;
312    dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
313    carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
314            (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
315  }
316  return carry;
317}
318
319/// Multiplies integer array x by integer array y and stores the result into
320/// the integer array dest. Note that dest's size must be >= xlen + ylen.
321/// @brief Generalized multiplicate of integer arrays.
322static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
323                unsigned ylen) {
324  dest[xlen] = mul_1(dest, x, xlen, y[0]);
325  for (unsigned i = 1; i < ylen; ++i) {
326    uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
327    uint64_t carry = 0, lx = 0, hx = 0;
328    for (unsigned j = 0; j < xlen; ++j) {
329      lx = x[j] & 0xffffffffULL;
330      hx = x[j] >> 32;
331      // hasCarry - A flag to indicate if has carry.
332      // hasCarry == 0, no carry
333      // hasCarry == 1, has carry
334      // hasCarry == 2, no carry and the calculation result == 0.
335      uint8_t hasCarry = 0;
336      uint64_t resul = carry + lx * ly;
337      hasCarry = (resul < carry) ? 1 : 0;
338      carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
339      hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
340
341      carry += (lx * hy) & 0xffffffffULL;
342      resul = (carry << 32) | (resul & 0xffffffffULL);
343      dest[i+j] += resul;
344      carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
345              (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
346              ((lx * hy) >> 32) + hx * hy;
347    }
348    dest[i+xlen] = carry;
349  }
350}
351
352APInt& APInt::operator*=(const APInt& RHS) {
353  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
354  if (isSingleWord()) {
355    VAL *= RHS.VAL;
356    clearUnusedBits();
357    return *this;
358  }
359
360  // Get some bit facts about LHS and check for zero
361  unsigned lhsBits = getActiveBits();
362  unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
363  if (!lhsWords)
364    // 0 * X ===> 0
365    return *this;
366
367  // Get some bit facts about RHS and check for zero
368  unsigned rhsBits = RHS.getActiveBits();
369  unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
370  if (!rhsWords) {
371    // X * 0 ===> 0
372    clearAllBits();
373    return *this;
374  }
375
376  // Allocate space for the result
377  unsigned destWords = rhsWords + lhsWords;
378  uint64_t *dest = getMemory(destWords);
379
380  // Perform the long multiply
381  mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
382
383  // Copy result back into *this
384  clearAllBits();
385  unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
386  memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
387
388  // delete dest array and return
389  delete[] dest;
390  return *this;
391}
392
393APInt& APInt::operator&=(const APInt& RHS) {
394  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
395  if (isSingleWord()) {
396    VAL &= RHS.VAL;
397    return *this;
398  }
399  unsigned numWords = getNumWords();
400  for (unsigned i = 0; i < numWords; ++i)
401    pVal[i] &= RHS.pVal[i];
402  return *this;
403}
404
405APInt& APInt::operator|=(const APInt& RHS) {
406  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
407  if (isSingleWord()) {
408    VAL |= RHS.VAL;
409    return *this;
410  }
411  unsigned numWords = getNumWords();
412  for (unsigned i = 0; i < numWords; ++i)
413    pVal[i] |= RHS.pVal[i];
414  return *this;
415}
416
417APInt& APInt::operator^=(const APInt& RHS) {
418  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
419  if (isSingleWord()) {
420    VAL ^= RHS.VAL;
421    this->clearUnusedBits();
422    return *this;
423  }
424  unsigned numWords = getNumWords();
425  for (unsigned i = 0; i < numWords; ++i)
426    pVal[i] ^= RHS.pVal[i];
427  return clearUnusedBits();
428}
429
430APInt APInt::AndSlowCase(const APInt& RHS) const {
431  unsigned numWords = getNumWords();
432  uint64_t* val = getMemory(numWords);
433  for (unsigned i = 0; i < numWords; ++i)
434    val[i] = pVal[i] & RHS.pVal[i];
435  return APInt(val, getBitWidth());
436}
437
438APInt APInt::OrSlowCase(const APInt& RHS) const {
439  unsigned numWords = getNumWords();
440  uint64_t *val = getMemory(numWords);
441  for (unsigned i = 0; i < numWords; ++i)
442    val[i] = pVal[i] | RHS.pVal[i];
443  return APInt(val, getBitWidth());
444}
445
446APInt APInt::XorSlowCase(const APInt& RHS) const {
447  unsigned numWords = getNumWords();
448  uint64_t *val = getMemory(numWords);
449  for (unsigned i = 0; i < numWords; ++i)
450    val[i] = pVal[i] ^ RHS.pVal[i];
451
452  // 0^0==1 so clear the high bits in case they got set.
453  return APInt(val, getBitWidth()).clearUnusedBits();
454}
455
456bool APInt::operator !() const {
457  if (isSingleWord())
458    return !VAL;
459
460  for (unsigned i = 0; i < getNumWords(); ++i)
461    if (pVal[i])
462      return false;
463  return true;
464}
465
466APInt APInt::operator*(const APInt& RHS) const {
467  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
468  if (isSingleWord())
469    return APInt(BitWidth, VAL * RHS.VAL);
470  APInt Result(*this);
471  Result *= RHS;
472  return Result.clearUnusedBits();
473}
474
475APInt APInt::operator+(const APInt& RHS) const {
476  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
477  if (isSingleWord())
478    return APInt(BitWidth, VAL + RHS.VAL);
479  APInt Result(BitWidth, 0);
480  add(Result.pVal, this->pVal, RHS.pVal, getNumWords());
481  return Result.clearUnusedBits();
482}
483
484APInt APInt::operator-(const APInt& RHS) const {
485  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
486  if (isSingleWord())
487    return APInt(BitWidth, VAL - RHS.VAL);
488  APInt Result(BitWidth, 0);
489  sub(Result.pVal, this->pVal, RHS.pVal, getNumWords());
490  return Result.clearUnusedBits();
491}
492
493bool APInt::operator[](unsigned bitPosition) const {
494  assert(bitPosition < getBitWidth() && "Bit position out of bounds!");
495  return (maskBit(bitPosition) &
496          (isSingleWord() ?  VAL : pVal[whichWord(bitPosition)])) != 0;
497}
498
499bool APInt::EqualSlowCase(const APInt& RHS) const {
500  // Get some facts about the number of bits used in the two operands.
501  unsigned n1 = getActiveBits();
502  unsigned n2 = RHS.getActiveBits();
503
504  // If the number of bits isn't the same, they aren't equal
505  if (n1 != n2)
506    return false;
507
508  // If the number of bits fits in a word, we only need to compare the low word.
509  if (n1 <= APINT_BITS_PER_WORD)
510    return pVal[0] == RHS.pVal[0];
511
512  // Otherwise, compare everything
513  for (int i = whichWord(n1 - 1); i >= 0; --i)
514    if (pVal[i] != RHS.pVal[i])
515      return false;
516  return true;
517}
518
519bool APInt::EqualSlowCase(uint64_t Val) const {
520  unsigned n = getActiveBits();
521  if (n <= APINT_BITS_PER_WORD)
522    return pVal[0] == Val;
523  else
524    return false;
525}
526
527bool APInt::ult(const APInt& RHS) const {
528  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
529  if (isSingleWord())
530    return VAL < RHS.VAL;
531
532  // Get active bit length of both operands
533  unsigned n1 = getActiveBits();
534  unsigned n2 = RHS.getActiveBits();
535
536  // If magnitude of LHS is less than RHS, return true.
537  if (n1 < n2)
538    return true;
539
540  // If magnitude of RHS is greather than LHS, return false.
541  if (n2 < n1)
542    return false;
543
544  // If they bot fit in a word, just compare the low order word
545  if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
546    return pVal[0] < RHS.pVal[0];
547
548  // Otherwise, compare all words
549  unsigned topWord = whichWord(std::max(n1,n2)-1);
550  for (int i = topWord; i >= 0; --i) {
551    if (pVal[i] > RHS.pVal[i])
552      return false;
553    if (pVal[i] < RHS.pVal[i])
554      return true;
555  }
556  return false;
557}
558
559bool APInt::slt(const APInt& RHS) const {
560  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
561  if (isSingleWord()) {
562    int64_t lhsSext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
563    int64_t rhsSext = (int64_t(RHS.VAL) << (64-BitWidth)) >> (64-BitWidth);
564    return lhsSext < rhsSext;
565  }
566
567  APInt lhs(*this);
568  APInt rhs(RHS);
569  bool lhsNeg = isNegative();
570  bool rhsNeg = rhs.isNegative();
571  if (lhsNeg) {
572    // Sign bit is set so perform two's complement to make it positive
573    lhs.flipAllBits();
574    lhs++;
575  }
576  if (rhsNeg) {
577    // Sign bit is set so perform two's complement to make it positive
578    rhs.flipAllBits();
579    rhs++;
580  }
581
582  // Now we have unsigned values to compare so do the comparison if necessary
583  // based on the negativeness of the values.
584  if (lhsNeg)
585    if (rhsNeg)
586      return lhs.ugt(rhs);
587    else
588      return true;
589  else if (rhsNeg)
590    return false;
591  else
592    return lhs.ult(rhs);
593}
594
595void APInt::setBit(unsigned bitPosition) {
596  if (isSingleWord())
597    VAL |= maskBit(bitPosition);
598  else
599    pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
600}
601
602/// Set the given bit to 0 whose position is given as "bitPosition".
603/// @brief Set a given bit to 0.
604void APInt::clearBit(unsigned bitPosition) {
605  if (isSingleWord())
606    VAL &= ~maskBit(bitPosition);
607  else
608    pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
609}
610
611/// @brief Toggle every bit to its opposite value.
612
613/// Toggle a given bit to its opposite value whose position is given
614/// as "bitPosition".
615/// @brief Toggles a given bit to its opposite value.
616void APInt::flipBit(unsigned bitPosition) {
617  assert(bitPosition < BitWidth && "Out of the bit-width range!");
618  if ((*this)[bitPosition]) clearBit(bitPosition);
619  else setBit(bitPosition);
620}
621
622unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
623  assert(!str.empty() && "Invalid string length");
624  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
625         "Radix should be 2, 8, 10, or 16!");
626
627  size_t slen = str.size();
628
629  // Each computation below needs to know if it's negative.
630  StringRef::iterator p = str.begin();
631  unsigned isNegative = *p == '-';
632  if (*p == '-' || *p == '+') {
633    p++;
634    slen--;
635    assert(slen && "String is only a sign, needs a value.");
636  }
637
638  // For radixes of power-of-two values, the bits required is accurately and
639  // easily computed
640  if (radix == 2)
641    return slen + isNegative;
642  if (radix == 8)
643    return slen * 3 + isNegative;
644  if (radix == 16)
645    return slen * 4 + isNegative;
646
647  // This is grossly inefficient but accurate. We could probably do something
648  // with a computation of roughly slen*64/20 and then adjust by the value of
649  // the first few digits. But, I'm not sure how accurate that could be.
650
651  // Compute a sufficient number of bits that is always large enough but might
652  // be too large. This avoids the assertion in the constructor. This
653  // calculation doesn't work appropriately for the numbers 0-9, so just use 4
654  // bits in that case.
655  unsigned sufficient = slen == 1 ? 4 : slen * 64/18;
656
657  // Convert to the actual binary value.
658  APInt tmp(sufficient, StringRef(p, slen), radix);
659
660  // Compute how many bits are required. If the log is infinite, assume we need
661  // just bit.
662  unsigned log = tmp.logBase2();
663  if (log == (unsigned)-1) {
664    return isNegative + 1;
665  } else {
666    return isNegative + log + 1;
667  }
668}
669
670// From http://www.burtleburtle.net, byBob Jenkins.
671// When targeting x86, both GCC and LLVM seem to recognize this as a
672// rotate instruction.
673#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
674
675// From http://www.burtleburtle.net, by Bob Jenkins.
676#define mix(a,b,c) \
677  { \
678    a -= c;  a ^= rot(c, 4);  c += b; \
679    b -= a;  b ^= rot(a, 6);  a += c; \
680    c -= b;  c ^= rot(b, 8);  b += a; \
681    a -= c;  a ^= rot(c,16);  c += b; \
682    b -= a;  b ^= rot(a,19);  a += c; \
683    c -= b;  c ^= rot(b, 4);  b += a; \
684  }
685
686// From http://www.burtleburtle.net, by Bob Jenkins.
687#define final(a,b,c) \
688  { \
689    c ^= b; c -= rot(b,14); \
690    a ^= c; a -= rot(c,11); \
691    b ^= a; b -= rot(a,25); \
692    c ^= b; c -= rot(b,16); \
693    a ^= c; a -= rot(c,4);  \
694    b ^= a; b -= rot(a,14); \
695    c ^= b; c -= rot(b,24); \
696  }
697
698// hashword() was adapted from http://www.burtleburtle.net, by Bob
699// Jenkins.  k is a pointer to an array of uint32_t values; length is
700// the length of the key, in 32-bit chunks.  This version only handles
701// keys that are a multiple of 32 bits in size.
702static inline uint32_t hashword(const uint64_t *k64, size_t length)
703{
704  const uint32_t *k = reinterpret_cast<const uint32_t *>(k64);
705  uint32_t a,b,c;
706
707  /* Set up the internal state */
708  a = b = c = 0xdeadbeef + (((uint32_t)length)<<2);
709
710  /*------------------------------------------------- handle most of the key */
711  while (length > 3) {
712    a += k[0];
713    b += k[1];
714    c += k[2];
715    mix(a,b,c);
716    length -= 3;
717    k += 3;
718  }
719
720  /*------------------------------------------- handle the last 3 uint32_t's */
721  switch (length) {                  /* all the case statements fall through */
722  case 3 : c+=k[2];
723  case 2 : b+=k[1];
724  case 1 : a+=k[0];
725    final(a,b,c);
726    case 0:     /* case 0: nothing left to add */
727      break;
728    }
729  /*------------------------------------------------------ report the result */
730  return c;
731}
732
733// hashword8() was adapted from http://www.burtleburtle.net, by Bob
734// Jenkins.  This computes a 32-bit hash from one 64-bit word.  When
735// targeting x86 (32 or 64 bit), both LLVM and GCC compile this
736// function into about 35 instructions when inlined.
737static inline uint32_t hashword8(const uint64_t k64)
738{
739  uint32_t a,b,c;
740  a = b = c = 0xdeadbeef + 4;
741  b += k64 >> 32;
742  a += k64 & 0xffffffff;
743  final(a,b,c);
744  return c;
745}
746#undef final
747#undef mix
748#undef rot
749
750uint64_t APInt::getHashValue() const {
751  uint64_t hash;
752  if (isSingleWord())
753    hash = hashword8(VAL);
754  else
755    hash = hashword(pVal, getNumWords()*2);
756  return hash;
757}
758
759/// HiBits - This function returns the high "numBits" bits of this APInt.
760APInt APInt::getHiBits(unsigned numBits) const {
761  return APIntOps::lshr(*this, BitWidth - numBits);
762}
763
764/// LoBits - This function returns the low "numBits" bits of this APInt.
765APInt APInt::getLoBits(unsigned numBits) const {
766  return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
767                        BitWidth - numBits);
768}
769
770unsigned APInt::countLeadingZerosSlowCase() const {
771  // Treat the most significand word differently because it might have
772  // meaningless bits set beyond the precision.
773  unsigned BitsInMSW = BitWidth % APINT_BITS_PER_WORD;
774  integerPart MSWMask;
775  if (BitsInMSW) MSWMask = (integerPart(1) << BitsInMSW) - 1;
776  else {
777    MSWMask = ~integerPart(0);
778    BitsInMSW = APINT_BITS_PER_WORD;
779  }
780
781  unsigned i = getNumWords();
782  integerPart MSW = pVal[i-1] & MSWMask;
783  if (MSW)
784    return CountLeadingZeros_64(MSW) - (APINT_BITS_PER_WORD - BitsInMSW);
785
786  unsigned Count = BitsInMSW;
787  for (--i; i > 0u; --i) {
788    if (pVal[i-1] == 0)
789      Count += APINT_BITS_PER_WORD;
790    else {
791      Count += CountLeadingZeros_64(pVal[i-1]);
792      break;
793    }
794  }
795  return Count;
796}
797
798static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) {
799  unsigned Count = 0;
800  if (skip)
801    V <<= skip;
802  while (V && (V & (1ULL << 63))) {
803    Count++;
804    V <<= 1;
805  }
806  return Count;
807}
808
809unsigned APInt::countLeadingOnes() const {
810  if (isSingleWord())
811    return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth);
812
813  unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
814  unsigned shift;
815  if (!highWordBits) {
816    highWordBits = APINT_BITS_PER_WORD;
817    shift = 0;
818  } else {
819    shift = APINT_BITS_PER_WORD - highWordBits;
820  }
821  int i = getNumWords() - 1;
822  unsigned Count = countLeadingOnes_64(pVal[i], shift);
823  if (Count == highWordBits) {
824    for (i--; i >= 0; --i) {
825      if (pVal[i] == -1ULL)
826        Count += APINT_BITS_PER_WORD;
827      else {
828        Count += countLeadingOnes_64(pVal[i], 0);
829        break;
830      }
831    }
832  }
833  return Count;
834}
835
836unsigned APInt::countTrailingZeros() const {
837  if (isSingleWord())
838    return std::min(unsigned(CountTrailingZeros_64(VAL)), BitWidth);
839  unsigned Count = 0;
840  unsigned i = 0;
841  for (; i < getNumWords() && pVal[i] == 0; ++i)
842    Count += APINT_BITS_PER_WORD;
843  if (i < getNumWords())
844    Count += CountTrailingZeros_64(pVal[i]);
845  return std::min(Count, BitWidth);
846}
847
848unsigned APInt::countTrailingOnesSlowCase() const {
849  unsigned Count = 0;
850  unsigned i = 0;
851  for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
852    Count += APINT_BITS_PER_WORD;
853  if (i < getNumWords())
854    Count += CountTrailingOnes_64(pVal[i]);
855  return std::min(Count, BitWidth);
856}
857
858unsigned APInt::countPopulationSlowCase() const {
859  unsigned Count = 0;
860  for (unsigned i = 0; i < getNumWords(); ++i)
861    Count += CountPopulation_64(pVal[i]);
862  return Count;
863}
864
865APInt APInt::byteSwap() const {
866  assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
867  if (BitWidth == 16)
868    return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
869  else if (BitWidth == 32)
870    return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
871  else if (BitWidth == 48) {
872    unsigned Tmp1 = unsigned(VAL >> 16);
873    Tmp1 = ByteSwap_32(Tmp1);
874    uint16_t Tmp2 = uint16_t(VAL);
875    Tmp2 = ByteSwap_16(Tmp2);
876    return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
877  } else if (BitWidth == 64)
878    return APInt(BitWidth, ByteSwap_64(VAL));
879  else {
880    APInt Result(BitWidth, 0);
881    char *pByte = (char*)Result.pVal;
882    for (unsigned i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) {
883      char Tmp = pByte[i];
884      pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i];
885      pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp;
886    }
887    return Result;
888  }
889}
890
891APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
892                                            const APInt& API2) {
893  APInt A = API1, B = API2;
894  while (!!B) {
895    APInt T = B;
896    B = APIntOps::urem(A, B);
897    A = T;
898  }
899  return A;
900}
901
902APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
903  union {
904    double D;
905    uint64_t I;
906  } T;
907  T.D = Double;
908
909  // Get the sign bit from the highest order bit
910  bool isNeg = T.I >> 63;
911
912  // Get the 11-bit exponent and adjust for the 1023 bit bias
913  int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
914
915  // If the exponent is negative, the value is < 0 so just return 0.
916  if (exp < 0)
917    return APInt(width, 0u);
918
919  // Extract the mantissa by clearing the top 12 bits (sign + exponent).
920  uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
921
922  // If the exponent doesn't shift all bits out of the mantissa
923  if (exp < 52)
924    return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
925                    APInt(width, mantissa >> (52 - exp));
926
927  // If the client didn't provide enough bits for us to shift the mantissa into
928  // then the result is undefined, just return 0
929  if (width <= exp - 52)
930    return APInt(width, 0);
931
932  // Otherwise, we have to shift the mantissa bits up to the right location
933  APInt Tmp(width, mantissa);
934  Tmp = Tmp.shl((unsigned)exp - 52);
935  return isNeg ? -Tmp : Tmp;
936}
937
938/// RoundToDouble - This function converts this APInt to a double.
939/// The layout for double is as following (IEEE Standard 754):
940///  --------------------------------------
941/// |  Sign    Exponent    Fraction    Bias |
942/// |-------------------------------------- |
943/// |  1[63]   11[62-52]   52[51-00]   1023 |
944///  --------------------------------------
945double APInt::roundToDouble(bool isSigned) const {
946
947  // Handle the simple case where the value is contained in one uint64_t.
948  // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
949  if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
950    if (isSigned) {
951      int64_t sext = (int64_t(getWord(0)) << (64-BitWidth)) >> (64-BitWidth);
952      return double(sext);
953    } else
954      return double(getWord(0));
955  }
956
957  // Determine if the value is negative.
958  bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
959
960  // Construct the absolute value if we're negative.
961  APInt Tmp(isNeg ? -(*this) : (*this));
962
963  // Figure out how many bits we're using.
964  unsigned n = Tmp.getActiveBits();
965
966  // The exponent (without bias normalization) is just the number of bits
967  // we are using. Note that the sign bit is gone since we constructed the
968  // absolute value.
969  uint64_t exp = n;
970
971  // Return infinity for exponent overflow
972  if (exp > 1023) {
973    if (!isSigned || !isNeg)
974      return std::numeric_limits<double>::infinity();
975    else
976      return -std::numeric_limits<double>::infinity();
977  }
978  exp += 1023; // Increment for 1023 bias
979
980  // Number of bits in mantissa is 52. To obtain the mantissa value, we must
981  // extract the high 52 bits from the correct words in pVal.
982  uint64_t mantissa;
983  unsigned hiWord = whichWord(n-1);
984  if (hiWord == 0) {
985    mantissa = Tmp.pVal[0];
986    if (n > 52)
987      mantissa >>= n - 52; // shift down, we want the top 52 bits.
988  } else {
989    assert(hiWord > 0 && "huh?");
990    uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
991    uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
992    mantissa = hibits | lobits;
993  }
994
995  // The leading bit of mantissa is implicit, so get rid of it.
996  uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
997  union {
998    double D;
999    uint64_t I;
1000  } T;
1001  T.I = sign | (exp << 52) | mantissa;
1002  return T.D;
1003}
1004
1005// Truncate to new width.
1006APInt APInt::trunc(unsigned width) const {
1007  assert(width < BitWidth && "Invalid APInt Truncate request");
1008  assert(width && "Can't truncate to 0 bits");
1009
1010  if (width <= APINT_BITS_PER_WORD)
1011    return APInt(width, getRawData()[0]);
1012
1013  APInt Result(getMemory(getNumWords(width)), width);
1014
1015  // Copy full words.
1016  unsigned i;
1017  for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
1018    Result.pVal[i] = pVal[i];
1019
1020  // Truncate and copy any partial word.
1021  unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
1022  if (bits != 0)
1023    Result.pVal[i] = pVal[i] << bits >> bits;
1024
1025  return Result;
1026}
1027
1028// Sign extend to a new width.
1029APInt APInt::sext(unsigned width) const {
1030  assert(width > BitWidth && "Invalid APInt SignExtend request");
1031
1032  if (width <= APINT_BITS_PER_WORD) {
1033    uint64_t val = VAL << (APINT_BITS_PER_WORD - BitWidth);
1034    val = (int64_t)val >> (width - BitWidth);
1035    return APInt(width, val >> (APINT_BITS_PER_WORD - width));
1036  }
1037
1038  APInt Result(getMemory(getNumWords(width)), width);
1039
1040  // Copy full words.
1041  unsigned i;
1042  uint64_t word = 0;
1043  for (i = 0; i != BitWidth / APINT_BITS_PER_WORD; i++) {
1044    word = getRawData()[i];
1045    Result.pVal[i] = word;
1046  }
1047
1048  // Read and sign-extend any partial word.
1049  unsigned bits = (0 - BitWidth) % APINT_BITS_PER_WORD;
1050  if (bits != 0)
1051    word = (int64_t)getRawData()[i] << bits >> bits;
1052  else
1053    word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
1054
1055  // Write remaining full words.
1056  for (; i != width / APINT_BITS_PER_WORD; i++) {
1057    Result.pVal[i] = word;
1058    word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
1059  }
1060
1061  // Write any partial word.
1062  bits = (0 - width) % APINT_BITS_PER_WORD;
1063  if (bits != 0)
1064    Result.pVal[i] = word << bits >> bits;
1065
1066  return Result;
1067}
1068
1069//  Zero extend to a new width.
1070APInt APInt::zext(unsigned width) const {
1071  assert(width > BitWidth && "Invalid APInt ZeroExtend request");
1072
1073  if (width <= APINT_BITS_PER_WORD)
1074    return APInt(width, VAL);
1075
1076  APInt Result(getMemory(getNumWords(width)), width);
1077
1078  // Copy words.
1079  unsigned i;
1080  for (i = 0; i != getNumWords(); i++)
1081    Result.pVal[i] = getRawData()[i];
1082
1083  // Zero remaining words.
1084  memset(&Result.pVal[i], 0, (Result.getNumWords() - i) * APINT_WORD_SIZE);
1085
1086  return Result;
1087}
1088
1089APInt APInt::zextOrTrunc(unsigned width) const {
1090  if (BitWidth < width)
1091    return zext(width);
1092  if (BitWidth > width)
1093    return trunc(width);
1094  return *this;
1095}
1096
1097APInt APInt::sextOrTrunc(unsigned width) const {
1098  if (BitWidth < width)
1099    return sext(width);
1100  if (BitWidth > width)
1101    return trunc(width);
1102  return *this;
1103}
1104
1105/// Arithmetic right-shift this APInt by shiftAmt.
1106/// @brief Arithmetic right-shift function.
1107APInt APInt::ashr(const APInt &shiftAmt) const {
1108  return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1109}
1110
1111/// Arithmetic right-shift this APInt by shiftAmt.
1112/// @brief Arithmetic right-shift function.
1113APInt APInt::ashr(unsigned shiftAmt) const {
1114  assert(shiftAmt <= BitWidth && "Invalid shift amount");
1115  // Handle a degenerate case
1116  if (shiftAmt == 0)
1117    return *this;
1118
1119  // Handle single word shifts with built-in ashr
1120  if (isSingleWord()) {
1121    if (shiftAmt == BitWidth)
1122      return APInt(BitWidth, 0); // undefined
1123    else {
1124      unsigned SignBit = APINT_BITS_PER_WORD - BitWidth;
1125      return APInt(BitWidth,
1126        (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt));
1127    }
1128  }
1129
1130  // If all the bits were shifted out, the result is, technically, undefined.
1131  // We return -1 if it was negative, 0 otherwise. We check this early to avoid
1132  // issues in the algorithm below.
1133  if (shiftAmt == BitWidth) {
1134    if (isNegative())
1135      return APInt(BitWidth, -1ULL, true);
1136    else
1137      return APInt(BitWidth, 0);
1138  }
1139
1140  // Create some space for the result.
1141  uint64_t * val = new uint64_t[getNumWords()];
1142
1143  // Compute some values needed by the following shift algorithms
1144  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word
1145  unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift
1146  unsigned breakWord = getNumWords() - 1 - offset; // last word affected
1147  unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word?
1148  if (bitsInWord == 0)
1149    bitsInWord = APINT_BITS_PER_WORD;
1150
1151  // If we are shifting whole words, just move whole words
1152  if (wordShift == 0) {
1153    // Move the words containing significant bits
1154    for (unsigned i = 0; i <= breakWord; ++i)
1155      val[i] = pVal[i+offset]; // move whole word
1156
1157    // Adjust the top significant word for sign bit fill, if negative
1158    if (isNegative())
1159      if (bitsInWord < APINT_BITS_PER_WORD)
1160        val[breakWord] |= ~0ULL << bitsInWord; // set high bits
1161  } else {
1162    // Shift the low order words
1163    for (unsigned i = 0; i < breakWord; ++i) {
1164      // This combines the shifted corresponding word with the low bits from
1165      // the next word (shifted into this word's high bits).
1166      val[i] = (pVal[i+offset] >> wordShift) |
1167               (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1168    }
1169
1170    // Shift the break word. In this case there are no bits from the next word
1171    // to include in this word.
1172    val[breakWord] = pVal[breakWord+offset] >> wordShift;
1173
1174    // Deal with sign extenstion in the break word, and possibly the word before
1175    // it.
1176    if (isNegative()) {
1177      if (wordShift > bitsInWord) {
1178        if (breakWord > 0)
1179          val[breakWord-1] |=
1180            ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
1181        val[breakWord] |= ~0ULL;
1182      } else
1183        val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
1184    }
1185  }
1186
1187  // Remaining words are 0 or -1, just assign them.
1188  uint64_t fillValue = (isNegative() ? -1ULL : 0);
1189  for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1190    val[i] = fillValue;
1191  return APInt(val, BitWidth).clearUnusedBits();
1192}
1193
1194/// Logical right-shift this APInt by shiftAmt.
1195/// @brief Logical right-shift function.
1196APInt APInt::lshr(const APInt &shiftAmt) const {
1197  return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1198}
1199
1200/// Logical right-shift this APInt by shiftAmt.
1201/// @brief Logical right-shift function.
1202APInt APInt::lshr(unsigned shiftAmt) const {
1203  if (isSingleWord()) {
1204    if (shiftAmt == BitWidth)
1205      return APInt(BitWidth, 0);
1206    else
1207      return APInt(BitWidth, this->VAL >> shiftAmt);
1208  }
1209
1210  // If all the bits were shifted out, the result is 0. This avoids issues
1211  // with shifting by the size of the integer type, which produces undefined
1212  // results. We define these "undefined results" to always be 0.
1213  if (shiftAmt == BitWidth)
1214    return APInt(BitWidth, 0);
1215
1216  // If none of the bits are shifted out, the result is *this. This avoids
1217  // issues with shifting by the size of the integer type, which produces
1218  // undefined results in the code below. This is also an optimization.
1219  if (shiftAmt == 0)
1220    return *this;
1221
1222  // Create some space for the result.
1223  uint64_t * val = new uint64_t[getNumWords()];
1224
1225  // If we are shifting less than a word, compute the shift with a simple carry
1226  if (shiftAmt < APINT_BITS_PER_WORD) {
1227    uint64_t carry = 0;
1228    for (int i = getNumWords()-1; i >= 0; --i) {
1229      val[i] = (pVal[i] >> shiftAmt) | carry;
1230      carry = pVal[i] << (APINT_BITS_PER_WORD - shiftAmt);
1231    }
1232    return APInt(val, BitWidth).clearUnusedBits();
1233  }
1234
1235  // Compute some values needed by the remaining shift algorithms
1236  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1237  unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1238
1239  // If we are shifting whole words, just move whole words
1240  if (wordShift == 0) {
1241    for (unsigned i = 0; i < getNumWords() - offset; ++i)
1242      val[i] = pVal[i+offset];
1243    for (unsigned i = getNumWords()-offset; i < getNumWords(); i++)
1244      val[i] = 0;
1245    return APInt(val,BitWidth).clearUnusedBits();
1246  }
1247
1248  // Shift the low order words
1249  unsigned breakWord = getNumWords() - offset -1;
1250  for (unsigned i = 0; i < breakWord; ++i)
1251    val[i] = (pVal[i+offset] >> wordShift) |
1252             (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1253  // Shift the break word.
1254  val[breakWord] = pVal[breakWord+offset] >> wordShift;
1255
1256  // Remaining words are 0
1257  for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1258    val[i] = 0;
1259  return APInt(val, BitWidth).clearUnusedBits();
1260}
1261
1262/// Left-shift this APInt by shiftAmt.
1263/// @brief Left-shift function.
1264APInt APInt::shl(const APInt &shiftAmt) const {
1265  // It's undefined behavior in C to shift by BitWidth or greater.
1266  return shl((unsigned)shiftAmt.getLimitedValue(BitWidth));
1267}
1268
1269APInt APInt::shlSlowCase(unsigned shiftAmt) const {
1270  // If all the bits were shifted out, the result is 0. This avoids issues
1271  // with shifting by the size of the integer type, which produces undefined
1272  // results. We define these "undefined results" to always be 0.
1273  if (shiftAmt == BitWidth)
1274    return APInt(BitWidth, 0);
1275
1276  // If none of the bits are shifted out, the result is *this. This avoids a
1277  // lshr by the words size in the loop below which can produce incorrect
1278  // results. It also avoids the expensive computation below for a common case.
1279  if (shiftAmt == 0)
1280    return *this;
1281
1282  // Create some space for the result.
1283  uint64_t * val = new uint64_t[getNumWords()];
1284
1285  // If we are shifting less than a word, do it the easy way
1286  if (shiftAmt < APINT_BITS_PER_WORD) {
1287    uint64_t carry = 0;
1288    for (unsigned i = 0; i < getNumWords(); i++) {
1289      val[i] = pVal[i] << shiftAmt | carry;
1290      carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt);
1291    }
1292    return APInt(val, BitWidth).clearUnusedBits();
1293  }
1294
1295  // Compute some values needed by the remaining shift algorithms
1296  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1297  unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1298
1299  // If we are shifting whole words, just move whole words
1300  if (wordShift == 0) {
1301    for (unsigned i = 0; i < offset; i++)
1302      val[i] = 0;
1303    for (unsigned i = offset; i < getNumWords(); i++)
1304      val[i] = pVal[i-offset];
1305    return APInt(val,BitWidth).clearUnusedBits();
1306  }
1307
1308  // Copy whole words from this to Result.
1309  unsigned i = getNumWords() - 1;
1310  for (; i > offset; --i)
1311    val[i] = pVal[i-offset] << wordShift |
1312             pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift);
1313  val[offset] = pVal[0] << wordShift;
1314  for (i = 0; i < offset; ++i)
1315    val[i] = 0;
1316  return APInt(val, BitWidth).clearUnusedBits();
1317}
1318
1319APInt APInt::rotl(const APInt &rotateAmt) const {
1320  return rotl((unsigned)rotateAmt.getLimitedValue(BitWidth));
1321}
1322
1323APInt APInt::rotl(unsigned rotateAmt) const {
1324  if (rotateAmt == 0)
1325    return *this;
1326  // Don't get too fancy, just use existing shift/or facilities
1327  APInt hi(*this);
1328  APInt lo(*this);
1329  hi.shl(rotateAmt);
1330  lo.lshr(BitWidth - rotateAmt);
1331  return hi | lo;
1332}
1333
1334APInt APInt::rotr(const APInt &rotateAmt) const {
1335  return rotr((unsigned)rotateAmt.getLimitedValue(BitWidth));
1336}
1337
1338APInt APInt::rotr(unsigned rotateAmt) const {
1339  if (rotateAmt == 0)
1340    return *this;
1341  // Don't get too fancy, just use existing shift/or facilities
1342  APInt hi(*this);
1343  APInt lo(*this);
1344  lo.lshr(rotateAmt);
1345  hi.shl(BitWidth - rotateAmt);
1346  return hi | lo;
1347}
1348
1349// Square Root - this method computes and returns the square root of "this".
1350// Three mechanisms are used for computation. For small values (<= 5 bits),
1351// a table lookup is done. This gets some performance for common cases. For
1352// values using less than 52 bits, the value is converted to double and then
1353// the libc sqrt function is called. The result is rounded and then converted
1354// back to a uint64_t which is then used to construct the result. Finally,
1355// the Babylonian method for computing square roots is used.
1356APInt APInt::sqrt() const {
1357
1358  // Determine the magnitude of the value.
1359  unsigned magnitude = getActiveBits();
1360
1361  // Use a fast table for some small values. This also gets rid of some
1362  // rounding errors in libc sqrt for small values.
1363  if (magnitude <= 5) {
1364    static const uint8_t results[32] = {
1365      /*     0 */ 0,
1366      /*  1- 2 */ 1, 1,
1367      /*  3- 6 */ 2, 2, 2, 2,
1368      /*  7-12 */ 3, 3, 3, 3, 3, 3,
1369      /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1370      /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1371      /*    31 */ 6
1372    };
1373    return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
1374  }
1375
1376  // If the magnitude of the value fits in less than 52 bits (the precision of
1377  // an IEEE double precision floating point value), then we can use the
1378  // libc sqrt function which will probably use a hardware sqrt computation.
1379  // This should be faster than the algorithm below.
1380  if (magnitude < 52) {
1381#if HAVE_ROUND
1382    return APInt(BitWidth,
1383                 uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
1384#else
1385    return APInt(BitWidth,
1386                 uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0])) + 0.5));
1387#endif
1388  }
1389
1390  // Okay, all the short cuts are exhausted. We must compute it. The following
1391  // is a classical Babylonian method for computing the square root. This code
1392  // was adapted to APINt from a wikipedia article on such computations.
1393  // See http://www.wikipedia.org/ and go to the page named
1394  // Calculate_an_integer_square_root.
1395  unsigned nbits = BitWidth, i = 4;
1396  APInt testy(BitWidth, 16);
1397  APInt x_old(BitWidth, 1);
1398  APInt x_new(BitWidth, 0);
1399  APInt two(BitWidth, 2);
1400
1401  // Select a good starting value using binary logarithms.
1402  for (;; i += 2, testy = testy.shl(2))
1403    if (i >= nbits || this->ule(testy)) {
1404      x_old = x_old.shl(i / 2);
1405      break;
1406    }
1407
1408  // Use the Babylonian method to arrive at the integer square root:
1409  for (;;) {
1410    x_new = (this->udiv(x_old) + x_old).udiv(two);
1411    if (x_old.ule(x_new))
1412      break;
1413    x_old = x_new;
1414  }
1415
1416  // Make sure we return the closest approximation
1417  // NOTE: The rounding calculation below is correct. It will produce an
1418  // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1419  // determined to be a rounding issue with pari/gp as it begins to use a
1420  // floating point representation after 192 bits. There are no discrepancies
1421  // between this algorithm and pari/gp for bit widths < 192 bits.
1422  APInt square(x_old * x_old);
1423  APInt nextSquare((x_old + 1) * (x_old +1));
1424  if (this->ult(square))
1425    return x_old;
1426  else if (this->ule(nextSquare)) {
1427    APInt midpoint((nextSquare - square).udiv(two));
1428    APInt offset(*this - square);
1429    if (offset.ult(midpoint))
1430      return x_old;
1431    else
1432      return x_old + 1;
1433  } else
1434    llvm_unreachable("Error in APInt::sqrt computation");
1435  return x_old + 1;
1436}
1437
1438/// Computes the multiplicative inverse of this APInt for a given modulo. The
1439/// iterative extended Euclidean algorithm is used to solve for this value,
1440/// however we simplify it to speed up calculating only the inverse, and take
1441/// advantage of div+rem calculations. We also use some tricks to avoid copying
1442/// (potentially large) APInts around.
1443APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1444  assert(ult(modulo) && "This APInt must be smaller than the modulo");
1445
1446  // Using the properties listed at the following web page (accessed 06/21/08):
1447  //   http://www.numbertheory.org/php/euclid.html
1448  // (especially the properties numbered 3, 4 and 9) it can be proved that
1449  // BitWidth bits suffice for all the computations in the algorithm implemented
1450  // below. More precisely, this number of bits suffice if the multiplicative
1451  // inverse exists, but may not suffice for the general extended Euclidean
1452  // algorithm.
1453
1454  APInt r[2] = { modulo, *this };
1455  APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1456  APInt q(BitWidth, 0);
1457
1458  unsigned i;
1459  for (i = 0; r[i^1] != 0; i ^= 1) {
1460    // An overview of the math without the confusing bit-flipping:
1461    // q = r[i-2] / r[i-1]
1462    // r[i] = r[i-2] % r[i-1]
1463    // t[i] = t[i-2] - t[i-1] * q
1464    udivrem(r[i], r[i^1], q, r[i]);
1465    t[i] -= t[i^1] * q;
1466  }
1467
1468  // If this APInt and the modulo are not coprime, there is no multiplicative
1469  // inverse, so return 0. We check this by looking at the next-to-last
1470  // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1471  // algorithm.
1472  if (r[i] != 1)
1473    return APInt(BitWidth, 0);
1474
1475  // The next-to-last t is the multiplicative inverse.  However, we are
1476  // interested in a positive inverse. Calcuate a positive one from a negative
1477  // one if necessary. A simple addition of the modulo suffices because
1478  // abs(t[i]) is known to be less than *this/2 (see the link above).
1479  return t[i].isNegative() ? t[i] + modulo : t[i];
1480}
1481
1482/// Calculate the magic numbers required to implement a signed integer division
1483/// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1484/// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1485/// Warren, Jr., chapter 10.
1486APInt::ms APInt::magic() const {
1487  const APInt& d = *this;
1488  unsigned p;
1489  APInt ad, anc, delta, q1, r1, q2, r2, t;
1490  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1491  struct ms mag;
1492
1493  ad = d.abs();
1494  t = signedMin + (d.lshr(d.getBitWidth() - 1));
1495  anc = t - 1 - t.urem(ad);   // absolute value of nc
1496  p = d.getBitWidth() - 1;    // initialize p
1497  q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1498  r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1499  q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1500  r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1501  do {
1502    p = p + 1;
1503    q1 = q1<<1;          // update q1 = 2p/abs(nc)
1504    r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1505    if (r1.uge(anc)) {  // must be unsigned comparison
1506      q1 = q1 + 1;
1507      r1 = r1 - anc;
1508    }
1509    q2 = q2<<1;          // update q2 = 2p/abs(d)
1510    r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1511    if (r2.uge(ad)) {   // must be unsigned comparison
1512      q2 = q2 + 1;
1513      r2 = r2 - ad;
1514    }
1515    delta = ad - r2;
1516  } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1517
1518  mag.m = q2 + 1;
1519  if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1520  mag.s = p - d.getBitWidth();          // resulting shift
1521  return mag;
1522}
1523
1524/// Calculate the magic numbers required to implement an unsigned integer
1525/// division by a constant as a sequence of multiplies, adds and shifts.
1526/// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1527/// S. Warren, Jr., chapter 10.
1528/// LeadingZeros can be used to simplify the calculation if the upper bits
1529/// of the divided value are known zero.
1530APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1531  const APInt& d = *this;
1532  unsigned p;
1533  APInt nc, delta, q1, r1, q2, r2;
1534  struct mu magu;
1535  magu.a = 0;               // initialize "add" indicator
1536  APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1537  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1538  APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1539
1540  nc = allOnes - (-d).urem(d);
1541  p = d.getBitWidth() - 1;  // initialize p
1542  q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1543  r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1544  q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1545  r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1546  do {
1547    p = p + 1;
1548    if (r1.uge(nc - r1)) {
1549      q1 = q1 + q1 + 1;  // update q1
1550      r1 = r1 + r1 - nc; // update r1
1551    }
1552    else {
1553      q1 = q1+q1; // update q1
1554      r1 = r1+r1; // update r1
1555    }
1556    if ((r2 + 1).uge(d - r2)) {
1557      if (q2.uge(signedMax)) magu.a = 1;
1558      q2 = q2+q2 + 1;     // update q2
1559      r2 = r2+r2 + 1 - d; // update r2
1560    }
1561    else {
1562      if (q2.uge(signedMin)) magu.a = 1;
1563      q2 = q2+q2;     // update q2
1564      r2 = r2+r2 + 1; // update r2
1565    }
1566    delta = d - 1 - r2;
1567  } while (p < d.getBitWidth()*2 &&
1568           (q1.ult(delta) || (q1 == delta && r1 == 0)));
1569  magu.m = q2 + 1; // resulting magic number
1570  magu.s = p - d.getBitWidth();  // resulting shift
1571  return magu;
1572}
1573
1574/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1575/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1576/// variables here have the same names as in the algorithm. Comments explain
1577/// the algorithm and any deviation from it.
1578static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
1579                     unsigned m, unsigned n) {
1580  assert(u && "Must provide dividend");
1581  assert(v && "Must provide divisor");
1582  assert(q && "Must provide quotient");
1583  assert(u != v && u != q && v != q && "Must us different memory");
1584  assert(n>1 && "n must be > 1");
1585
1586  // Knuth uses the value b as the base of the number system. In our case b
1587  // is 2^31 so we just set it to -1u.
1588  uint64_t b = uint64_t(1) << 32;
1589
1590#if 0
1591  DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1592  DEBUG(dbgs() << "KnuthDiv: original:");
1593  DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1594  DEBUG(dbgs() << " by");
1595  DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1596  DEBUG(dbgs() << '\n');
1597#endif
1598  // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1599  // u and v by d. Note that we have taken Knuth's advice here to use a power
1600  // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1601  // 2 allows us to shift instead of multiply and it is easy to determine the
1602  // shift amount from the leading zeros.  We are basically normalizing the u
1603  // and v so that its high bits are shifted to the top of v's range without
1604  // overflow. Note that this can require an extra word in u so that u must
1605  // be of length m+n+1.
1606  unsigned shift = CountLeadingZeros_32(v[n-1]);
1607  unsigned v_carry = 0;
1608  unsigned u_carry = 0;
1609  if (shift) {
1610    for (unsigned i = 0; i < m+n; ++i) {
1611      unsigned u_tmp = u[i] >> (32 - shift);
1612      u[i] = (u[i] << shift) | u_carry;
1613      u_carry = u_tmp;
1614    }
1615    for (unsigned i = 0; i < n; ++i) {
1616      unsigned v_tmp = v[i] >> (32 - shift);
1617      v[i] = (v[i] << shift) | v_carry;
1618      v_carry = v_tmp;
1619    }
1620  }
1621  u[m+n] = u_carry;
1622#if 0
1623  DEBUG(dbgs() << "KnuthDiv:   normal:");
1624  DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1625  DEBUG(dbgs() << " by");
1626  DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1627  DEBUG(dbgs() << '\n');
1628#endif
1629
1630  // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1631  int j = m;
1632  do {
1633    DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1634    // D3. [Calculate q'.].
1635    //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1636    //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1637    // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1638    // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test
1639    // on v[n-2] determines at high speed most of the cases in which the trial
1640    // value qp is one too large, and it eliminates all cases where qp is two
1641    // too large.
1642    uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
1643    DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1644    uint64_t qp = dividend / v[n-1];
1645    uint64_t rp = dividend % v[n-1];
1646    if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1647      qp--;
1648      rp += v[n-1];
1649      if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1650        qp--;
1651    }
1652    DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1653
1654    // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1655    // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1656    // consists of a simple multiplication by a one-place number, combined with
1657    // a subtraction.
1658    bool isNeg = false;
1659    for (unsigned i = 0; i < n; ++i) {
1660      uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32);
1661      uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]);
1662      bool borrow = subtrahend > u_tmp;
1663      DEBUG(dbgs() << "KnuthDiv: u_tmp == " << u_tmp
1664                   << ", subtrahend == " << subtrahend
1665                   << ", borrow = " << borrow << '\n');
1666
1667      uint64_t result = u_tmp - subtrahend;
1668      unsigned k = j + i;
1669      u[k++] = (unsigned)(result & (b-1)); // subtract low word
1670      u[k++] = (unsigned)(result >> 32);   // subtract high word
1671      while (borrow && k <= m+n) { // deal with borrow to the left
1672        borrow = u[k] == 0;
1673        u[k]--;
1674        k++;
1675      }
1676      isNeg |= borrow;
1677      DEBUG(dbgs() << "KnuthDiv: u[j+i] == " << u[j+i] << ",  u[j+i+1] == " <<
1678                    u[j+i+1] << '\n');
1679    }
1680    DEBUG(dbgs() << "KnuthDiv: after subtraction:");
1681    DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1682    DEBUG(dbgs() << '\n');
1683    // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1684    // this step is actually negative, (u[j+n]...u[j]) should be left as the
1685    // true value plus b**(n+1), namely as the b's complement of
1686    // the true value, and a "borrow" to the left should be remembered.
1687    //
1688    if (isNeg) {
1689      bool carry = true;  // true because b's complement is "complement + 1"
1690      for (unsigned i = 0; i <= m+n; ++i) {
1691        u[i] = ~u[i] + carry; // b's complement
1692        carry = carry && u[i] == 0;
1693      }
1694    }
1695    DEBUG(dbgs() << "KnuthDiv: after complement:");
1696    DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1697    DEBUG(dbgs() << '\n');
1698
1699    // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1700    // negative, go to step D6; otherwise go on to step D7.
1701    q[j] = (unsigned)qp;
1702    if (isNeg) {
1703      // D6. [Add back]. The probability that this step is necessary is very
1704      // small, on the order of only 2/b. Make sure that test data accounts for
1705      // this possibility. Decrease q[j] by 1
1706      q[j]--;
1707      // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1708      // A carry will occur to the left of u[j+n], and it should be ignored
1709      // since it cancels with the borrow that occurred in D4.
1710      bool carry = false;
1711      for (unsigned i = 0; i < n; i++) {
1712        unsigned limit = std::min(u[j+i],v[i]);
1713        u[j+i] += v[i] + carry;
1714        carry = u[j+i] < limit || (carry && u[j+i] == limit);
1715      }
1716      u[j+n] += carry;
1717    }
1718    DEBUG(dbgs() << "KnuthDiv: after correction:");
1719    DEBUG(for (int i = m+n; i >=0; i--) dbgs() <<" " << u[i]);
1720    DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1721
1722  // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1723  } while (--j >= 0);
1724
1725  DEBUG(dbgs() << "KnuthDiv: quotient:");
1726  DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]);
1727  DEBUG(dbgs() << '\n');
1728
1729  // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1730  // remainder may be obtained by dividing u[...] by d. If r is non-null we
1731  // compute the remainder (urem uses this).
1732  if (r) {
1733    // The value d is expressed by the "shift" value above since we avoided
1734    // multiplication by d by using a shift left. So, all we have to do is
1735    // shift right here. In order to mak
1736    if (shift) {
1737      unsigned carry = 0;
1738      DEBUG(dbgs() << "KnuthDiv: remainder:");
1739      for (int i = n-1; i >= 0; i--) {
1740        r[i] = (u[i] >> shift) | carry;
1741        carry = u[i] << (32 - shift);
1742        DEBUG(dbgs() << " " << r[i]);
1743      }
1744    } else {
1745      for (int i = n-1; i >= 0; i--) {
1746        r[i] = u[i];
1747        DEBUG(dbgs() << " " << r[i]);
1748      }
1749    }
1750    DEBUG(dbgs() << '\n');
1751  }
1752#if 0
1753  DEBUG(dbgs() << '\n');
1754#endif
1755}
1756
1757void APInt::divide(const APInt LHS, unsigned lhsWords,
1758                   const APInt &RHS, unsigned rhsWords,
1759                   APInt *Quotient, APInt *Remainder)
1760{
1761  assert(lhsWords >= rhsWords && "Fractional result");
1762
1763  // First, compose the values into an array of 32-bit words instead of
1764  // 64-bit words. This is a necessity of both the "short division" algorithm
1765  // and the Knuth "classical algorithm" which requires there to be native
1766  // operations for +, -, and * on an m bit value with an m*2 bit result. We
1767  // can't use 64-bit operands here because we don't have native results of
1768  // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1769  // work on large-endian machines.
1770  uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
1771  unsigned n = rhsWords * 2;
1772  unsigned m = (lhsWords * 2) - n;
1773
1774  // Allocate space for the temporary values we need either on the stack, if
1775  // it will fit, or on the heap if it won't.
1776  unsigned SPACE[128];
1777  unsigned *U = 0;
1778  unsigned *V = 0;
1779  unsigned *Q = 0;
1780  unsigned *R = 0;
1781  if ((Remainder?4:3)*n+2*m+1 <= 128) {
1782    U = &SPACE[0];
1783    V = &SPACE[m+n+1];
1784    Q = &SPACE[(m+n+1) + n];
1785    if (Remainder)
1786      R = &SPACE[(m+n+1) + n + (m+n)];
1787  } else {
1788    U = new unsigned[m + n + 1];
1789    V = new unsigned[n];
1790    Q = new unsigned[m+n];
1791    if (Remainder)
1792      R = new unsigned[n];
1793  }
1794
1795  // Initialize the dividend
1796  memset(U, 0, (m+n+1)*sizeof(unsigned));
1797  for (unsigned i = 0; i < lhsWords; ++i) {
1798    uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
1799    U[i * 2] = (unsigned)(tmp & mask);
1800    U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1801  }
1802  U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1803
1804  // Initialize the divisor
1805  memset(V, 0, (n)*sizeof(unsigned));
1806  for (unsigned i = 0; i < rhsWords; ++i) {
1807    uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
1808    V[i * 2] = (unsigned)(tmp & mask);
1809    V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1810  }
1811
1812  // initialize the quotient and remainder
1813  memset(Q, 0, (m+n) * sizeof(unsigned));
1814  if (Remainder)
1815    memset(R, 0, n * sizeof(unsigned));
1816
1817  // Now, adjust m and n for the Knuth division. n is the number of words in
1818  // the divisor. m is the number of words by which the dividend exceeds the
1819  // divisor (i.e. m+n is the length of the dividend). These sizes must not
1820  // contain any zero words or the Knuth algorithm fails.
1821  for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1822    n--;
1823    m++;
1824  }
1825  for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1826    m--;
1827
1828  // If we're left with only a single word for the divisor, Knuth doesn't work
1829  // so we implement the short division algorithm here. This is much simpler
1830  // and faster because we are certain that we can divide a 64-bit quantity
1831  // by a 32-bit quantity at hardware speed and short division is simply a
1832  // series of such operations. This is just like doing short division but we
1833  // are using base 2^32 instead of base 10.
1834  assert(n != 0 && "Divide by zero?");
1835  if (n == 1) {
1836    unsigned divisor = V[0];
1837    unsigned remainder = 0;
1838    for (int i = m+n-1; i >= 0; i--) {
1839      uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
1840      if (partial_dividend == 0) {
1841        Q[i] = 0;
1842        remainder = 0;
1843      } else if (partial_dividend < divisor) {
1844        Q[i] = 0;
1845        remainder = (unsigned)partial_dividend;
1846      } else if (partial_dividend == divisor) {
1847        Q[i] = 1;
1848        remainder = 0;
1849      } else {
1850        Q[i] = (unsigned)(partial_dividend / divisor);
1851        remainder = (unsigned)(partial_dividend - (Q[i] * divisor));
1852      }
1853    }
1854    if (R)
1855      R[0] = remainder;
1856  } else {
1857    // Now we're ready to invoke the Knuth classical divide algorithm. In this
1858    // case n > 1.
1859    KnuthDiv(U, V, Q, R, m, n);
1860  }
1861
1862  // If the caller wants the quotient
1863  if (Quotient) {
1864    // Set up the Quotient value's memory.
1865    if (Quotient->BitWidth != LHS.BitWidth) {
1866      if (Quotient->isSingleWord())
1867        Quotient->VAL = 0;
1868      else
1869        delete [] Quotient->pVal;
1870      Quotient->BitWidth = LHS.BitWidth;
1871      if (!Quotient->isSingleWord())
1872        Quotient->pVal = getClearedMemory(Quotient->getNumWords());
1873    } else
1874      Quotient->clearAllBits();
1875
1876    // The quotient is in Q. Reconstitute the quotient into Quotient's low
1877    // order words.
1878    if (lhsWords == 1) {
1879      uint64_t tmp =
1880        uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
1881      if (Quotient->isSingleWord())
1882        Quotient->VAL = tmp;
1883      else
1884        Quotient->pVal[0] = tmp;
1885    } else {
1886      assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
1887      for (unsigned i = 0; i < lhsWords; ++i)
1888        Quotient->pVal[i] =
1889          uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1890    }
1891  }
1892
1893  // If the caller wants the remainder
1894  if (Remainder) {
1895    // Set up the Remainder value's memory.
1896    if (Remainder->BitWidth != RHS.BitWidth) {
1897      if (Remainder->isSingleWord())
1898        Remainder->VAL = 0;
1899      else
1900        delete [] Remainder->pVal;
1901      Remainder->BitWidth = RHS.BitWidth;
1902      if (!Remainder->isSingleWord())
1903        Remainder->pVal = getClearedMemory(Remainder->getNumWords());
1904    } else
1905      Remainder->clearAllBits();
1906
1907    // The remainder is in R. Reconstitute the remainder into Remainder's low
1908    // order words.
1909    if (rhsWords == 1) {
1910      uint64_t tmp =
1911        uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
1912      if (Remainder->isSingleWord())
1913        Remainder->VAL = tmp;
1914      else
1915        Remainder->pVal[0] = tmp;
1916    } else {
1917      assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
1918      for (unsigned i = 0; i < rhsWords; ++i)
1919        Remainder->pVal[i] =
1920          uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1921    }
1922  }
1923
1924  // Clean up the memory we allocated.
1925  if (U != &SPACE[0]) {
1926    delete [] U;
1927    delete [] V;
1928    delete [] Q;
1929    delete [] R;
1930  }
1931}
1932
1933APInt APInt::udiv(const APInt& RHS) const {
1934  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1935
1936  // First, deal with the easy case
1937  if (isSingleWord()) {
1938    assert(RHS.VAL != 0 && "Divide by zero?");
1939    return APInt(BitWidth, VAL / RHS.VAL);
1940  }
1941
1942  // Get some facts about the LHS and RHS number of bits and words
1943  unsigned rhsBits = RHS.getActiveBits();
1944  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1945  assert(rhsWords && "Divided by zero???");
1946  unsigned lhsBits = this->getActiveBits();
1947  unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1948
1949  // Deal with some degenerate cases
1950  if (!lhsWords)
1951    // 0 / X ===> 0
1952    return APInt(BitWidth, 0);
1953  else if (lhsWords < rhsWords || this->ult(RHS)) {
1954    // X / Y ===> 0, iff X < Y
1955    return APInt(BitWidth, 0);
1956  } else if (*this == RHS) {
1957    // X / X ===> 1
1958    return APInt(BitWidth, 1);
1959  } else if (lhsWords == 1 && rhsWords == 1) {
1960    // All high words are zero, just use native divide
1961    return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
1962  }
1963
1964  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1965  APInt Quotient(1,0); // to hold result.
1966  divide(*this, lhsWords, RHS, rhsWords, &Quotient, 0);
1967  return Quotient;
1968}
1969
1970APInt APInt::urem(const APInt& RHS) const {
1971  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1972  if (isSingleWord()) {
1973    assert(RHS.VAL != 0 && "Remainder by zero?");
1974    return APInt(BitWidth, VAL % RHS.VAL);
1975  }
1976
1977  // Get some facts about the LHS
1978  unsigned lhsBits = getActiveBits();
1979  unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
1980
1981  // Get some facts about the RHS
1982  unsigned rhsBits = RHS.getActiveBits();
1983  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1984  assert(rhsWords && "Performing remainder operation by zero ???");
1985
1986  // Check the degenerate cases
1987  if (lhsWords == 0) {
1988    // 0 % Y ===> 0
1989    return APInt(BitWidth, 0);
1990  } else if (lhsWords < rhsWords || this->ult(RHS)) {
1991    // X % Y ===> X, iff X < Y
1992    return *this;
1993  } else if (*this == RHS) {
1994    // X % X == 0;
1995    return APInt(BitWidth, 0);
1996  } else if (lhsWords == 1) {
1997    // All high words are zero, just use native remainder
1998    return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
1999  }
2000
2001  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
2002  APInt Remainder(1,0);
2003  divide(*this, lhsWords, RHS, rhsWords, 0, &Remainder);
2004  return Remainder;
2005}
2006
2007void APInt::udivrem(const APInt &LHS, const APInt &RHS,
2008                    APInt &Quotient, APInt &Remainder) {
2009  // Get some size facts about the dividend and divisor
2010  unsigned lhsBits  = LHS.getActiveBits();
2011  unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
2012  unsigned rhsBits  = RHS.getActiveBits();
2013  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
2014
2015  // Check the degenerate cases
2016  if (lhsWords == 0) {
2017    Quotient = 0;                // 0 / Y ===> 0
2018    Remainder = 0;               // 0 % Y ===> 0
2019    return;
2020  }
2021
2022  if (lhsWords < rhsWords || LHS.ult(RHS)) {
2023    Remainder = LHS;            // X % Y ===> X, iff X < Y
2024    Quotient = 0;               // X / Y ===> 0, iff X < Y
2025    return;
2026  }
2027
2028  if (LHS == RHS) {
2029    Quotient  = 1;              // X / X ===> 1
2030    Remainder = 0;              // X % X ===> 0;
2031    return;
2032  }
2033
2034  if (lhsWords == 1 && rhsWords == 1) {
2035    // There is only one word to consider so use the native versions.
2036    uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
2037    uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
2038    Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
2039    Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
2040    return;
2041  }
2042
2043  // Okay, lets do it the long way
2044  divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
2045}
2046
2047APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
2048  APInt Res = *this+RHS;
2049  Overflow = isNonNegative() == RHS.isNonNegative() &&
2050             Res.isNonNegative() != isNonNegative();
2051  return Res;
2052}
2053
2054APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
2055  APInt Res = *this+RHS;
2056  Overflow = Res.ult(RHS);
2057  return Res;
2058}
2059
2060APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
2061  APInt Res = *this - RHS;
2062  Overflow = isNonNegative() != RHS.isNonNegative() &&
2063             Res.isNonNegative() != isNonNegative();
2064  return Res;
2065}
2066
2067APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
2068  APInt Res = *this-RHS;
2069  Overflow = Res.ugt(*this);
2070  return Res;
2071}
2072
2073APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
2074  // MININT/-1  -->  overflow.
2075  Overflow = isMinSignedValue() && RHS.isAllOnesValue();
2076  return sdiv(RHS);
2077}
2078
2079APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
2080  APInt Res = *this * RHS;
2081
2082  if (*this != 0 && RHS != 0)
2083    Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
2084  else
2085    Overflow = false;
2086  return Res;
2087}
2088
2089APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
2090  APInt Res = *this * RHS;
2091
2092  if (*this != 0 && RHS != 0)
2093    Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
2094  else
2095    Overflow = false;
2096  return Res;
2097}
2098
2099APInt APInt::sshl_ov(unsigned ShAmt, bool &Overflow) const {
2100  Overflow = ShAmt >= getBitWidth();
2101  if (Overflow)
2102    ShAmt = getBitWidth()-1;
2103
2104  if (isNonNegative()) // Don't allow sign change.
2105    Overflow = ShAmt >= countLeadingZeros();
2106  else
2107    Overflow = ShAmt >= countLeadingOnes();
2108
2109  return *this << ShAmt;
2110}
2111
2112
2113
2114
2115void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2116  // Check our assumptions here
2117  assert(!str.empty() && "Invalid string length");
2118  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
2119         "Radix should be 2, 8, 10, or 16!");
2120
2121  StringRef::iterator p = str.begin();
2122  size_t slen = str.size();
2123  bool isNeg = *p == '-';
2124  if (*p == '-' || *p == '+') {
2125    p++;
2126    slen--;
2127    assert(slen && "String is only a sign, needs a value.");
2128  }
2129  assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2130  assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2131  assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2132  assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2133         "Insufficient bit width");
2134
2135  // Allocate memory
2136  if (!isSingleWord())
2137    pVal = getClearedMemory(getNumWords());
2138
2139  // Figure out if we can shift instead of multiply
2140  unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2141
2142  // Set up an APInt for the digit to add outside the loop so we don't
2143  // constantly construct/destruct it.
2144  APInt apdigit(getBitWidth(), 0);
2145  APInt apradix(getBitWidth(), radix);
2146
2147  // Enter digit traversal loop
2148  for (StringRef::iterator e = str.end(); p != e; ++p) {
2149    unsigned digit = getDigit(*p, radix);
2150    assert(digit < radix && "Invalid character in digit string");
2151
2152    // Shift or multiply the value by the radix
2153    if (slen > 1) {
2154      if (shift)
2155        *this <<= shift;
2156      else
2157        *this *= apradix;
2158    }
2159
2160    // Add in the digit we just interpreted
2161    if (apdigit.isSingleWord())
2162      apdigit.VAL = digit;
2163    else
2164      apdigit.pVal[0] = digit;
2165    *this += apdigit;
2166  }
2167  // If its negative, put it in two's complement form
2168  if (isNeg) {
2169    (*this)--;
2170    this->flipAllBits();
2171  }
2172}
2173
2174void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2175                     bool Signed, bool formatAsCLiteral) const {
2176  assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) &&
2177         "Radix should be 2, 8, 10, or 16!");
2178
2179  const char *Prefix = "";
2180  if (formatAsCLiteral) {
2181    switch (Radix) {
2182      case 2:
2183        // Binary literals are a non-standard extension added in gcc 4.3:
2184        // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2185        Prefix = "0b";
2186        break;
2187      case 8:
2188        Prefix = "0";
2189        break;
2190      case 16:
2191        Prefix = "0x";
2192        break;
2193    }
2194  }
2195
2196  // First, check for a zero value and just short circuit the logic below.
2197  if (*this == 0) {
2198    while (*Prefix) {
2199      Str.push_back(*Prefix);
2200      ++Prefix;
2201    };
2202    Str.push_back('0');
2203    return;
2204  }
2205
2206  static const char Digits[] = "0123456789ABCDEF";
2207
2208  if (isSingleWord()) {
2209    char Buffer[65];
2210    char *BufPtr = Buffer+65;
2211
2212    uint64_t N;
2213    if (!Signed) {
2214      N = getZExtValue();
2215    } else {
2216      int64_t I = getSExtValue();
2217      if (I >= 0) {
2218        N = I;
2219      } else {
2220        Str.push_back('-');
2221        N = -(uint64_t)I;
2222      }
2223    }
2224
2225    while (*Prefix) {
2226      Str.push_back(*Prefix);
2227      ++Prefix;
2228    };
2229
2230    while (N) {
2231      *--BufPtr = Digits[N % Radix];
2232      N /= Radix;
2233    }
2234    Str.append(BufPtr, Buffer+65);
2235    return;
2236  }
2237
2238  APInt Tmp(*this);
2239
2240  if (Signed && isNegative()) {
2241    // They want to print the signed version and it is a negative value
2242    // Flip the bits and add one to turn it into the equivalent positive
2243    // value and put a '-' in the result.
2244    Tmp.flipAllBits();
2245    Tmp++;
2246    Str.push_back('-');
2247  }
2248
2249  while (*Prefix) {
2250    Str.push_back(*Prefix);
2251    ++Prefix;
2252  };
2253
2254  // We insert the digits backward, then reverse them to get the right order.
2255  unsigned StartDig = Str.size();
2256
2257  // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2258  // because the number of bits per digit (1, 3 and 4 respectively) divides
2259  // equaly.  We just shift until the value is zero.
2260  if (Radix != 10) {
2261    // Just shift tmp right for each digit width until it becomes zero
2262    unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2263    unsigned MaskAmt = Radix - 1;
2264
2265    while (Tmp != 0) {
2266      unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2267      Str.push_back(Digits[Digit]);
2268      Tmp = Tmp.lshr(ShiftAmt);
2269    }
2270  } else {
2271    APInt divisor(4, 10);
2272    while (Tmp != 0) {
2273      APInt APdigit(1, 0);
2274      APInt tmp2(Tmp.getBitWidth(), 0);
2275      divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
2276             &APdigit);
2277      unsigned Digit = (unsigned)APdigit.getZExtValue();
2278      assert(Digit < Radix && "divide failed");
2279      Str.push_back(Digits[Digit]);
2280      Tmp = tmp2;
2281    }
2282  }
2283
2284  // Reverse the digits before returning.
2285  std::reverse(Str.begin()+StartDig, Str.end());
2286}
2287
2288/// toString - This returns the APInt as a std::string.  Note that this is an
2289/// inefficient method.  It is better to pass in a SmallVector/SmallString
2290/// to the methods above.
2291std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2292  SmallString<40> S;
2293  toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2294  return S.str();
2295}
2296
2297
2298void APInt::dump() const {
2299  SmallString<40> S, U;
2300  this->toStringUnsigned(U);
2301  this->toStringSigned(S);
2302  dbgs() << "APInt(" << BitWidth << "b, "
2303         << U.str() << "u " << S.str() << "s)";
2304}
2305
2306void APInt::print(raw_ostream &OS, bool isSigned) const {
2307  SmallString<40> S;
2308  this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2309  OS << S.str();
2310}
2311
2312// This implements a variety of operations on a representation of
2313// arbitrary precision, two's-complement, bignum integer values.
2314
2315// Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2316// and unrestricting assumption.
2317#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
2318COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0);
2319
2320/* Some handy functions local to this file.  */
2321namespace {
2322
2323  /* Returns the integer part with the least significant BITS set.
2324     BITS cannot be zero.  */
2325  static inline integerPart
2326  lowBitMask(unsigned int bits)
2327  {
2328    assert(bits != 0 && bits <= integerPartWidth);
2329
2330    return ~(integerPart) 0 >> (integerPartWidth - bits);
2331  }
2332
2333  /* Returns the value of the lower half of PART.  */
2334  static inline integerPart
2335  lowHalf(integerPart part)
2336  {
2337    return part & lowBitMask(integerPartWidth / 2);
2338  }
2339
2340  /* Returns the value of the upper half of PART.  */
2341  static inline integerPart
2342  highHalf(integerPart part)
2343  {
2344    return part >> (integerPartWidth / 2);
2345  }
2346
2347  /* Returns the bit number of the most significant set bit of a part.
2348     If the input number has no bits set -1U is returned.  */
2349  static unsigned int
2350  partMSB(integerPart value)
2351  {
2352    unsigned int n, msb;
2353
2354    if (value == 0)
2355      return -1U;
2356
2357    n = integerPartWidth / 2;
2358
2359    msb = 0;
2360    do {
2361      if (value >> n) {
2362        value >>= n;
2363        msb += n;
2364      }
2365
2366      n >>= 1;
2367    } while (n);
2368
2369    return msb;
2370  }
2371
2372  /* Returns the bit number of the least significant set bit of a
2373     part.  If the input number has no bits set -1U is returned.  */
2374  static unsigned int
2375  partLSB(integerPart value)
2376  {
2377    unsigned int n, lsb;
2378
2379    if (value == 0)
2380      return -1U;
2381
2382    lsb = integerPartWidth - 1;
2383    n = integerPartWidth / 2;
2384
2385    do {
2386      if (value << n) {
2387        value <<= n;
2388        lsb -= n;
2389      }
2390
2391      n >>= 1;
2392    } while (n);
2393
2394    return lsb;
2395  }
2396}
2397
2398/* Sets the least significant part of a bignum to the input value, and
2399   zeroes out higher parts.  */
2400void
2401APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts)
2402{
2403  unsigned int i;
2404
2405  assert(parts > 0);
2406
2407  dst[0] = part;
2408  for (i = 1; i < parts; i++)
2409    dst[i] = 0;
2410}
2411
2412/* Assign one bignum to another.  */
2413void
2414APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts)
2415{
2416  unsigned int i;
2417
2418  for (i = 0; i < parts; i++)
2419    dst[i] = src[i];
2420}
2421
2422/* Returns true if a bignum is zero, false otherwise.  */
2423bool
2424APInt::tcIsZero(const integerPart *src, unsigned int parts)
2425{
2426  unsigned int i;
2427
2428  for (i = 0; i < parts; i++)
2429    if (src[i])
2430      return false;
2431
2432  return true;
2433}
2434
2435/* Extract the given bit of a bignum; returns 0 or 1.  */
2436int
2437APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
2438{
2439  return (parts[bit / integerPartWidth] &
2440          ((integerPart) 1 << bit % integerPartWidth)) != 0;
2441}
2442
2443/* Set the given bit of a bignum. */
2444void
2445APInt::tcSetBit(integerPart *parts, unsigned int bit)
2446{
2447  parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth);
2448}
2449
2450/* Clears the given bit of a bignum. */
2451void
2452APInt::tcClearBit(integerPart *parts, unsigned int bit)
2453{
2454  parts[bit / integerPartWidth] &=
2455    ~((integerPart) 1 << (bit % integerPartWidth));
2456}
2457
2458/* Returns the bit number of the least significant set bit of a
2459   number.  If the input number has no bits set -1U is returned.  */
2460unsigned int
2461APInt::tcLSB(const integerPart *parts, unsigned int n)
2462{
2463  unsigned int i, lsb;
2464
2465  for (i = 0; i < n; i++) {
2466      if (parts[i] != 0) {
2467          lsb = partLSB(parts[i]);
2468
2469          return lsb + i * integerPartWidth;
2470      }
2471  }
2472
2473  return -1U;
2474}
2475
2476/* Returns the bit number of the most significant set bit of a number.
2477   If the input number has no bits set -1U is returned.  */
2478unsigned int
2479APInt::tcMSB(const integerPart *parts, unsigned int n)
2480{
2481  unsigned int msb;
2482
2483  do {
2484    --n;
2485
2486    if (parts[n] != 0) {
2487      msb = partMSB(parts[n]);
2488
2489      return msb + n * integerPartWidth;
2490    }
2491  } while (n);
2492
2493  return -1U;
2494}
2495
2496/* Copy the bit vector of width srcBITS from SRC, starting at bit
2497   srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2498   the least significant bit of DST.  All high bits above srcBITS in
2499   DST are zero-filled.  */
2500void
2501APInt::tcExtract(integerPart *dst, unsigned int dstCount,const integerPart *src,
2502                 unsigned int srcBits, unsigned int srcLSB)
2503{
2504  unsigned int firstSrcPart, dstParts, shift, n;
2505
2506  dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth;
2507  assert(dstParts <= dstCount);
2508
2509  firstSrcPart = srcLSB / integerPartWidth;
2510  tcAssign (dst, src + firstSrcPart, dstParts);
2511
2512  shift = srcLSB % integerPartWidth;
2513  tcShiftRight (dst, dstParts, shift);
2514
2515  /* We now have (dstParts * integerPartWidth - shift) bits from SRC
2516     in DST.  If this is less that srcBits, append the rest, else
2517     clear the high bits.  */
2518  n = dstParts * integerPartWidth - shift;
2519  if (n < srcBits) {
2520    integerPart mask = lowBitMask (srcBits - n);
2521    dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2522                          << n % integerPartWidth);
2523  } else if (n > srcBits) {
2524    if (srcBits % integerPartWidth)
2525      dst[dstParts - 1] &= lowBitMask (srcBits % integerPartWidth);
2526  }
2527
2528  /* Clear high parts.  */
2529  while (dstParts < dstCount)
2530    dst[dstParts++] = 0;
2531}
2532
2533/* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2534integerPart
2535APInt::tcAdd(integerPart *dst, const integerPart *rhs,
2536             integerPart c, unsigned int parts)
2537{
2538  unsigned int i;
2539
2540  assert(c <= 1);
2541
2542  for (i = 0; i < parts; i++) {
2543    integerPart l;
2544
2545    l = dst[i];
2546    if (c) {
2547      dst[i] += rhs[i] + 1;
2548      c = (dst[i] <= l);
2549    } else {
2550      dst[i] += rhs[i];
2551      c = (dst[i] < l);
2552    }
2553  }
2554
2555  return c;
2556}
2557
2558/* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2559integerPart
2560APInt::tcSubtract(integerPart *dst, const integerPart *rhs,
2561                  integerPart c, unsigned int parts)
2562{
2563  unsigned int i;
2564
2565  assert(c <= 1);
2566
2567  for (i = 0; i < parts; i++) {
2568    integerPart l;
2569
2570    l = dst[i];
2571    if (c) {
2572      dst[i] -= rhs[i] + 1;
2573      c = (dst[i] >= l);
2574    } else {
2575      dst[i] -= rhs[i];
2576      c = (dst[i] > l);
2577    }
2578  }
2579
2580  return c;
2581}
2582
2583/* Negate a bignum in-place.  */
2584void
2585APInt::tcNegate(integerPart *dst, unsigned int parts)
2586{
2587  tcComplement(dst, parts);
2588  tcIncrement(dst, parts);
2589}
2590
2591/*  DST += SRC * MULTIPLIER + CARRY   if add is true
2592    DST  = SRC * MULTIPLIER + CARRY   if add is false
2593
2594    Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2595    they must start at the same point, i.e. DST == SRC.
2596
2597    If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2598    returned.  Otherwise DST is filled with the least significant
2599    DSTPARTS parts of the result, and if all of the omitted higher
2600    parts were zero return zero, otherwise overflow occurred and
2601    return one.  */
2602int
2603APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
2604                      integerPart multiplier, integerPart carry,
2605                      unsigned int srcParts, unsigned int dstParts,
2606                      bool add)
2607{
2608  unsigned int i, n;
2609
2610  /* Otherwise our writes of DST kill our later reads of SRC.  */
2611  assert(dst <= src || dst >= src + srcParts);
2612  assert(dstParts <= srcParts + 1);
2613
2614  /* N loops; minimum of dstParts and srcParts.  */
2615  n = dstParts < srcParts ? dstParts: srcParts;
2616
2617  for (i = 0; i < n; i++) {
2618    integerPart low, mid, high, srcPart;
2619
2620      /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2621
2622         This cannot overflow, because
2623
2624         (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2625
2626         which is less than n^2.  */
2627
2628    srcPart = src[i];
2629
2630    if (multiplier == 0 || srcPart == 0)        {
2631      low = carry;
2632      high = 0;
2633    } else {
2634      low = lowHalf(srcPart) * lowHalf(multiplier);
2635      high = highHalf(srcPart) * highHalf(multiplier);
2636
2637      mid = lowHalf(srcPart) * highHalf(multiplier);
2638      high += highHalf(mid);
2639      mid <<= integerPartWidth / 2;
2640      if (low + mid < low)
2641        high++;
2642      low += mid;
2643
2644      mid = highHalf(srcPart) * lowHalf(multiplier);
2645      high += highHalf(mid);
2646      mid <<= integerPartWidth / 2;
2647      if (low + mid < low)
2648        high++;
2649      low += mid;
2650
2651      /* Now add carry.  */
2652      if (low + carry < low)
2653        high++;
2654      low += carry;
2655    }
2656
2657    if (add) {
2658      /* And now DST[i], and store the new low part there.  */
2659      if (low + dst[i] < low)
2660        high++;
2661      dst[i] += low;
2662    } else
2663      dst[i] = low;
2664
2665    carry = high;
2666  }
2667
2668  if (i < dstParts) {
2669    /* Full multiplication, there is no overflow.  */
2670    assert(i + 1 == dstParts);
2671    dst[i] = carry;
2672    return 0;
2673  } else {
2674    /* We overflowed if there is carry.  */
2675    if (carry)
2676      return 1;
2677
2678    /* We would overflow if any significant unwritten parts would be
2679       non-zero.  This is true if any remaining src parts are non-zero
2680       and the multiplier is non-zero.  */
2681    if (multiplier)
2682      for (; i < srcParts; i++)
2683        if (src[i])
2684          return 1;
2685
2686    /* We fitted in the narrow destination.  */
2687    return 0;
2688  }
2689}
2690
2691/* DST = LHS * RHS, where DST has the same width as the operands and
2692   is filled with the least significant parts of the result.  Returns
2693   one if overflow occurred, otherwise zero.  DST must be disjoint
2694   from both operands.  */
2695int
2696APInt::tcMultiply(integerPart *dst, const integerPart *lhs,
2697                  const integerPart *rhs, unsigned int parts)
2698{
2699  unsigned int i;
2700  int overflow;
2701
2702  assert(dst != lhs && dst != rhs);
2703
2704  overflow = 0;
2705  tcSet(dst, 0, parts);
2706
2707  for (i = 0; i < parts; i++)
2708    overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2709                               parts - i, true);
2710
2711  return overflow;
2712}
2713
2714/* DST = LHS * RHS, where DST has width the sum of the widths of the
2715   operands.  No overflow occurs.  DST must be disjoint from both
2716   operands.  Returns the number of parts required to hold the
2717   result.  */
2718unsigned int
2719APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs,
2720                      const integerPart *rhs, unsigned int lhsParts,
2721                      unsigned int rhsParts)
2722{
2723  /* Put the narrower number on the LHS for less loops below.  */
2724  if (lhsParts > rhsParts) {
2725    return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2726  } else {
2727    unsigned int n;
2728
2729    assert(dst != lhs && dst != rhs);
2730
2731    tcSet(dst, 0, rhsParts);
2732
2733    for (n = 0; n < lhsParts; n++)
2734      tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true);
2735
2736    n = lhsParts + rhsParts;
2737
2738    return n - (dst[n - 1] == 0);
2739  }
2740}
2741
2742/* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2743   Otherwise set LHS to LHS / RHS with the fractional part discarded,
2744   set REMAINDER to the remainder, return zero.  i.e.
2745
2746   OLD_LHS = RHS * LHS + REMAINDER
2747
2748   SCRATCH is a bignum of the same size as the operands and result for
2749   use by the routine; its contents need not be initialized and are
2750   destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2751*/
2752int
2753APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
2754                integerPart *remainder, integerPart *srhs,
2755                unsigned int parts)
2756{
2757  unsigned int n, shiftCount;
2758  integerPart mask;
2759
2760  assert(lhs != remainder && lhs != srhs && remainder != srhs);
2761
2762  shiftCount = tcMSB(rhs, parts) + 1;
2763  if (shiftCount == 0)
2764    return true;
2765
2766  shiftCount = parts * integerPartWidth - shiftCount;
2767  n = shiftCount / integerPartWidth;
2768  mask = (integerPart) 1 << (shiftCount % integerPartWidth);
2769
2770  tcAssign(srhs, rhs, parts);
2771  tcShiftLeft(srhs, parts, shiftCount);
2772  tcAssign(remainder, lhs, parts);
2773  tcSet(lhs, 0, parts);
2774
2775  /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2776     the total.  */
2777  for (;;) {
2778      int compare;
2779
2780      compare = tcCompare(remainder, srhs, parts);
2781      if (compare >= 0) {
2782        tcSubtract(remainder, srhs, 0, parts);
2783        lhs[n] |= mask;
2784      }
2785
2786      if (shiftCount == 0)
2787        break;
2788      shiftCount--;
2789      tcShiftRight(srhs, parts, 1);
2790      if ((mask >>= 1) == 0)
2791        mask = (integerPart) 1 << (integerPartWidth - 1), n--;
2792  }
2793
2794  return false;
2795}
2796
2797/* Shift a bignum left COUNT bits in-place.  Shifted in bits are zero.
2798   There are no restrictions on COUNT.  */
2799void
2800APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count)
2801{
2802  if (count) {
2803    unsigned int jump, shift;
2804
2805    /* Jump is the inter-part jump; shift is is intra-part shift.  */
2806    jump = count / integerPartWidth;
2807    shift = count % integerPartWidth;
2808
2809    while (parts > jump) {
2810      integerPart part;
2811
2812      parts--;
2813
2814      /* dst[i] comes from the two parts src[i - jump] and, if we have
2815         an intra-part shift, src[i - jump - 1].  */
2816      part = dst[parts - jump];
2817      if (shift) {
2818        part <<= shift;
2819        if (parts >= jump + 1)
2820          part |= dst[parts - jump - 1] >> (integerPartWidth - shift);
2821      }
2822
2823      dst[parts] = part;
2824    }
2825
2826    while (parts > 0)
2827      dst[--parts] = 0;
2828  }
2829}
2830
2831/* Shift a bignum right COUNT bits in-place.  Shifted in bits are
2832   zero.  There are no restrictions on COUNT.  */
2833void
2834APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
2835{
2836  if (count) {
2837    unsigned int i, jump, shift;
2838
2839    /* Jump is the inter-part jump; shift is is intra-part shift.  */
2840    jump = count / integerPartWidth;
2841    shift = count % integerPartWidth;
2842
2843    /* Perform the shift.  This leaves the most significant COUNT bits
2844       of the result at zero.  */
2845    for (i = 0; i < parts; i++) {
2846      integerPart part;
2847
2848      if (i + jump >= parts) {
2849        part = 0;
2850      } else {
2851        part = dst[i + jump];
2852        if (shift) {
2853          part >>= shift;
2854          if (i + jump + 1 < parts)
2855            part |= dst[i + jump + 1] << (integerPartWidth - shift);
2856        }
2857      }
2858
2859      dst[i] = part;
2860    }
2861  }
2862}
2863
2864/* Bitwise and of two bignums.  */
2865void
2866APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts)
2867{
2868  unsigned int i;
2869
2870  for (i = 0; i < parts; i++)
2871    dst[i] &= rhs[i];
2872}
2873
2874/* Bitwise inclusive or of two bignums.  */
2875void
2876APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts)
2877{
2878  unsigned int i;
2879
2880  for (i = 0; i < parts; i++)
2881    dst[i] |= rhs[i];
2882}
2883
2884/* Bitwise exclusive or of two bignums.  */
2885void
2886APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts)
2887{
2888  unsigned int i;
2889
2890  for (i = 0; i < parts; i++)
2891    dst[i] ^= rhs[i];
2892}
2893
2894/* Complement a bignum in-place.  */
2895void
2896APInt::tcComplement(integerPart *dst, unsigned int parts)
2897{
2898  unsigned int i;
2899
2900  for (i = 0; i < parts; i++)
2901    dst[i] = ~dst[i];
2902}
2903
2904/* Comparison (unsigned) of two bignums.  */
2905int
2906APInt::tcCompare(const integerPart *lhs, const integerPart *rhs,
2907                 unsigned int parts)
2908{
2909  while (parts) {
2910      parts--;
2911      if (lhs[parts] == rhs[parts])
2912        continue;
2913
2914      if (lhs[parts] > rhs[parts])
2915        return 1;
2916      else
2917        return -1;
2918    }
2919
2920  return 0;
2921}
2922
2923/* Increment a bignum in-place, return the carry flag.  */
2924integerPart
2925APInt::tcIncrement(integerPart *dst, unsigned int parts)
2926{
2927  unsigned int i;
2928
2929  for (i = 0; i < parts; i++)
2930    if (++dst[i] != 0)
2931      break;
2932
2933  return i == parts;
2934}
2935
2936/* Set the least significant BITS bits of a bignum, clear the
2937   rest.  */
2938void
2939APInt::tcSetLeastSignificantBits(integerPart *dst, unsigned int parts,
2940                                 unsigned int bits)
2941{
2942  unsigned int i;
2943
2944  i = 0;
2945  while (bits > integerPartWidth) {
2946    dst[i++] = ~(integerPart) 0;
2947    bits -= integerPartWidth;
2948  }
2949
2950  if (bits)
2951    dst[i++] = ~(integerPart) 0 >> (integerPartWidth - bits);
2952
2953  while (i < parts)
2954    dst[i++] = 0;
2955}
2956