1//===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements a class to represent arbitrary precision floating 11// point values and provide a variety of arithmetic operations on them. 12// 13//===----------------------------------------------------------------------===// 14 15#include "llvm/ADT/APFloat.h" 16#include "llvm/ADT/APSInt.h" 17#include "llvm/ADT/FoldingSet.h" 18#include "llvm/ADT/Hashing.h" 19#include "llvm/ADT/StringExtras.h" 20#include "llvm/ADT/StringRef.h" 21#include "llvm/Support/ErrorHandling.h" 22#include "llvm/Support/MathExtras.h" 23#include <cstring> 24#include <limits.h> 25 26using namespace llvm; 27 28/// A macro used to combine two fcCategory enums into one key which can be used 29/// in a switch statement to classify how the interaction of two APFloat's 30/// categories affects an operation. 31/// 32/// TODO: If clang source code is ever allowed to use constexpr in its own 33/// codebase, change this into a static inline function. 34#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs)) 35 36/* Assumed in hexadecimal significand parsing, and conversion to 37 hexadecimal strings. */ 38static_assert(integerPartWidth % 4 == 0, "Part width must be divisible by 4!"); 39 40namespace llvm { 41 42 /* Represents floating point arithmetic semantics. */ 43 struct fltSemantics { 44 /* The largest E such that 2^E is representable; this matches the 45 definition of IEEE 754. */ 46 APFloat::ExponentType maxExponent; 47 48 /* The smallest E such that 2^E is a normalized number; this 49 matches the definition of IEEE 754. */ 50 APFloat::ExponentType minExponent; 51 52 /* Number of bits in the significand. This includes the integer 53 bit. */ 54 unsigned int precision; 55 56 /* Number of bits actually used in the semantics. */ 57 unsigned int sizeInBits; 58 }; 59 60 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, 16 }; 61 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, 32 }; 62 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, 64 }; 63 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, 128 }; 64 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, 80 }; 65 const fltSemantics APFloat::Bogus = { 0, 0, 0, 0 }; 66 67 /* The PowerPC format consists of two doubles. It does not map cleanly 68 onto the usual format above. It is approximated using twice the 69 mantissa bits. Note that for exponents near the double minimum, 70 we no longer can represent the full 106 mantissa bits, so those 71 will be treated as denormal numbers. 72 73 FIXME: While this approximation is equivalent to what GCC uses for 74 compile-time arithmetic on PPC double-double numbers, it is not able 75 to represent all possible values held by a PPC double-double number, 76 for example: (long double) 1.0 + (long double) 0x1p-106 77 Should this be replaced by a full emulation of PPC double-double? */ 78 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53, 128 }; 79 80 /* A tight upper bound on number of parts required to hold the value 81 pow(5, power) is 82 83 power * 815 / (351 * integerPartWidth) + 1 84 85 However, whilst the result may require only this many parts, 86 because we are multiplying two values to get it, the 87 multiplication may require an extra part with the excess part 88 being zero (consider the trivial case of 1 * 1, tcFullMultiply 89 requires two parts to hold the single-part result). So we add an 90 extra one to guarantee enough space whilst multiplying. */ 91 const unsigned int maxExponent = 16383; 92 const unsigned int maxPrecision = 113; 93 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 94 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) 95 / (351 * integerPartWidth)); 96} 97 98/* A bunch of private, handy routines. */ 99 100static inline unsigned int 101partCountForBits(unsigned int bits) 102{ 103 return ((bits) + integerPartWidth - 1) / integerPartWidth; 104} 105 106/* Returns 0U-9U. Return values >= 10U are not digits. */ 107static inline unsigned int 108decDigitValue(unsigned int c) 109{ 110 return c - '0'; 111} 112 113/* Return the value of a decimal exponent of the form 114 [+-]ddddddd. 115 116 If the exponent overflows, returns a large exponent with the 117 appropriate sign. */ 118static int 119readExponent(StringRef::iterator begin, StringRef::iterator end) 120{ 121 bool isNegative; 122 unsigned int absExponent; 123 const unsigned int overlargeExponent = 24000; /* FIXME. */ 124 StringRef::iterator p = begin; 125 126 assert(p != end && "Exponent has no digits"); 127 128 isNegative = (*p == '-'); 129 if (*p == '-' || *p == '+') { 130 p++; 131 assert(p != end && "Exponent has no digits"); 132 } 133 134 absExponent = decDigitValue(*p++); 135 assert(absExponent < 10U && "Invalid character in exponent"); 136 137 for (; p != end; ++p) { 138 unsigned int value; 139 140 value = decDigitValue(*p); 141 assert(value < 10U && "Invalid character in exponent"); 142 143 value += absExponent * 10; 144 if (absExponent >= overlargeExponent) { 145 absExponent = overlargeExponent; 146 p = end; /* outwit assert below */ 147 break; 148 } 149 absExponent = value; 150 } 151 152 assert(p == end && "Invalid exponent in exponent"); 153 154 if (isNegative) 155 return -(int) absExponent; 156 else 157 return (int) absExponent; 158} 159 160/* This is ugly and needs cleaning up, but I don't immediately see 161 how whilst remaining safe. */ 162static int 163totalExponent(StringRef::iterator p, StringRef::iterator end, 164 int exponentAdjustment) 165{ 166 int unsignedExponent; 167 bool negative, overflow; 168 int exponent = 0; 169 170 assert(p != end && "Exponent has no digits"); 171 172 negative = *p == '-'; 173 if (*p == '-' || *p == '+') { 174 p++; 175 assert(p != end && "Exponent has no digits"); 176 } 177 178 unsignedExponent = 0; 179 overflow = false; 180 for (; p != end; ++p) { 181 unsigned int value; 182 183 value = decDigitValue(*p); 184 assert(value < 10U && "Invalid character in exponent"); 185 186 unsignedExponent = unsignedExponent * 10 + value; 187 if (unsignedExponent > 32767) { 188 overflow = true; 189 break; 190 } 191 } 192 193 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 194 overflow = true; 195 196 if (!overflow) { 197 exponent = unsignedExponent; 198 if (negative) 199 exponent = -exponent; 200 exponent += exponentAdjustment; 201 if (exponent > 32767 || exponent < -32768) 202 overflow = true; 203 } 204 205 if (overflow) 206 exponent = negative ? -32768: 32767; 207 208 return exponent; 209} 210 211static StringRef::iterator 212skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 213 StringRef::iterator *dot) 214{ 215 StringRef::iterator p = begin; 216 *dot = end; 217 while (p != end && *p == '0') 218 p++; 219 220 if (p != end && *p == '.') { 221 *dot = p++; 222 223 assert(end - begin != 1 && "Significand has no digits"); 224 225 while (p != end && *p == '0') 226 p++; 227 } 228 229 return p; 230} 231 232/* Given a normal decimal floating point number of the form 233 234 dddd.dddd[eE][+-]ddd 235 236 where the decimal point and exponent are optional, fill out the 237 structure D. Exponent is appropriate if the significand is 238 treated as an integer, and normalizedExponent if the significand 239 is taken to have the decimal point after a single leading 240 non-zero digit. 241 242 If the value is zero, V->firstSigDigit points to a non-digit, and 243 the return exponent is zero. 244*/ 245struct decimalInfo { 246 const char *firstSigDigit; 247 const char *lastSigDigit; 248 int exponent; 249 int normalizedExponent; 250}; 251 252static void 253interpretDecimal(StringRef::iterator begin, StringRef::iterator end, 254 decimalInfo *D) 255{ 256 StringRef::iterator dot = end; 257 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot); 258 259 D->firstSigDigit = p; 260 D->exponent = 0; 261 D->normalizedExponent = 0; 262 263 for (; p != end; ++p) { 264 if (*p == '.') { 265 assert(dot == end && "String contains multiple dots"); 266 dot = p++; 267 if (p == end) 268 break; 269 } 270 if (decDigitValue(*p) >= 10U) 271 break; 272 } 273 274 if (p != end) { 275 assert((*p == 'e' || *p == 'E') && "Invalid character in significand"); 276 assert(p != begin && "Significand has no digits"); 277 assert((dot == end || p - begin != 1) && "Significand has no digits"); 278 279 /* p points to the first non-digit in the string */ 280 D->exponent = readExponent(p + 1, end); 281 282 /* Implied decimal point? */ 283 if (dot == end) 284 dot = p; 285 } 286 287 /* If number is all zeroes accept any exponent. */ 288 if (p != D->firstSigDigit) { 289 /* Drop insignificant trailing zeroes. */ 290 if (p != begin) { 291 do 292 do 293 p--; 294 while (p != begin && *p == '0'); 295 while (p != begin && *p == '.'); 296 } 297 298 /* Adjust the exponents for any decimal point. */ 299 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p)); 300 D->normalizedExponent = (D->exponent + 301 static_cast<APFloat::ExponentType>((p - D->firstSigDigit) 302 - (dot > D->firstSigDigit && dot < p))); 303 } 304 305 D->lastSigDigit = p; 306} 307 308/* Return the trailing fraction of a hexadecimal number. 309 DIGITVALUE is the first hex digit of the fraction, P points to 310 the next digit. */ 311static lostFraction 312trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 313 unsigned int digitValue) 314{ 315 unsigned int hexDigit; 316 317 /* If the first trailing digit isn't 0 or 8 we can work out the 318 fraction immediately. */ 319 if (digitValue > 8) 320 return lfMoreThanHalf; 321 else if (digitValue < 8 && digitValue > 0) 322 return lfLessThanHalf; 323 324 // Otherwise we need to find the first non-zero digit. 325 while (p != end && (*p == '0' || *p == '.')) 326 p++; 327 328 assert(p != end && "Invalid trailing hexadecimal fraction!"); 329 330 hexDigit = hexDigitValue(*p); 331 332 /* If we ran off the end it is exactly zero or one-half, otherwise 333 a little more. */ 334 if (hexDigit == -1U) 335 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 336 else 337 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 338} 339 340/* Return the fraction lost were a bignum truncated losing the least 341 significant BITS bits. */ 342static lostFraction 343lostFractionThroughTruncation(const integerPart *parts, 344 unsigned int partCount, 345 unsigned int bits) 346{ 347 unsigned int lsb; 348 349 lsb = APInt::tcLSB(parts, partCount); 350 351 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ 352 if (bits <= lsb) 353 return lfExactlyZero; 354 if (bits == lsb + 1) 355 return lfExactlyHalf; 356 if (bits <= partCount * integerPartWidth && 357 APInt::tcExtractBit(parts, bits - 1)) 358 return lfMoreThanHalf; 359 360 return lfLessThanHalf; 361} 362 363/* Shift DST right BITS bits noting lost fraction. */ 364static lostFraction 365shiftRight(integerPart *dst, unsigned int parts, unsigned int bits) 366{ 367 lostFraction lost_fraction; 368 369 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 370 371 APInt::tcShiftRight(dst, parts, bits); 372 373 return lost_fraction; 374} 375 376/* Combine the effect of two lost fractions. */ 377static lostFraction 378combineLostFractions(lostFraction moreSignificant, 379 lostFraction lessSignificant) 380{ 381 if (lessSignificant != lfExactlyZero) { 382 if (moreSignificant == lfExactlyZero) 383 moreSignificant = lfLessThanHalf; 384 else if (moreSignificant == lfExactlyHalf) 385 moreSignificant = lfMoreThanHalf; 386 } 387 388 return moreSignificant; 389} 390 391/* The error from the true value, in half-ulps, on multiplying two 392 floating point numbers, which differ from the value they 393 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 394 than the returned value. 395 396 See "How to Read Floating Point Numbers Accurately" by William D 397 Clinger. */ 398static unsigned int 399HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 400{ 401 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 402 403 if (HUerr1 + HUerr2 == 0) 404 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 405 else 406 return inexactMultiply + 2 * (HUerr1 + HUerr2); 407} 408 409/* The number of ulps from the boundary (zero, or half if ISNEAREST) 410 when the least significant BITS are truncated. BITS cannot be 411 zero. */ 412static integerPart 413ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) 414{ 415 unsigned int count, partBits; 416 integerPart part, boundary; 417 418 assert(bits != 0); 419 420 bits--; 421 count = bits / integerPartWidth; 422 partBits = bits % integerPartWidth + 1; 423 424 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits)); 425 426 if (isNearest) 427 boundary = (integerPart) 1 << (partBits - 1); 428 else 429 boundary = 0; 430 431 if (count == 0) { 432 if (part - boundary <= boundary - part) 433 return part - boundary; 434 else 435 return boundary - part; 436 } 437 438 if (part == boundary) { 439 while (--count) 440 if (parts[count]) 441 return ~(integerPart) 0; /* A lot. */ 442 443 return parts[0]; 444 } else if (part == boundary - 1) { 445 while (--count) 446 if (~parts[count]) 447 return ~(integerPart) 0; /* A lot. */ 448 449 return -parts[0]; 450 } 451 452 return ~(integerPart) 0; /* A lot. */ 453} 454 455/* Place pow(5, power) in DST, and return the number of parts used. 456 DST must be at least one part larger than size of the answer. */ 457static unsigned int 458powerOf5(integerPart *dst, unsigned int power) 459{ 460 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 461 15625, 78125 }; 462 integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; 463 pow5s[0] = 78125 * 5; 464 465 unsigned int partsCount[16] = { 1 }; 466 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; 467 unsigned int result; 468 assert(power <= maxExponent); 469 470 p1 = dst; 471 p2 = scratch; 472 473 *p1 = firstEightPowers[power & 7]; 474 power >>= 3; 475 476 result = 1; 477 pow5 = pow5s; 478 479 for (unsigned int n = 0; power; power >>= 1, n++) { 480 unsigned int pc; 481 482 pc = partsCount[n]; 483 484 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ 485 if (pc == 0) { 486 pc = partsCount[n - 1]; 487 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); 488 pc *= 2; 489 if (pow5[pc - 1] == 0) 490 pc--; 491 partsCount[n] = pc; 492 } 493 494 if (power & 1) { 495 integerPart *tmp; 496 497 APInt::tcFullMultiply(p2, p1, pow5, result, pc); 498 result += pc; 499 if (p2[result - 1] == 0) 500 result--; 501 502 /* Now result is in p1 with partsCount parts and p2 is scratch 503 space. */ 504 tmp = p1, p1 = p2, p2 = tmp; 505 } 506 507 pow5 += pc; 508 } 509 510 if (p1 != dst) 511 APInt::tcAssign(dst, p1, result); 512 513 return result; 514} 515 516/* Zero at the end to avoid modular arithmetic when adding one; used 517 when rounding up during hexadecimal output. */ 518static const char hexDigitsLower[] = "0123456789abcdef0"; 519static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 520static const char infinityL[] = "infinity"; 521static const char infinityU[] = "INFINITY"; 522static const char NaNL[] = "nan"; 523static const char NaNU[] = "NAN"; 524 525/* Write out an integerPart in hexadecimal, starting with the most 526 significant nibble. Write out exactly COUNT hexdigits, return 527 COUNT. */ 528static unsigned int 529partAsHex (char *dst, integerPart part, unsigned int count, 530 const char *hexDigitChars) 531{ 532 unsigned int result = count; 533 534 assert(count != 0 && count <= integerPartWidth / 4); 535 536 part >>= (integerPartWidth - 4 * count); 537 while (count--) { 538 dst[count] = hexDigitChars[part & 0xf]; 539 part >>= 4; 540 } 541 542 return result; 543} 544 545/* Write out an unsigned decimal integer. */ 546static char * 547writeUnsignedDecimal (char *dst, unsigned int n) 548{ 549 char buff[40], *p; 550 551 p = buff; 552 do 553 *p++ = '0' + n % 10; 554 while (n /= 10); 555 556 do 557 *dst++ = *--p; 558 while (p != buff); 559 560 return dst; 561} 562 563/* Write out a signed decimal integer. */ 564static char * 565writeSignedDecimal (char *dst, int value) 566{ 567 if (value < 0) { 568 *dst++ = '-'; 569 dst = writeUnsignedDecimal(dst, -(unsigned) value); 570 } else 571 dst = writeUnsignedDecimal(dst, value); 572 573 return dst; 574} 575 576/* Constructors. */ 577void 578APFloat::initialize(const fltSemantics *ourSemantics) 579{ 580 unsigned int count; 581 582 semantics = ourSemantics; 583 count = partCount(); 584 if (count > 1) 585 significand.parts = new integerPart[count]; 586} 587 588void 589APFloat::freeSignificand() 590{ 591 if (needsCleanup()) 592 delete [] significand.parts; 593} 594 595void 596APFloat::assign(const APFloat &rhs) 597{ 598 assert(semantics == rhs.semantics); 599 600 sign = rhs.sign; 601 category = rhs.category; 602 exponent = rhs.exponent; 603 if (isFiniteNonZero() || category == fcNaN) 604 copySignificand(rhs); 605} 606 607void 608APFloat::copySignificand(const APFloat &rhs) 609{ 610 assert(isFiniteNonZero() || category == fcNaN); 611 assert(rhs.partCount() >= partCount()); 612 613 APInt::tcAssign(significandParts(), rhs.significandParts(), 614 partCount()); 615} 616 617/* Make this number a NaN, with an arbitrary but deterministic value 618 for the significand. If double or longer, this is a signalling NaN, 619 which may not be ideal. If float, this is QNaN(0). */ 620void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) 621{ 622 category = fcNaN; 623 sign = Negative; 624 625 integerPart *significand = significandParts(); 626 unsigned numParts = partCount(); 627 628 // Set the significand bits to the fill. 629 if (!fill || fill->getNumWords() < numParts) 630 APInt::tcSet(significand, 0, numParts); 631 if (fill) { 632 APInt::tcAssign(significand, fill->getRawData(), 633 std::min(fill->getNumWords(), numParts)); 634 635 // Zero out the excess bits of the significand. 636 unsigned bitsToPreserve = semantics->precision - 1; 637 unsigned part = bitsToPreserve / 64; 638 bitsToPreserve %= 64; 639 significand[part] &= ((1ULL << bitsToPreserve) - 1); 640 for (part++; part != numParts; ++part) 641 significand[part] = 0; 642 } 643 644 unsigned QNaNBit = semantics->precision - 2; 645 646 if (SNaN) { 647 // We always have to clear the QNaN bit to make it an SNaN. 648 APInt::tcClearBit(significand, QNaNBit); 649 650 // If there are no bits set in the payload, we have to set 651 // *something* to make it a NaN instead of an infinity; 652 // conventionally, this is the next bit down from the QNaN bit. 653 if (APInt::tcIsZero(significand, numParts)) 654 APInt::tcSetBit(significand, QNaNBit - 1); 655 } else { 656 // We always have to set the QNaN bit to make it a QNaN. 657 APInt::tcSetBit(significand, QNaNBit); 658 } 659 660 // For x87 extended precision, we want to make a NaN, not a 661 // pseudo-NaN. Maybe we should expose the ability to make 662 // pseudo-NaNs? 663 if (semantics == &APFloat::x87DoubleExtended) 664 APInt::tcSetBit(significand, QNaNBit + 1); 665} 666 667APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative, 668 const APInt *fill) { 669 APFloat value(Sem, uninitialized); 670 value.makeNaN(SNaN, Negative, fill); 671 return value; 672} 673 674APFloat & 675APFloat::operator=(const APFloat &rhs) 676{ 677 if (this != &rhs) { 678 if (semantics != rhs.semantics) { 679 freeSignificand(); 680 initialize(rhs.semantics); 681 } 682 assign(rhs); 683 } 684 685 return *this; 686} 687 688APFloat & 689APFloat::operator=(APFloat &&rhs) { 690 freeSignificand(); 691 692 semantics = rhs.semantics; 693 significand = rhs.significand; 694 exponent = rhs.exponent; 695 category = rhs.category; 696 sign = rhs.sign; 697 698 rhs.semantics = &Bogus; 699 return *this; 700} 701 702bool 703APFloat::isDenormal() const { 704 return isFiniteNonZero() && (exponent == semantics->minExponent) && 705 (APInt::tcExtractBit(significandParts(), 706 semantics->precision - 1) == 0); 707} 708 709bool 710APFloat::isSmallest() const { 711 // The smallest number by magnitude in our format will be the smallest 712 // denormal, i.e. the floating point number with exponent being minimum 713 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 714 return isFiniteNonZero() && exponent == semantics->minExponent && 715 significandMSB() == 0; 716} 717 718bool APFloat::isSignificandAllOnes() const { 719 // Test if the significand excluding the integral bit is all ones. This allows 720 // us to test for binade boundaries. 721 const integerPart *Parts = significandParts(); 722 const unsigned PartCount = partCount(); 723 for (unsigned i = 0; i < PartCount - 1; i++) 724 if (~Parts[i]) 725 return false; 726 727 // Set the unused high bits to all ones when we compare. 728 const unsigned NumHighBits = 729 PartCount*integerPartWidth - semantics->precision + 1; 730 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " 731 "fill than integerPartWidth"); 732 const integerPart HighBitFill = 733 ~integerPart(0) << (integerPartWidth - NumHighBits); 734 if (~(Parts[PartCount - 1] | HighBitFill)) 735 return false; 736 737 return true; 738} 739 740bool APFloat::isSignificandAllZeros() const { 741 // Test if the significand excluding the integral bit is all zeros. This 742 // allows us to test for binade boundaries. 743 const integerPart *Parts = significandParts(); 744 const unsigned PartCount = partCount(); 745 746 for (unsigned i = 0; i < PartCount - 1; i++) 747 if (Parts[i]) 748 return false; 749 750 const unsigned NumHighBits = 751 PartCount*integerPartWidth - semantics->precision + 1; 752 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " 753 "clear than integerPartWidth"); 754 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 755 756 if (Parts[PartCount - 1] & HighBitMask) 757 return false; 758 759 return true; 760} 761 762bool 763APFloat::isLargest() const { 764 // The largest number by magnitude in our format will be the floating point 765 // number with maximum exponent and with significand that is all ones. 766 return isFiniteNonZero() && exponent == semantics->maxExponent 767 && isSignificandAllOnes(); 768} 769 770bool 771APFloat::isInteger() const { 772 // This could be made more efficient; I'm going for obviously correct. 773 if (!isFinite()) return false; 774 APFloat truncated = *this; 775 truncated.roundToIntegral(rmTowardZero); 776 return compare(truncated) == cmpEqual; 777} 778 779bool 780APFloat::bitwiseIsEqual(const APFloat &rhs) const { 781 if (this == &rhs) 782 return true; 783 if (semantics != rhs.semantics || 784 category != rhs.category || 785 sign != rhs.sign) 786 return false; 787 if (category==fcZero || category==fcInfinity) 788 return true; 789 790 if (isFiniteNonZero() && exponent != rhs.exponent) 791 return false; 792 793 return std::equal(significandParts(), significandParts() + partCount(), 794 rhs.significandParts()); 795} 796 797APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) { 798 initialize(&ourSemantics); 799 sign = 0; 800 category = fcNormal; 801 zeroSignificand(); 802 exponent = ourSemantics.precision - 1; 803 significandParts()[0] = value; 804 normalize(rmNearestTiesToEven, lfExactlyZero); 805} 806 807APFloat::APFloat(const fltSemantics &ourSemantics) { 808 initialize(&ourSemantics); 809 category = fcZero; 810 sign = false; 811} 812 813APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) { 814 // Allocates storage if necessary but does not initialize it. 815 initialize(&ourSemantics); 816} 817 818APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) { 819 initialize(&ourSemantics); 820 convertFromString(text, rmNearestTiesToEven); 821} 822 823APFloat::APFloat(const APFloat &rhs) { 824 initialize(rhs.semantics); 825 assign(rhs); 826} 827 828APFloat::APFloat(APFloat &&rhs) : semantics(&Bogus) { 829 *this = std::move(rhs); 830} 831 832APFloat::~APFloat() 833{ 834 freeSignificand(); 835} 836 837// Profile - This method 'profiles' an APFloat for use with FoldingSet. 838void APFloat::Profile(FoldingSetNodeID& ID) const { 839 ID.Add(bitcastToAPInt()); 840} 841 842unsigned int 843APFloat::partCount() const 844{ 845 return partCountForBits(semantics->precision + 1); 846} 847 848unsigned int 849APFloat::semanticsPrecision(const fltSemantics &semantics) 850{ 851 return semantics.precision; 852} 853APFloat::ExponentType 854APFloat::semanticsMaxExponent(const fltSemantics &semantics) 855{ 856 return semantics.maxExponent; 857} 858APFloat::ExponentType 859APFloat::semanticsMinExponent(const fltSemantics &semantics) 860{ 861 return semantics.minExponent; 862} 863unsigned int 864APFloat::semanticsSizeInBits(const fltSemantics &semantics) 865{ 866 return semantics.sizeInBits; 867} 868 869const integerPart * 870APFloat::significandParts() const 871{ 872 return const_cast<APFloat *>(this)->significandParts(); 873} 874 875integerPart * 876APFloat::significandParts() 877{ 878 if (partCount() > 1) 879 return significand.parts; 880 else 881 return &significand.part; 882} 883 884void 885APFloat::zeroSignificand() 886{ 887 APInt::tcSet(significandParts(), 0, partCount()); 888} 889 890/* Increment an fcNormal floating point number's significand. */ 891void 892APFloat::incrementSignificand() 893{ 894 integerPart carry; 895 896 carry = APInt::tcIncrement(significandParts(), partCount()); 897 898 /* Our callers should never cause us to overflow. */ 899 assert(carry == 0); 900 (void)carry; 901} 902 903/* Add the significand of the RHS. Returns the carry flag. */ 904integerPart 905APFloat::addSignificand(const APFloat &rhs) 906{ 907 integerPart *parts; 908 909 parts = significandParts(); 910 911 assert(semantics == rhs.semantics); 912 assert(exponent == rhs.exponent); 913 914 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 915} 916 917/* Subtract the significand of the RHS with a borrow flag. Returns 918 the borrow flag. */ 919integerPart 920APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow) 921{ 922 integerPart *parts; 923 924 parts = significandParts(); 925 926 assert(semantics == rhs.semantics); 927 assert(exponent == rhs.exponent); 928 929 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 930 partCount()); 931} 932 933/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 934 on to the full-precision result of the multiplication. Returns the 935 lost fraction. */ 936lostFraction 937APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) 938{ 939 unsigned int omsb; // One, not zero, based MSB. 940 unsigned int partsCount, newPartsCount, precision; 941 integerPart *lhsSignificand; 942 integerPart scratch[4]; 943 integerPart *fullSignificand; 944 lostFraction lost_fraction; 945 bool ignored; 946 947 assert(semantics == rhs.semantics); 948 949 precision = semantics->precision; 950 951 // Allocate space for twice as many bits as the original significand, plus one 952 // extra bit for the addition to overflow into. 953 newPartsCount = partCountForBits(precision * 2 + 1); 954 955 if (newPartsCount > 4) 956 fullSignificand = new integerPart[newPartsCount]; 957 else 958 fullSignificand = scratch; 959 960 lhsSignificand = significandParts(); 961 partsCount = partCount(); 962 963 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 964 rhs.significandParts(), partsCount, partsCount); 965 966 lost_fraction = lfExactlyZero; 967 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 968 exponent += rhs.exponent; 969 970 // Assume the operands involved in the multiplication are single-precision 971 // FP, and the two multiplicants are: 972 // *this = a23 . a22 ... a0 * 2^e1 973 // rhs = b23 . b22 ... b0 * 2^e2 974 // the result of multiplication is: 975 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) 976 // Note that there are three significant bits at the left-hand side of the 977 // radix point: two for the multiplication, and an overflow bit for the 978 // addition (that will always be zero at this point). Move the radix point 979 // toward left by two bits, and adjust exponent accordingly. 980 exponent += 2; 981 982 if (addend && addend->isNonZero()) { 983 // The intermediate result of the multiplication has "2 * precision" 984 // signicant bit; adjust the addend to be consistent with mul result. 985 // 986 Significand savedSignificand = significand; 987 const fltSemantics *savedSemantics = semantics; 988 fltSemantics extendedSemantics; 989 opStatus status; 990 unsigned int extendedPrecision; 991 992 // Normalize our MSB to one below the top bit to allow for overflow. 993 extendedPrecision = 2 * precision + 1; 994 if (omsb != extendedPrecision - 1) { 995 assert(extendedPrecision > omsb); 996 APInt::tcShiftLeft(fullSignificand, newPartsCount, 997 (extendedPrecision - 1) - omsb); 998 exponent -= (extendedPrecision - 1) - omsb; 999 } 1000 1001 /* Create new semantics. */ 1002 extendedSemantics = *semantics; 1003 extendedSemantics.precision = extendedPrecision; 1004 1005 if (newPartsCount == 1) 1006 significand.part = fullSignificand[0]; 1007 else 1008 significand.parts = fullSignificand; 1009 semantics = &extendedSemantics; 1010 1011 APFloat extendedAddend(*addend); 1012 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); 1013 assert(status == opOK); 1014 (void)status; 1015 1016 // Shift the significand of the addend right by one bit. This guarantees 1017 // that the high bit of the significand is zero (same as fullSignificand), 1018 // so the addition will overflow (if it does overflow at all) into the top bit. 1019 lost_fraction = extendedAddend.shiftSignificandRight(1); 1020 assert(lost_fraction == lfExactlyZero && 1021 "Lost precision while shifting addend for fused-multiply-add."); 1022 1023 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 1024 1025 /* Restore our state. */ 1026 if (newPartsCount == 1) 1027 fullSignificand[0] = significand.part; 1028 significand = savedSignificand; 1029 semantics = savedSemantics; 1030 1031 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 1032 } 1033 1034 // Convert the result having "2 * precision" significant-bits back to the one 1035 // having "precision" significant-bits. First, move the radix point from 1036 // poision "2*precision - 1" to "precision - 1". The exponent need to be 1037 // adjusted by "2*precision - 1" - "precision - 1" = "precision". 1038 exponent -= precision + 1; 1039 1040 // In case MSB resides at the left-hand side of radix point, shift the 1041 // mantissa right by some amount to make sure the MSB reside right before 1042 // the radix point (i.e. "MSB . rest-significant-bits"). 1043 // 1044 // Note that the result is not normalized when "omsb < precision". So, the 1045 // caller needs to call APFloat::normalize() if normalized value is expected. 1046 if (omsb > precision) { 1047 unsigned int bits, significantParts; 1048 lostFraction lf; 1049 1050 bits = omsb - precision; 1051 significantParts = partCountForBits(omsb); 1052 lf = shiftRight(fullSignificand, significantParts, bits); 1053 lost_fraction = combineLostFractions(lf, lost_fraction); 1054 exponent += bits; 1055 } 1056 1057 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 1058 1059 if (newPartsCount > 4) 1060 delete [] fullSignificand; 1061 1062 return lost_fraction; 1063} 1064 1065/* Multiply the significands of LHS and RHS to DST. */ 1066lostFraction 1067APFloat::divideSignificand(const APFloat &rhs) 1068{ 1069 unsigned int bit, i, partsCount; 1070 const integerPart *rhsSignificand; 1071 integerPart *lhsSignificand, *dividend, *divisor; 1072 integerPart scratch[4]; 1073 lostFraction lost_fraction; 1074 1075 assert(semantics == rhs.semantics); 1076 1077 lhsSignificand = significandParts(); 1078 rhsSignificand = rhs.significandParts(); 1079 partsCount = partCount(); 1080 1081 if (partsCount > 2) 1082 dividend = new integerPart[partsCount * 2]; 1083 else 1084 dividend = scratch; 1085 1086 divisor = dividend + partsCount; 1087 1088 /* Copy the dividend and divisor as they will be modified in-place. */ 1089 for (i = 0; i < partsCount; i++) { 1090 dividend[i] = lhsSignificand[i]; 1091 divisor[i] = rhsSignificand[i]; 1092 lhsSignificand[i] = 0; 1093 } 1094 1095 exponent -= rhs.exponent; 1096 1097 unsigned int precision = semantics->precision; 1098 1099 /* Normalize the divisor. */ 1100 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 1101 if (bit) { 1102 exponent += bit; 1103 APInt::tcShiftLeft(divisor, partsCount, bit); 1104 } 1105 1106 /* Normalize the dividend. */ 1107 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 1108 if (bit) { 1109 exponent -= bit; 1110 APInt::tcShiftLeft(dividend, partsCount, bit); 1111 } 1112 1113 /* Ensure the dividend >= divisor initially for the loop below. 1114 Incidentally, this means that the division loop below is 1115 guaranteed to set the integer bit to one. */ 1116 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) { 1117 exponent--; 1118 APInt::tcShiftLeft(dividend, partsCount, 1); 1119 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 1120 } 1121 1122 /* Long division. */ 1123 for (bit = precision; bit; bit -= 1) { 1124 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 1125 APInt::tcSubtract(dividend, divisor, 0, partsCount); 1126 APInt::tcSetBit(lhsSignificand, bit - 1); 1127 } 1128 1129 APInt::tcShiftLeft(dividend, partsCount, 1); 1130 } 1131 1132 /* Figure out the lost fraction. */ 1133 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 1134 1135 if (cmp > 0) 1136 lost_fraction = lfMoreThanHalf; 1137 else if (cmp == 0) 1138 lost_fraction = lfExactlyHalf; 1139 else if (APInt::tcIsZero(dividend, partsCount)) 1140 lost_fraction = lfExactlyZero; 1141 else 1142 lost_fraction = lfLessThanHalf; 1143 1144 if (partsCount > 2) 1145 delete [] dividend; 1146 1147 return lost_fraction; 1148} 1149 1150unsigned int 1151APFloat::significandMSB() const 1152{ 1153 return APInt::tcMSB(significandParts(), partCount()); 1154} 1155 1156unsigned int 1157APFloat::significandLSB() const 1158{ 1159 return APInt::tcLSB(significandParts(), partCount()); 1160} 1161 1162/* Note that a zero result is NOT normalized to fcZero. */ 1163lostFraction 1164APFloat::shiftSignificandRight(unsigned int bits) 1165{ 1166 /* Our exponent should not overflow. */ 1167 assert((ExponentType) (exponent + bits) >= exponent); 1168 1169 exponent += bits; 1170 1171 return shiftRight(significandParts(), partCount(), bits); 1172} 1173 1174/* Shift the significand left BITS bits, subtract BITS from its exponent. */ 1175void 1176APFloat::shiftSignificandLeft(unsigned int bits) 1177{ 1178 assert(bits < semantics->precision); 1179 1180 if (bits) { 1181 unsigned int partsCount = partCount(); 1182 1183 APInt::tcShiftLeft(significandParts(), partsCount, bits); 1184 exponent -= bits; 1185 1186 assert(!APInt::tcIsZero(significandParts(), partsCount)); 1187 } 1188} 1189 1190APFloat::cmpResult 1191APFloat::compareAbsoluteValue(const APFloat &rhs) const 1192{ 1193 int compare; 1194 1195 assert(semantics == rhs.semantics); 1196 assert(isFiniteNonZero()); 1197 assert(rhs.isFiniteNonZero()); 1198 1199 compare = exponent - rhs.exponent; 1200 1201 /* If exponents are equal, do an unsigned bignum comparison of the 1202 significands. */ 1203 if (compare == 0) 1204 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 1205 partCount()); 1206 1207 if (compare > 0) 1208 return cmpGreaterThan; 1209 else if (compare < 0) 1210 return cmpLessThan; 1211 else 1212 return cmpEqual; 1213} 1214 1215/* Handle overflow. Sign is preserved. We either become infinity or 1216 the largest finite number. */ 1217APFloat::opStatus 1218APFloat::handleOverflow(roundingMode rounding_mode) 1219{ 1220 /* Infinity? */ 1221 if (rounding_mode == rmNearestTiesToEven || 1222 rounding_mode == rmNearestTiesToAway || 1223 (rounding_mode == rmTowardPositive && !sign) || 1224 (rounding_mode == rmTowardNegative && sign)) { 1225 category = fcInfinity; 1226 return (opStatus) (opOverflow | opInexact); 1227 } 1228 1229 /* Otherwise we become the largest finite number. */ 1230 category = fcNormal; 1231 exponent = semantics->maxExponent; 1232 APInt::tcSetLeastSignificantBits(significandParts(), partCount(), 1233 semantics->precision); 1234 1235 return opInexact; 1236} 1237 1238/* Returns TRUE if, when truncating the current number, with BIT the 1239 new LSB, with the given lost fraction and rounding mode, the result 1240 would need to be rounded away from zero (i.e., by increasing the 1241 signficand). This routine must work for fcZero of both signs, and 1242 fcNormal numbers. */ 1243bool 1244APFloat::roundAwayFromZero(roundingMode rounding_mode, 1245 lostFraction lost_fraction, 1246 unsigned int bit) const 1247{ 1248 /* NaNs and infinities should not have lost fractions. */ 1249 assert(isFiniteNonZero() || category == fcZero); 1250 1251 /* Current callers never pass this so we don't handle it. */ 1252 assert(lost_fraction != lfExactlyZero); 1253 1254 switch (rounding_mode) { 1255 case rmNearestTiesToAway: 1256 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1257 1258 case rmNearestTiesToEven: 1259 if (lost_fraction == lfMoreThanHalf) 1260 return true; 1261 1262 /* Our zeroes don't have a significand to test. */ 1263 if (lost_fraction == lfExactlyHalf && category != fcZero) 1264 return APInt::tcExtractBit(significandParts(), bit); 1265 1266 return false; 1267 1268 case rmTowardZero: 1269 return false; 1270 1271 case rmTowardPositive: 1272 return !sign; 1273 1274 case rmTowardNegative: 1275 return sign; 1276 } 1277 llvm_unreachable("Invalid rounding mode found"); 1278} 1279 1280APFloat::opStatus 1281APFloat::normalize(roundingMode rounding_mode, 1282 lostFraction lost_fraction) 1283{ 1284 unsigned int omsb; /* One, not zero, based MSB. */ 1285 int exponentChange; 1286 1287 if (!isFiniteNonZero()) 1288 return opOK; 1289 1290 /* Before rounding normalize the exponent of fcNormal numbers. */ 1291 omsb = significandMSB() + 1; 1292 1293 if (omsb) { 1294 /* OMSB is numbered from 1. We want to place it in the integer 1295 bit numbered PRECISION if possible, with a compensating change in 1296 the exponent. */ 1297 exponentChange = omsb - semantics->precision; 1298 1299 /* If the resulting exponent is too high, overflow according to 1300 the rounding mode. */ 1301 if (exponent + exponentChange > semantics->maxExponent) 1302 return handleOverflow(rounding_mode); 1303 1304 /* Subnormal numbers have exponent minExponent, and their MSB 1305 is forced based on that. */ 1306 if (exponent + exponentChange < semantics->minExponent) 1307 exponentChange = semantics->minExponent - exponent; 1308 1309 /* Shifting left is easy as we don't lose precision. */ 1310 if (exponentChange < 0) { 1311 assert(lost_fraction == lfExactlyZero); 1312 1313 shiftSignificandLeft(-exponentChange); 1314 1315 return opOK; 1316 } 1317 1318 if (exponentChange > 0) { 1319 lostFraction lf; 1320 1321 /* Shift right and capture any new lost fraction. */ 1322 lf = shiftSignificandRight(exponentChange); 1323 1324 lost_fraction = combineLostFractions(lf, lost_fraction); 1325 1326 /* Keep OMSB up-to-date. */ 1327 if (omsb > (unsigned) exponentChange) 1328 omsb -= exponentChange; 1329 else 1330 omsb = 0; 1331 } 1332 } 1333 1334 /* Now round the number according to rounding_mode given the lost 1335 fraction. */ 1336 1337 /* As specified in IEEE 754, since we do not trap we do not report 1338 underflow for exact results. */ 1339 if (lost_fraction == lfExactlyZero) { 1340 /* Canonicalize zeroes. */ 1341 if (omsb == 0) 1342 category = fcZero; 1343 1344 return opOK; 1345 } 1346 1347 /* Increment the significand if we're rounding away from zero. */ 1348 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1349 if (omsb == 0) 1350 exponent = semantics->minExponent; 1351 1352 incrementSignificand(); 1353 omsb = significandMSB() + 1; 1354 1355 /* Did the significand increment overflow? */ 1356 if (omsb == (unsigned) semantics->precision + 1) { 1357 /* Renormalize by incrementing the exponent and shifting our 1358 significand right one. However if we already have the 1359 maximum exponent we overflow to infinity. */ 1360 if (exponent == semantics->maxExponent) { 1361 category = fcInfinity; 1362 1363 return (opStatus) (opOverflow | opInexact); 1364 } 1365 1366 shiftSignificandRight(1); 1367 1368 return opInexact; 1369 } 1370 } 1371 1372 /* The normal case - we were and are not denormal, and any 1373 significand increment above didn't overflow. */ 1374 if (omsb == semantics->precision) 1375 return opInexact; 1376 1377 /* We have a non-zero denormal. */ 1378 assert(omsb < semantics->precision); 1379 1380 /* Canonicalize zeroes. */ 1381 if (omsb == 0) 1382 category = fcZero; 1383 1384 /* The fcZero case is a denormal that underflowed to zero. */ 1385 return (opStatus) (opUnderflow | opInexact); 1386} 1387 1388APFloat::opStatus 1389APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) 1390{ 1391 switch (PackCategoriesIntoKey(category, rhs.category)) { 1392 default: 1393 llvm_unreachable(nullptr); 1394 1395 case PackCategoriesIntoKey(fcNaN, fcZero): 1396 case PackCategoriesIntoKey(fcNaN, fcNormal): 1397 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1398 case PackCategoriesIntoKey(fcNaN, fcNaN): 1399 case PackCategoriesIntoKey(fcNormal, fcZero): 1400 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1401 case PackCategoriesIntoKey(fcInfinity, fcZero): 1402 return opOK; 1403 1404 case PackCategoriesIntoKey(fcZero, fcNaN): 1405 case PackCategoriesIntoKey(fcNormal, fcNaN): 1406 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1407 // We need to be sure to flip the sign here for subtraction because we 1408 // don't have a separate negate operation so -NaN becomes 0 - NaN here. 1409 sign = rhs.sign ^ subtract; 1410 category = fcNaN; 1411 copySignificand(rhs); 1412 return opOK; 1413 1414 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1415 case PackCategoriesIntoKey(fcZero, fcInfinity): 1416 category = fcInfinity; 1417 sign = rhs.sign ^ subtract; 1418 return opOK; 1419 1420 case PackCategoriesIntoKey(fcZero, fcNormal): 1421 assign(rhs); 1422 sign = rhs.sign ^ subtract; 1423 return opOK; 1424 1425 case PackCategoriesIntoKey(fcZero, fcZero): 1426 /* Sign depends on rounding mode; handled by caller. */ 1427 return opOK; 1428 1429 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1430 /* Differently signed infinities can only be validly 1431 subtracted. */ 1432 if (((sign ^ rhs.sign)!=0) != subtract) { 1433 makeNaN(); 1434 return opInvalidOp; 1435 } 1436 1437 return opOK; 1438 1439 case PackCategoriesIntoKey(fcNormal, fcNormal): 1440 return opDivByZero; 1441 } 1442} 1443 1444/* Add or subtract two normal numbers. */ 1445lostFraction 1446APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) 1447{ 1448 integerPart carry; 1449 lostFraction lost_fraction; 1450 int bits; 1451 1452 /* Determine if the operation on the absolute values is effectively 1453 an addition or subtraction. */ 1454 subtract ^= static_cast<bool>(sign ^ rhs.sign); 1455 1456 /* Are we bigger exponent-wise than the RHS? */ 1457 bits = exponent - rhs.exponent; 1458 1459 /* Subtraction is more subtle than one might naively expect. */ 1460 if (subtract) { 1461 APFloat temp_rhs(rhs); 1462 bool reverse; 1463 1464 if (bits == 0) { 1465 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan; 1466 lost_fraction = lfExactlyZero; 1467 } else if (bits > 0) { 1468 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1469 shiftSignificandLeft(1); 1470 reverse = false; 1471 } else { 1472 lost_fraction = shiftSignificandRight(-bits - 1); 1473 temp_rhs.shiftSignificandLeft(1); 1474 reverse = true; 1475 } 1476 1477 if (reverse) { 1478 carry = temp_rhs.subtractSignificand 1479 (*this, lost_fraction != lfExactlyZero); 1480 copySignificand(temp_rhs); 1481 sign = !sign; 1482 } else { 1483 carry = subtractSignificand 1484 (temp_rhs, lost_fraction != lfExactlyZero); 1485 } 1486 1487 /* Invert the lost fraction - it was on the RHS and 1488 subtracted. */ 1489 if (lost_fraction == lfLessThanHalf) 1490 lost_fraction = lfMoreThanHalf; 1491 else if (lost_fraction == lfMoreThanHalf) 1492 lost_fraction = lfLessThanHalf; 1493 1494 /* The code above is intended to ensure that no borrow is 1495 necessary. */ 1496 assert(!carry); 1497 (void)carry; 1498 } else { 1499 if (bits > 0) { 1500 APFloat temp_rhs(rhs); 1501 1502 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1503 carry = addSignificand(temp_rhs); 1504 } else { 1505 lost_fraction = shiftSignificandRight(-bits); 1506 carry = addSignificand(rhs); 1507 } 1508 1509 /* We have a guard bit; generating a carry cannot happen. */ 1510 assert(!carry); 1511 (void)carry; 1512 } 1513 1514 return lost_fraction; 1515} 1516 1517APFloat::opStatus 1518APFloat::multiplySpecials(const APFloat &rhs) 1519{ 1520 switch (PackCategoriesIntoKey(category, rhs.category)) { 1521 default: 1522 llvm_unreachable(nullptr); 1523 1524 case PackCategoriesIntoKey(fcNaN, fcZero): 1525 case PackCategoriesIntoKey(fcNaN, fcNormal): 1526 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1527 case PackCategoriesIntoKey(fcNaN, fcNaN): 1528 sign = false; 1529 return opOK; 1530 1531 case PackCategoriesIntoKey(fcZero, fcNaN): 1532 case PackCategoriesIntoKey(fcNormal, fcNaN): 1533 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1534 sign = false; 1535 category = fcNaN; 1536 copySignificand(rhs); 1537 return opOK; 1538 1539 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1540 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1541 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1542 category = fcInfinity; 1543 return opOK; 1544 1545 case PackCategoriesIntoKey(fcZero, fcNormal): 1546 case PackCategoriesIntoKey(fcNormal, fcZero): 1547 case PackCategoriesIntoKey(fcZero, fcZero): 1548 category = fcZero; 1549 return opOK; 1550 1551 case PackCategoriesIntoKey(fcZero, fcInfinity): 1552 case PackCategoriesIntoKey(fcInfinity, fcZero): 1553 makeNaN(); 1554 return opInvalidOp; 1555 1556 case PackCategoriesIntoKey(fcNormal, fcNormal): 1557 return opOK; 1558 } 1559} 1560 1561APFloat::opStatus 1562APFloat::divideSpecials(const APFloat &rhs) 1563{ 1564 switch (PackCategoriesIntoKey(category, rhs.category)) { 1565 default: 1566 llvm_unreachable(nullptr); 1567 1568 case PackCategoriesIntoKey(fcZero, fcNaN): 1569 case PackCategoriesIntoKey(fcNormal, fcNaN): 1570 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1571 category = fcNaN; 1572 copySignificand(rhs); 1573 case PackCategoriesIntoKey(fcNaN, fcZero): 1574 case PackCategoriesIntoKey(fcNaN, fcNormal): 1575 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1576 case PackCategoriesIntoKey(fcNaN, fcNaN): 1577 sign = false; 1578 case PackCategoriesIntoKey(fcInfinity, fcZero): 1579 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1580 case PackCategoriesIntoKey(fcZero, fcInfinity): 1581 case PackCategoriesIntoKey(fcZero, fcNormal): 1582 return opOK; 1583 1584 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1585 category = fcZero; 1586 return opOK; 1587 1588 case PackCategoriesIntoKey(fcNormal, fcZero): 1589 category = fcInfinity; 1590 return opDivByZero; 1591 1592 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1593 case PackCategoriesIntoKey(fcZero, fcZero): 1594 makeNaN(); 1595 return opInvalidOp; 1596 1597 case PackCategoriesIntoKey(fcNormal, fcNormal): 1598 return opOK; 1599 } 1600} 1601 1602APFloat::opStatus 1603APFloat::modSpecials(const APFloat &rhs) 1604{ 1605 switch (PackCategoriesIntoKey(category, rhs.category)) { 1606 default: 1607 llvm_unreachable(nullptr); 1608 1609 case PackCategoriesIntoKey(fcNaN, fcZero): 1610 case PackCategoriesIntoKey(fcNaN, fcNormal): 1611 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1612 case PackCategoriesIntoKey(fcNaN, fcNaN): 1613 case PackCategoriesIntoKey(fcZero, fcInfinity): 1614 case PackCategoriesIntoKey(fcZero, fcNormal): 1615 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1616 return opOK; 1617 1618 case PackCategoriesIntoKey(fcZero, fcNaN): 1619 case PackCategoriesIntoKey(fcNormal, fcNaN): 1620 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1621 sign = false; 1622 category = fcNaN; 1623 copySignificand(rhs); 1624 return opOK; 1625 1626 case PackCategoriesIntoKey(fcNormal, fcZero): 1627 case PackCategoriesIntoKey(fcInfinity, fcZero): 1628 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1629 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1630 case PackCategoriesIntoKey(fcZero, fcZero): 1631 makeNaN(); 1632 return opInvalidOp; 1633 1634 case PackCategoriesIntoKey(fcNormal, fcNormal): 1635 return opOK; 1636 } 1637} 1638 1639/* Change sign. */ 1640void 1641APFloat::changeSign() 1642{ 1643 /* Look mummy, this one's easy. */ 1644 sign = !sign; 1645} 1646 1647void 1648APFloat::clearSign() 1649{ 1650 /* So is this one. */ 1651 sign = 0; 1652} 1653 1654void 1655APFloat::copySign(const APFloat &rhs) 1656{ 1657 /* And this one. */ 1658 sign = rhs.sign; 1659} 1660 1661/* Normalized addition or subtraction. */ 1662APFloat::opStatus 1663APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode, 1664 bool subtract) 1665{ 1666 opStatus fs; 1667 1668 fs = addOrSubtractSpecials(rhs, subtract); 1669 1670 /* This return code means it was not a simple case. */ 1671 if (fs == opDivByZero) { 1672 lostFraction lost_fraction; 1673 1674 lost_fraction = addOrSubtractSignificand(rhs, subtract); 1675 fs = normalize(rounding_mode, lost_fraction); 1676 1677 /* Can only be zero if we lost no fraction. */ 1678 assert(category != fcZero || lost_fraction == lfExactlyZero); 1679 } 1680 1681 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1682 positive zero unless rounding to minus infinity, except that 1683 adding two like-signed zeroes gives that zero. */ 1684 if (category == fcZero) { 1685 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 1686 sign = (rounding_mode == rmTowardNegative); 1687 } 1688 1689 return fs; 1690} 1691 1692/* Normalized addition. */ 1693APFloat::opStatus 1694APFloat::add(const APFloat &rhs, roundingMode rounding_mode) 1695{ 1696 return addOrSubtract(rhs, rounding_mode, false); 1697} 1698 1699/* Normalized subtraction. */ 1700APFloat::opStatus 1701APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode) 1702{ 1703 return addOrSubtract(rhs, rounding_mode, true); 1704} 1705 1706/* Normalized multiply. */ 1707APFloat::opStatus 1708APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode) 1709{ 1710 opStatus fs; 1711 1712 sign ^= rhs.sign; 1713 fs = multiplySpecials(rhs); 1714 1715 if (isFiniteNonZero()) { 1716 lostFraction lost_fraction = multiplySignificand(rhs, nullptr); 1717 fs = normalize(rounding_mode, lost_fraction); 1718 if (lost_fraction != lfExactlyZero) 1719 fs = (opStatus) (fs | opInexact); 1720 } 1721 1722 return fs; 1723} 1724 1725/* Normalized divide. */ 1726APFloat::opStatus 1727APFloat::divide(const APFloat &rhs, roundingMode rounding_mode) 1728{ 1729 opStatus fs; 1730 1731 sign ^= rhs.sign; 1732 fs = divideSpecials(rhs); 1733 1734 if (isFiniteNonZero()) { 1735 lostFraction lost_fraction = divideSignificand(rhs); 1736 fs = normalize(rounding_mode, lost_fraction); 1737 if (lost_fraction != lfExactlyZero) 1738 fs = (opStatus) (fs | opInexact); 1739 } 1740 1741 return fs; 1742} 1743 1744/* Normalized remainder. This is not currently correct in all cases. */ 1745APFloat::opStatus 1746APFloat::remainder(const APFloat &rhs) 1747{ 1748 opStatus fs; 1749 APFloat V = *this; 1750 unsigned int origSign = sign; 1751 1752 fs = V.divide(rhs, rmNearestTiesToEven); 1753 if (fs == opDivByZero) 1754 return fs; 1755 1756 int parts = partCount(); 1757 integerPart *x = new integerPart[parts]; 1758 bool ignored; 1759 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1760 rmNearestTiesToEven, &ignored); 1761 if (fs==opInvalidOp) 1762 return fs; 1763 1764 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1765 rmNearestTiesToEven); 1766 assert(fs==opOK); // should always work 1767 1768 fs = V.multiply(rhs, rmNearestTiesToEven); 1769 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1770 1771 fs = subtract(V, rmNearestTiesToEven); 1772 assert(fs==opOK || fs==opInexact); // likewise 1773 1774 if (isZero()) 1775 sign = origSign; // IEEE754 requires this 1776 delete[] x; 1777 return fs; 1778} 1779 1780/* Normalized llvm frem (C fmod). 1781 This is not currently correct in all cases. */ 1782APFloat::opStatus 1783APFloat::mod(const APFloat &rhs) 1784{ 1785 opStatus fs; 1786 fs = modSpecials(rhs); 1787 1788 if (isFiniteNonZero() && rhs.isFiniteNonZero()) { 1789 APFloat V = *this; 1790 unsigned int origSign = sign; 1791 1792 fs = V.divide(rhs, rmNearestTiesToEven); 1793 if (fs == opDivByZero) 1794 return fs; 1795 1796 int parts = partCount(); 1797 integerPart *x = new integerPart[parts]; 1798 bool ignored; 1799 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1800 rmTowardZero, &ignored); 1801 if (fs==opInvalidOp) 1802 return fs; 1803 1804 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1805 rmNearestTiesToEven); 1806 assert(fs==opOK); // should always work 1807 1808 fs = V.multiply(rhs, rmNearestTiesToEven); 1809 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1810 1811 fs = subtract(V, rmNearestTiesToEven); 1812 assert(fs==opOK || fs==opInexact); // likewise 1813 1814 if (isZero()) 1815 sign = origSign; // IEEE754 requires this 1816 delete[] x; 1817 } 1818 return fs; 1819} 1820 1821/* Normalized fused-multiply-add. */ 1822APFloat::opStatus 1823APFloat::fusedMultiplyAdd(const APFloat &multiplicand, 1824 const APFloat &addend, 1825 roundingMode rounding_mode) 1826{ 1827 opStatus fs; 1828 1829 /* Post-multiplication sign, before addition. */ 1830 sign ^= multiplicand.sign; 1831 1832 /* If and only if all arguments are normal do we need to do an 1833 extended-precision calculation. */ 1834 if (isFiniteNonZero() && 1835 multiplicand.isFiniteNonZero() && 1836 addend.isFinite()) { 1837 lostFraction lost_fraction; 1838 1839 lost_fraction = multiplySignificand(multiplicand, &addend); 1840 fs = normalize(rounding_mode, lost_fraction); 1841 if (lost_fraction != lfExactlyZero) 1842 fs = (opStatus) (fs | opInexact); 1843 1844 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1845 positive zero unless rounding to minus infinity, except that 1846 adding two like-signed zeroes gives that zero. */ 1847 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) 1848 sign = (rounding_mode == rmTowardNegative); 1849 } else { 1850 fs = multiplySpecials(multiplicand); 1851 1852 /* FS can only be opOK or opInvalidOp. There is no more work 1853 to do in the latter case. The IEEE-754R standard says it is 1854 implementation-defined in this case whether, if ADDEND is a 1855 quiet NaN, we raise invalid op; this implementation does so. 1856 1857 If we need to do the addition we can do so with normal 1858 precision. */ 1859 if (fs == opOK) 1860 fs = addOrSubtract(addend, rounding_mode, false); 1861 } 1862 1863 return fs; 1864} 1865 1866/* Rounding-mode corrrect round to integral value. */ 1867APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) { 1868 opStatus fs; 1869 1870 // If the exponent is large enough, we know that this value is already 1871 // integral, and the arithmetic below would potentially cause it to saturate 1872 // to +/-Inf. Bail out early instead. 1873 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics)) 1874 return opOK; 1875 1876 // The algorithm here is quite simple: we add 2^(p-1), where p is the 1877 // precision of our format, and then subtract it back off again. The choice 1878 // of rounding modes for the addition/subtraction determines the rounding mode 1879 // for our integral rounding as well. 1880 // NOTE: When the input value is negative, we do subtraction followed by 1881 // addition instead. 1882 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); 1883 IntegerConstant <<= semanticsPrecision(*semantics)-1; 1884 APFloat MagicConstant(*semantics); 1885 fs = MagicConstant.convertFromAPInt(IntegerConstant, false, 1886 rmNearestTiesToEven); 1887 MagicConstant.copySign(*this); 1888 1889 if (fs != opOK) 1890 return fs; 1891 1892 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly. 1893 bool inputSign = isNegative(); 1894 1895 fs = add(MagicConstant, rounding_mode); 1896 if (fs != opOK && fs != opInexact) 1897 return fs; 1898 1899 fs = subtract(MagicConstant, rounding_mode); 1900 1901 // Restore the input sign. 1902 if (inputSign != isNegative()) 1903 changeSign(); 1904 1905 return fs; 1906} 1907 1908 1909/* Comparison requires normalized numbers. */ 1910APFloat::cmpResult 1911APFloat::compare(const APFloat &rhs) const 1912{ 1913 cmpResult result; 1914 1915 assert(semantics == rhs.semantics); 1916 1917 switch (PackCategoriesIntoKey(category, rhs.category)) { 1918 default: 1919 llvm_unreachable(nullptr); 1920 1921 case PackCategoriesIntoKey(fcNaN, fcZero): 1922 case PackCategoriesIntoKey(fcNaN, fcNormal): 1923 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1924 case PackCategoriesIntoKey(fcNaN, fcNaN): 1925 case PackCategoriesIntoKey(fcZero, fcNaN): 1926 case PackCategoriesIntoKey(fcNormal, fcNaN): 1927 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1928 return cmpUnordered; 1929 1930 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1931 case PackCategoriesIntoKey(fcInfinity, fcZero): 1932 case PackCategoriesIntoKey(fcNormal, fcZero): 1933 if (sign) 1934 return cmpLessThan; 1935 else 1936 return cmpGreaterThan; 1937 1938 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1939 case PackCategoriesIntoKey(fcZero, fcInfinity): 1940 case PackCategoriesIntoKey(fcZero, fcNormal): 1941 if (rhs.sign) 1942 return cmpGreaterThan; 1943 else 1944 return cmpLessThan; 1945 1946 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1947 if (sign == rhs.sign) 1948 return cmpEqual; 1949 else if (sign) 1950 return cmpLessThan; 1951 else 1952 return cmpGreaterThan; 1953 1954 case PackCategoriesIntoKey(fcZero, fcZero): 1955 return cmpEqual; 1956 1957 case PackCategoriesIntoKey(fcNormal, fcNormal): 1958 break; 1959 } 1960 1961 /* Two normal numbers. Do they have the same sign? */ 1962 if (sign != rhs.sign) { 1963 if (sign) 1964 result = cmpLessThan; 1965 else 1966 result = cmpGreaterThan; 1967 } else { 1968 /* Compare absolute values; invert result if negative. */ 1969 result = compareAbsoluteValue(rhs); 1970 1971 if (sign) { 1972 if (result == cmpLessThan) 1973 result = cmpGreaterThan; 1974 else if (result == cmpGreaterThan) 1975 result = cmpLessThan; 1976 } 1977 } 1978 1979 return result; 1980} 1981 1982/// APFloat::convert - convert a value of one floating point type to another. 1983/// The return value corresponds to the IEEE754 exceptions. *losesInfo 1984/// records whether the transformation lost information, i.e. whether 1985/// converting the result back to the original type will produce the 1986/// original value (this is almost the same as return value==fsOK, but there 1987/// are edge cases where this is not so). 1988 1989APFloat::opStatus 1990APFloat::convert(const fltSemantics &toSemantics, 1991 roundingMode rounding_mode, bool *losesInfo) 1992{ 1993 lostFraction lostFraction; 1994 unsigned int newPartCount, oldPartCount; 1995 opStatus fs; 1996 int shift; 1997 const fltSemantics &fromSemantics = *semantics; 1998 1999 lostFraction = lfExactlyZero; 2000 newPartCount = partCountForBits(toSemantics.precision + 1); 2001 oldPartCount = partCount(); 2002 shift = toSemantics.precision - fromSemantics.precision; 2003 2004 bool X86SpecialNan = false; 2005 if (&fromSemantics == &APFloat::x87DoubleExtended && 2006 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN && 2007 (!(*significandParts() & 0x8000000000000000ULL) || 2008 !(*significandParts() & 0x4000000000000000ULL))) { 2009 // x86 has some unusual NaNs which cannot be represented in any other 2010 // format; note them here. 2011 X86SpecialNan = true; 2012 } 2013 2014 // If this is a truncation of a denormal number, and the target semantics 2015 // has larger exponent range than the source semantics (this can happen 2016 // when truncating from PowerPC double-double to double format), the 2017 // right shift could lose result mantissa bits. Adjust exponent instead 2018 // of performing excessive shift. 2019 if (shift < 0 && isFiniteNonZero()) { 2020 int exponentChange = significandMSB() + 1 - fromSemantics.precision; 2021 if (exponent + exponentChange < toSemantics.minExponent) 2022 exponentChange = toSemantics.minExponent - exponent; 2023 if (exponentChange < shift) 2024 exponentChange = shift; 2025 if (exponentChange < 0) { 2026 shift -= exponentChange; 2027 exponent += exponentChange; 2028 } 2029 } 2030 2031 // If this is a truncation, perform the shift before we narrow the storage. 2032 if (shift < 0 && (isFiniteNonZero() || category==fcNaN)) 2033 lostFraction = shiftRight(significandParts(), oldPartCount, -shift); 2034 2035 // Fix the storage so it can hold to new value. 2036 if (newPartCount > oldPartCount) { 2037 // The new type requires more storage; make it available. 2038 integerPart *newParts; 2039 newParts = new integerPart[newPartCount]; 2040 APInt::tcSet(newParts, 0, newPartCount); 2041 if (isFiniteNonZero() || category==fcNaN) 2042 APInt::tcAssign(newParts, significandParts(), oldPartCount); 2043 freeSignificand(); 2044 significand.parts = newParts; 2045 } else if (newPartCount == 1 && oldPartCount != 1) { 2046 // Switch to built-in storage for a single part. 2047 integerPart newPart = 0; 2048 if (isFiniteNonZero() || category==fcNaN) 2049 newPart = significandParts()[0]; 2050 freeSignificand(); 2051 significand.part = newPart; 2052 } 2053 2054 // Now that we have the right storage, switch the semantics. 2055 semantics = &toSemantics; 2056 2057 // If this is an extension, perform the shift now that the storage is 2058 // available. 2059 if (shift > 0 && (isFiniteNonZero() || category==fcNaN)) 2060 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 2061 2062 if (isFiniteNonZero()) { 2063 fs = normalize(rounding_mode, lostFraction); 2064 *losesInfo = (fs != opOK); 2065 } else if (category == fcNaN) { 2066 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; 2067 2068 // For x87 extended precision, we want to make a NaN, not a special NaN if 2069 // the input wasn't special either. 2070 if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended) 2071 APInt::tcSetBit(significandParts(), semantics->precision - 1); 2072 2073 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) 2074 // does not give you back the same bits. This is dubious, and we 2075 // don't currently do it. You're really supposed to get 2076 // an invalid operation signal at runtime, but nobody does that. 2077 fs = opOK; 2078 } else { 2079 *losesInfo = false; 2080 fs = opOK; 2081 } 2082 2083 return fs; 2084} 2085 2086/* Convert a floating point number to an integer according to the 2087 rounding mode. If the rounded integer value is out of range this 2088 returns an invalid operation exception and the contents of the 2089 destination parts are unspecified. If the rounded value is in 2090 range but the floating point number is not the exact integer, the C 2091 standard doesn't require an inexact exception to be raised. IEEE 2092 854 does require it so we do that. 2093 2094 Note that for conversions to integer type the C standard requires 2095 round-to-zero to always be used. */ 2096APFloat::opStatus 2097APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width, 2098 bool isSigned, 2099 roundingMode rounding_mode, 2100 bool *isExact) const 2101{ 2102 lostFraction lost_fraction; 2103 const integerPart *src; 2104 unsigned int dstPartsCount, truncatedBits; 2105 2106 *isExact = false; 2107 2108 /* Handle the three special cases first. */ 2109 if (category == fcInfinity || category == fcNaN) 2110 return opInvalidOp; 2111 2112 dstPartsCount = partCountForBits(width); 2113 2114 if (category == fcZero) { 2115 APInt::tcSet(parts, 0, dstPartsCount); 2116 // Negative zero can't be represented as an int. 2117 *isExact = !sign; 2118 return opOK; 2119 } 2120 2121 src = significandParts(); 2122 2123 /* Step 1: place our absolute value, with any fraction truncated, in 2124 the destination. */ 2125 if (exponent < 0) { 2126 /* Our absolute value is less than one; truncate everything. */ 2127 APInt::tcSet(parts, 0, dstPartsCount); 2128 /* For exponent -1 the integer bit represents .5, look at that. 2129 For smaller exponents leftmost truncated bit is 0. */ 2130 truncatedBits = semantics->precision -1U - exponent; 2131 } else { 2132 /* We want the most significant (exponent + 1) bits; the rest are 2133 truncated. */ 2134 unsigned int bits = exponent + 1U; 2135 2136 /* Hopelessly large in magnitude? */ 2137 if (bits > width) 2138 return opInvalidOp; 2139 2140 if (bits < semantics->precision) { 2141 /* We truncate (semantics->precision - bits) bits. */ 2142 truncatedBits = semantics->precision - bits; 2143 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits); 2144 } else { 2145 /* We want at least as many bits as are available. */ 2146 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0); 2147 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision); 2148 truncatedBits = 0; 2149 } 2150 } 2151 2152 /* Step 2: work out any lost fraction, and increment the absolute 2153 value if we would round away from zero. */ 2154 if (truncatedBits) { 2155 lost_fraction = lostFractionThroughTruncation(src, partCount(), 2156 truncatedBits); 2157 if (lost_fraction != lfExactlyZero && 2158 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 2159 if (APInt::tcIncrement(parts, dstPartsCount)) 2160 return opInvalidOp; /* Overflow. */ 2161 } 2162 } else { 2163 lost_fraction = lfExactlyZero; 2164 } 2165 2166 /* Step 3: check if we fit in the destination. */ 2167 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1; 2168 2169 if (sign) { 2170 if (!isSigned) { 2171 /* Negative numbers cannot be represented as unsigned. */ 2172 if (omsb != 0) 2173 return opInvalidOp; 2174 } else { 2175 /* It takes omsb bits to represent the unsigned integer value. 2176 We lose a bit for the sign, but care is needed as the 2177 maximally negative integer is a special case. */ 2178 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb) 2179 return opInvalidOp; 2180 2181 /* This case can happen because of rounding. */ 2182 if (omsb > width) 2183 return opInvalidOp; 2184 } 2185 2186 APInt::tcNegate (parts, dstPartsCount); 2187 } else { 2188 if (omsb >= width + !isSigned) 2189 return opInvalidOp; 2190 } 2191 2192 if (lost_fraction == lfExactlyZero) { 2193 *isExact = true; 2194 return opOK; 2195 } else 2196 return opInexact; 2197} 2198 2199/* Same as convertToSignExtendedInteger, except we provide 2200 deterministic values in case of an invalid operation exception, 2201 namely zero for NaNs and the minimal or maximal value respectively 2202 for underflow or overflow. 2203 The *isExact output tells whether the result is exact, in the sense 2204 that converting it back to the original floating point type produces 2205 the original value. This is almost equivalent to result==opOK, 2206 except for negative zeroes. 2207*/ 2208APFloat::opStatus 2209APFloat::convertToInteger(integerPart *parts, unsigned int width, 2210 bool isSigned, 2211 roundingMode rounding_mode, bool *isExact) const 2212{ 2213 opStatus fs; 2214 2215 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2216 isExact); 2217 2218 if (fs == opInvalidOp) { 2219 unsigned int bits, dstPartsCount; 2220 2221 dstPartsCount = partCountForBits(width); 2222 2223 if (category == fcNaN) 2224 bits = 0; 2225 else if (sign) 2226 bits = isSigned; 2227 else 2228 bits = width - isSigned; 2229 2230 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits); 2231 if (sign && isSigned) 2232 APInt::tcShiftLeft(parts, dstPartsCount, width - 1); 2233 } 2234 2235 return fs; 2236} 2237 2238/* Same as convertToInteger(integerPart*, ...), except the result is returned in 2239 an APSInt, whose initial bit-width and signed-ness are used to determine the 2240 precision of the conversion. 2241 */ 2242APFloat::opStatus 2243APFloat::convertToInteger(APSInt &result, 2244 roundingMode rounding_mode, bool *isExact) const 2245{ 2246 unsigned bitWidth = result.getBitWidth(); 2247 SmallVector<uint64_t, 4> parts(result.getNumWords()); 2248 opStatus status = convertToInteger( 2249 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact); 2250 // Keeps the original signed-ness. 2251 result = APInt(bitWidth, parts); 2252 return status; 2253} 2254 2255/* Convert an unsigned integer SRC to a floating point number, 2256 rounding according to ROUNDING_MODE. The sign of the floating 2257 point number is not modified. */ 2258APFloat::opStatus 2259APFloat::convertFromUnsignedParts(const integerPart *src, 2260 unsigned int srcCount, 2261 roundingMode rounding_mode) 2262{ 2263 unsigned int omsb, precision, dstCount; 2264 integerPart *dst; 2265 lostFraction lost_fraction; 2266 2267 category = fcNormal; 2268 omsb = APInt::tcMSB(src, srcCount) + 1; 2269 dst = significandParts(); 2270 dstCount = partCount(); 2271 precision = semantics->precision; 2272 2273 /* We want the most significant PRECISION bits of SRC. There may not 2274 be that many; extract what we can. */ 2275 if (precision <= omsb) { 2276 exponent = omsb - 1; 2277 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2278 omsb - precision); 2279 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2280 } else { 2281 exponent = precision - 1; 2282 lost_fraction = lfExactlyZero; 2283 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2284 } 2285 2286 return normalize(rounding_mode, lost_fraction); 2287} 2288 2289APFloat::opStatus 2290APFloat::convertFromAPInt(const APInt &Val, 2291 bool isSigned, 2292 roundingMode rounding_mode) 2293{ 2294 unsigned int partCount = Val.getNumWords(); 2295 APInt api = Val; 2296 2297 sign = false; 2298 if (isSigned && api.isNegative()) { 2299 sign = true; 2300 api = -api; 2301 } 2302 2303 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2304} 2305 2306/* Convert a two's complement integer SRC to a floating point number, 2307 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2308 integer is signed, in which case it must be sign-extended. */ 2309APFloat::opStatus 2310APFloat::convertFromSignExtendedInteger(const integerPart *src, 2311 unsigned int srcCount, 2312 bool isSigned, 2313 roundingMode rounding_mode) 2314{ 2315 opStatus status; 2316 2317 if (isSigned && 2318 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2319 integerPart *copy; 2320 2321 /* If we're signed and negative negate a copy. */ 2322 sign = true; 2323 copy = new integerPart[srcCount]; 2324 APInt::tcAssign(copy, src, srcCount); 2325 APInt::tcNegate(copy, srcCount); 2326 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2327 delete [] copy; 2328 } else { 2329 sign = false; 2330 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2331 } 2332 2333 return status; 2334} 2335 2336/* FIXME: should this just take a const APInt reference? */ 2337APFloat::opStatus 2338APFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2339 unsigned int width, bool isSigned, 2340 roundingMode rounding_mode) 2341{ 2342 unsigned int partCount = partCountForBits(width); 2343 APInt api = APInt(width, makeArrayRef(parts, partCount)); 2344 2345 sign = false; 2346 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { 2347 sign = true; 2348 api = -api; 2349 } 2350 2351 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2352} 2353 2354APFloat::opStatus 2355APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode) 2356{ 2357 lostFraction lost_fraction = lfExactlyZero; 2358 2359 category = fcNormal; 2360 zeroSignificand(); 2361 exponent = 0; 2362 2363 integerPart *significand = significandParts(); 2364 unsigned partsCount = partCount(); 2365 unsigned bitPos = partsCount * integerPartWidth; 2366 bool computedTrailingFraction = false; 2367 2368 // Skip leading zeroes and any (hexa)decimal point. 2369 StringRef::iterator begin = s.begin(); 2370 StringRef::iterator end = s.end(); 2371 StringRef::iterator dot; 2372 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot); 2373 StringRef::iterator firstSignificantDigit = p; 2374 2375 while (p != end) { 2376 integerPart hex_value; 2377 2378 if (*p == '.') { 2379 assert(dot == end && "String contains multiple dots"); 2380 dot = p++; 2381 continue; 2382 } 2383 2384 hex_value = hexDigitValue(*p); 2385 if (hex_value == -1U) 2386 break; 2387 2388 p++; 2389 2390 // Store the number while we have space. 2391 if (bitPos) { 2392 bitPos -= 4; 2393 hex_value <<= bitPos % integerPartWidth; 2394 significand[bitPos / integerPartWidth] |= hex_value; 2395 } else if (!computedTrailingFraction) { 2396 lost_fraction = trailingHexadecimalFraction(p, end, hex_value); 2397 computedTrailingFraction = true; 2398 } 2399 } 2400 2401 /* Hex floats require an exponent but not a hexadecimal point. */ 2402 assert(p != end && "Hex strings require an exponent"); 2403 assert((*p == 'p' || *p == 'P') && "Invalid character in significand"); 2404 assert(p != begin && "Significand has no digits"); 2405 assert((dot == end || p - begin != 1) && "Significand has no digits"); 2406 2407 /* Ignore the exponent if we are zero. */ 2408 if (p != firstSignificantDigit) { 2409 int expAdjustment; 2410 2411 /* Implicit hexadecimal point? */ 2412 if (dot == end) 2413 dot = p; 2414 2415 /* Calculate the exponent adjustment implicit in the number of 2416 significant digits. */ 2417 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 2418 if (expAdjustment < 0) 2419 expAdjustment++; 2420 expAdjustment = expAdjustment * 4 - 1; 2421 2422 /* Adjust for writing the significand starting at the most 2423 significant nibble. */ 2424 expAdjustment += semantics->precision; 2425 expAdjustment -= partsCount * integerPartWidth; 2426 2427 /* Adjust for the given exponent. */ 2428 exponent = totalExponent(p + 1, end, expAdjustment); 2429 } 2430 2431 return normalize(rounding_mode, lost_fraction); 2432} 2433 2434APFloat::opStatus 2435APFloat::roundSignificandWithExponent(const integerPart *decSigParts, 2436 unsigned sigPartCount, int exp, 2437 roundingMode rounding_mode) 2438{ 2439 unsigned int parts, pow5PartCount; 2440 fltSemantics calcSemantics = { 32767, -32767, 0, 0 }; 2441 integerPart pow5Parts[maxPowerOfFiveParts]; 2442 bool isNearest; 2443 2444 isNearest = (rounding_mode == rmNearestTiesToEven || 2445 rounding_mode == rmNearestTiesToAway); 2446 2447 parts = partCountForBits(semantics->precision + 11); 2448 2449 /* Calculate pow(5, abs(exp)). */ 2450 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 2451 2452 for (;; parts *= 2) { 2453 opStatus sigStatus, powStatus; 2454 unsigned int excessPrecision, truncatedBits; 2455 2456 calcSemantics.precision = parts * integerPartWidth - 1; 2457 excessPrecision = calcSemantics.precision - semantics->precision; 2458 truncatedBits = excessPrecision; 2459 2460 APFloat decSig = APFloat::getZero(calcSemantics, sign); 2461 APFloat pow5(calcSemantics); 2462 2463 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 2464 rmNearestTiesToEven); 2465 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 2466 rmNearestTiesToEven); 2467 /* Add exp, as 10^n = 5^n * 2^n. */ 2468 decSig.exponent += exp; 2469 2470 lostFraction calcLostFraction; 2471 integerPart HUerr, HUdistance; 2472 unsigned int powHUerr; 2473 2474 if (exp >= 0) { 2475 /* multiplySignificand leaves the precision-th bit set to 1. */ 2476 calcLostFraction = decSig.multiplySignificand(pow5, nullptr); 2477 powHUerr = powStatus != opOK; 2478 } else { 2479 calcLostFraction = decSig.divideSignificand(pow5); 2480 /* Denormal numbers have less precision. */ 2481 if (decSig.exponent < semantics->minExponent) { 2482 excessPrecision += (semantics->minExponent - decSig.exponent); 2483 truncatedBits = excessPrecision; 2484 if (excessPrecision > calcSemantics.precision) 2485 excessPrecision = calcSemantics.precision; 2486 } 2487 /* Extra half-ulp lost in reciprocal of exponent. */ 2488 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 2489 } 2490 2491 /* Both multiplySignificand and divideSignificand return the 2492 result with the integer bit set. */ 2493 assert(APInt::tcExtractBit 2494 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 2495 2496 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 2497 powHUerr); 2498 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 2499 excessPrecision, isNearest); 2500 2501 /* Are we guaranteed to round correctly if we truncate? */ 2502 if (HUdistance >= HUerr) { 2503 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 2504 calcSemantics.precision - excessPrecision, 2505 excessPrecision); 2506 /* Take the exponent of decSig. If we tcExtract-ed less bits 2507 above we must adjust our exponent to compensate for the 2508 implicit right shift. */ 2509 exponent = (decSig.exponent + semantics->precision 2510 - (calcSemantics.precision - excessPrecision)); 2511 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 2512 decSig.partCount(), 2513 truncatedBits); 2514 return normalize(rounding_mode, calcLostFraction); 2515 } 2516 } 2517} 2518 2519APFloat::opStatus 2520APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) 2521{ 2522 decimalInfo D; 2523 opStatus fs; 2524 2525 /* Scan the text. */ 2526 StringRef::iterator p = str.begin(); 2527 interpretDecimal(p, str.end(), &D); 2528 2529 /* Handle the quick cases. First the case of no significant digits, 2530 i.e. zero, and then exponents that are obviously too large or too 2531 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 2532 definitely overflows if 2533 2534 (exp - 1) * L >= maxExponent 2535 2536 and definitely underflows to zero where 2537 2538 (exp + 1) * L <= minExponent - precision 2539 2540 With integer arithmetic the tightest bounds for L are 2541 2542 93/28 < L < 196/59 [ numerator <= 256 ] 2543 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 2544 */ 2545 2546 // Test if we have a zero number allowing for strings with no null terminators 2547 // and zero decimals with non-zero exponents. 2548 // 2549 // We computed firstSigDigit by ignoring all zeros and dots. Thus if 2550 // D->firstSigDigit equals str.end(), every digit must be a zero and there can 2551 // be at most one dot. On the other hand, if we have a zero with a non-zero 2552 // exponent, then we know that D.firstSigDigit will be non-numeric. 2553 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { 2554 category = fcZero; 2555 fs = opOK; 2556 2557 /* Check whether the normalized exponent is high enough to overflow 2558 max during the log-rebasing in the max-exponent check below. */ 2559 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { 2560 fs = handleOverflow(rounding_mode); 2561 2562 /* If it wasn't, then it also wasn't high enough to overflow max 2563 during the log-rebasing in the min-exponent check. Check that it 2564 won't overflow min in either check, then perform the min-exponent 2565 check. */ 2566 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || 2567 (D.normalizedExponent + 1) * 28738 <= 2568 8651 * (semantics->minExponent - (int) semantics->precision)) { 2569 /* Underflow to zero and round. */ 2570 category = fcNormal; 2571 zeroSignificand(); 2572 fs = normalize(rounding_mode, lfLessThanHalf); 2573 2574 /* We can finally safely perform the max-exponent check. */ 2575 } else if ((D.normalizedExponent - 1) * 42039 2576 >= 12655 * semantics->maxExponent) { 2577 /* Overflow and round. */ 2578 fs = handleOverflow(rounding_mode); 2579 } else { 2580 integerPart *decSignificand; 2581 unsigned int partCount; 2582 2583 /* A tight upper bound on number of bits required to hold an 2584 N-digit decimal integer is N * 196 / 59. Allocate enough space 2585 to hold the full significand, and an extra part required by 2586 tcMultiplyPart. */ 2587 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 2588 partCount = partCountForBits(1 + 196 * partCount / 59); 2589 decSignificand = new integerPart[partCount + 1]; 2590 partCount = 0; 2591 2592 /* Convert to binary efficiently - we do almost all multiplication 2593 in an integerPart. When this would overflow do we do a single 2594 bignum multiplication, and then revert again to multiplication 2595 in an integerPart. */ 2596 do { 2597 integerPart decValue, val, multiplier; 2598 2599 val = 0; 2600 multiplier = 1; 2601 2602 do { 2603 if (*p == '.') { 2604 p++; 2605 if (p == str.end()) { 2606 break; 2607 } 2608 } 2609 decValue = decDigitValue(*p++); 2610 assert(decValue < 10U && "Invalid character in significand"); 2611 multiplier *= 10; 2612 val = val * 10 + decValue; 2613 /* The maximum number that can be multiplied by ten with any 2614 digit added without overflowing an integerPart. */ 2615 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 2616 2617 /* Multiply out the current part. */ 2618 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 2619 partCount, partCount + 1, false); 2620 2621 /* If we used another part (likely but not guaranteed), increase 2622 the count. */ 2623 if (decSignificand[partCount]) 2624 partCount++; 2625 } while (p <= D.lastSigDigit); 2626 2627 category = fcNormal; 2628 fs = roundSignificandWithExponent(decSignificand, partCount, 2629 D.exponent, rounding_mode); 2630 2631 delete [] decSignificand; 2632 } 2633 2634 return fs; 2635} 2636 2637bool 2638APFloat::convertFromStringSpecials(StringRef str) { 2639 if (str.equals("inf") || str.equals("INFINITY")) { 2640 makeInf(false); 2641 return true; 2642 } 2643 2644 if (str.equals("-inf") || str.equals("-INFINITY")) { 2645 makeInf(true); 2646 return true; 2647 } 2648 2649 if (str.equals("nan") || str.equals("NaN")) { 2650 makeNaN(false, false); 2651 return true; 2652 } 2653 2654 if (str.equals("-nan") || str.equals("-NaN")) { 2655 makeNaN(false, true); 2656 return true; 2657 } 2658 2659 return false; 2660} 2661 2662APFloat::opStatus 2663APFloat::convertFromString(StringRef str, roundingMode rounding_mode) 2664{ 2665 assert(!str.empty() && "Invalid string length"); 2666 2667 // Handle special cases. 2668 if (convertFromStringSpecials(str)) 2669 return opOK; 2670 2671 /* Handle a leading minus sign. */ 2672 StringRef::iterator p = str.begin(); 2673 size_t slen = str.size(); 2674 sign = *p == '-' ? 1 : 0; 2675 if (*p == '-' || *p == '+') { 2676 p++; 2677 slen--; 2678 assert(slen && "String has no digits"); 2679 } 2680 2681 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 2682 assert(slen - 2 && "Invalid string"); 2683 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), 2684 rounding_mode); 2685 } 2686 2687 return convertFromDecimalString(StringRef(p, slen), rounding_mode); 2688} 2689 2690/* Write out a hexadecimal representation of the floating point value 2691 to DST, which must be of sufficient size, in the C99 form 2692 [-]0xh.hhhhp[+-]d. Return the number of characters written, 2693 excluding the terminating NUL. 2694 2695 If UPPERCASE, the output is in upper case, otherwise in lower case. 2696 2697 HEXDIGITS digits appear altogether, rounding the value if 2698 necessary. If HEXDIGITS is 0, the minimal precision to display the 2699 number precisely is used instead. If nothing would appear after 2700 the decimal point it is suppressed. 2701 2702 The decimal exponent is always printed and has at least one digit. 2703 Zero values display an exponent of zero. Infinities and NaNs 2704 appear as "infinity" or "nan" respectively. 2705 2706 The above rules are as specified by C99. There is ambiguity about 2707 what the leading hexadecimal digit should be. This implementation 2708 uses whatever is necessary so that the exponent is displayed as 2709 stored. This implies the exponent will fall within the IEEE format 2710 range, and the leading hexadecimal digit will be 0 (for denormals), 2711 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 2712 any other digits zero). 2713*/ 2714unsigned int 2715APFloat::convertToHexString(char *dst, unsigned int hexDigits, 2716 bool upperCase, roundingMode rounding_mode) const 2717{ 2718 char *p; 2719 2720 p = dst; 2721 if (sign) 2722 *dst++ = '-'; 2723 2724 switch (category) { 2725 case fcInfinity: 2726 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 2727 dst += sizeof infinityL - 1; 2728 break; 2729 2730 case fcNaN: 2731 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 2732 dst += sizeof NaNU - 1; 2733 break; 2734 2735 case fcZero: 2736 *dst++ = '0'; 2737 *dst++ = upperCase ? 'X': 'x'; 2738 *dst++ = '0'; 2739 if (hexDigits > 1) { 2740 *dst++ = '.'; 2741 memset (dst, '0', hexDigits - 1); 2742 dst += hexDigits - 1; 2743 } 2744 *dst++ = upperCase ? 'P': 'p'; 2745 *dst++ = '0'; 2746 break; 2747 2748 case fcNormal: 2749 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 2750 break; 2751 } 2752 2753 *dst = 0; 2754 2755 return static_cast<unsigned int>(dst - p); 2756} 2757 2758/* Does the hard work of outputting the correctly rounded hexadecimal 2759 form of a normal floating point number with the specified number of 2760 hexadecimal digits. If HEXDIGITS is zero the minimum number of 2761 digits necessary to print the value precisely is output. */ 2762char * 2763APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 2764 bool upperCase, 2765 roundingMode rounding_mode) const 2766{ 2767 unsigned int count, valueBits, shift, partsCount, outputDigits; 2768 const char *hexDigitChars; 2769 const integerPart *significand; 2770 char *p; 2771 bool roundUp; 2772 2773 *dst++ = '0'; 2774 *dst++ = upperCase ? 'X': 'x'; 2775 2776 roundUp = false; 2777 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 2778 2779 significand = significandParts(); 2780 partsCount = partCount(); 2781 2782 /* +3 because the first digit only uses the single integer bit, so 2783 we have 3 virtual zero most-significant-bits. */ 2784 valueBits = semantics->precision + 3; 2785 shift = integerPartWidth - valueBits % integerPartWidth; 2786 2787 /* The natural number of digits required ignoring trailing 2788 insignificant zeroes. */ 2789 outputDigits = (valueBits - significandLSB () + 3) / 4; 2790 2791 /* hexDigits of zero means use the required number for the 2792 precision. Otherwise, see if we are truncating. If we are, 2793 find out if we need to round away from zero. */ 2794 if (hexDigits) { 2795 if (hexDigits < outputDigits) { 2796 /* We are dropping non-zero bits, so need to check how to round. 2797 "bits" is the number of dropped bits. */ 2798 unsigned int bits; 2799 lostFraction fraction; 2800 2801 bits = valueBits - hexDigits * 4; 2802 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 2803 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 2804 } 2805 outputDigits = hexDigits; 2806 } 2807 2808 /* Write the digits consecutively, and start writing in the location 2809 of the hexadecimal point. We move the most significant digit 2810 left and add the hexadecimal point later. */ 2811 p = ++dst; 2812 2813 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 2814 2815 while (outputDigits && count) { 2816 integerPart part; 2817 2818 /* Put the most significant integerPartWidth bits in "part". */ 2819 if (--count == partsCount) 2820 part = 0; /* An imaginary higher zero part. */ 2821 else 2822 part = significand[count] << shift; 2823 2824 if (count && shift) 2825 part |= significand[count - 1] >> (integerPartWidth - shift); 2826 2827 /* Convert as much of "part" to hexdigits as we can. */ 2828 unsigned int curDigits = integerPartWidth / 4; 2829 2830 if (curDigits > outputDigits) 2831 curDigits = outputDigits; 2832 dst += partAsHex (dst, part, curDigits, hexDigitChars); 2833 outputDigits -= curDigits; 2834 } 2835 2836 if (roundUp) { 2837 char *q = dst; 2838 2839 /* Note that hexDigitChars has a trailing '0'. */ 2840 do { 2841 q--; 2842 *q = hexDigitChars[hexDigitValue (*q) + 1]; 2843 } while (*q == '0'); 2844 assert(q >= p); 2845 } else { 2846 /* Add trailing zeroes. */ 2847 memset (dst, '0', outputDigits); 2848 dst += outputDigits; 2849 } 2850 2851 /* Move the most significant digit to before the point, and if there 2852 is something after the decimal point add it. This must come 2853 after rounding above. */ 2854 p[-1] = p[0]; 2855 if (dst -1 == p) 2856 dst--; 2857 else 2858 p[0] = '.'; 2859 2860 /* Finally output the exponent. */ 2861 *dst++ = upperCase ? 'P': 'p'; 2862 2863 return writeSignedDecimal (dst, exponent); 2864} 2865 2866hash_code llvm::hash_value(const APFloat &Arg) { 2867 if (!Arg.isFiniteNonZero()) 2868 return hash_combine((uint8_t)Arg.category, 2869 // NaN has no sign, fix it at zero. 2870 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign, 2871 Arg.semantics->precision); 2872 2873 // Normal floats need their exponent and significand hashed. 2874 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign, 2875 Arg.semantics->precision, Arg.exponent, 2876 hash_combine_range( 2877 Arg.significandParts(), 2878 Arg.significandParts() + Arg.partCount())); 2879} 2880 2881// Conversion from APFloat to/from host float/double. It may eventually be 2882// possible to eliminate these and have everybody deal with APFloats, but that 2883// will take a while. This approach will not easily extend to long double. 2884// Current implementation requires integerPartWidth==64, which is correct at 2885// the moment but could be made more general. 2886 2887// Denormals have exponent minExponent in APFloat, but minExponent-1 in 2888// the actual IEEE respresentations. We compensate for that here. 2889 2890APInt 2891APFloat::convertF80LongDoubleAPFloatToAPInt() const 2892{ 2893 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended); 2894 assert(partCount()==2); 2895 2896 uint64_t myexponent, mysignificand; 2897 2898 if (isFiniteNonZero()) { 2899 myexponent = exponent+16383; //bias 2900 mysignificand = significandParts()[0]; 2901 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 2902 myexponent = 0; // denormal 2903 } else if (category==fcZero) { 2904 myexponent = 0; 2905 mysignificand = 0; 2906 } else if (category==fcInfinity) { 2907 myexponent = 0x7fff; 2908 mysignificand = 0x8000000000000000ULL; 2909 } else { 2910 assert(category == fcNaN && "Unknown category"); 2911 myexponent = 0x7fff; 2912 mysignificand = significandParts()[0]; 2913 } 2914 2915 uint64_t words[2]; 2916 words[0] = mysignificand; 2917 words[1] = ((uint64_t)(sign & 1) << 15) | 2918 (myexponent & 0x7fffLL); 2919 return APInt(80, words); 2920} 2921 2922APInt 2923APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const 2924{ 2925 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble); 2926 assert(partCount()==2); 2927 2928 uint64_t words[2]; 2929 opStatus fs; 2930 bool losesInfo; 2931 2932 // Convert number to double. To avoid spurious underflows, we re- 2933 // normalize against the "double" minExponent first, and only *then* 2934 // truncate the mantissa. The result of that second conversion 2935 // may be inexact, but should never underflow. 2936 // Declare fltSemantics before APFloat that uses it (and 2937 // saves pointer to it) to ensure correct destruction order. 2938 fltSemantics extendedSemantics = *semantics; 2939 extendedSemantics.minExponent = IEEEdouble.minExponent; 2940 APFloat extended(*this); 2941 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2942 assert(fs == opOK && !losesInfo); 2943 (void)fs; 2944 2945 APFloat u(extended); 2946 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); 2947 assert(fs == opOK || fs == opInexact); 2948 (void)fs; 2949 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); 2950 2951 // If conversion was exact or resulted in a special case, we're done; 2952 // just set the second double to zero. Otherwise, re-convert back to 2953 // the extended format and compute the difference. This now should 2954 // convert exactly to double. 2955 if (u.isFiniteNonZero() && losesInfo) { 2956 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2957 assert(fs == opOK && !losesInfo); 2958 (void)fs; 2959 2960 APFloat v(extended); 2961 v.subtract(u, rmNearestTiesToEven); 2962 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); 2963 assert(fs == opOK && !losesInfo); 2964 (void)fs; 2965 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); 2966 } else { 2967 words[1] = 0; 2968 } 2969 2970 return APInt(128, words); 2971} 2972 2973APInt 2974APFloat::convertQuadrupleAPFloatToAPInt() const 2975{ 2976 assert(semantics == (const llvm::fltSemantics*)&IEEEquad); 2977 assert(partCount()==2); 2978 2979 uint64_t myexponent, mysignificand, mysignificand2; 2980 2981 if (isFiniteNonZero()) { 2982 myexponent = exponent+16383; //bias 2983 mysignificand = significandParts()[0]; 2984 mysignificand2 = significandParts()[1]; 2985 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) 2986 myexponent = 0; // denormal 2987 } else if (category==fcZero) { 2988 myexponent = 0; 2989 mysignificand = mysignificand2 = 0; 2990 } else if (category==fcInfinity) { 2991 myexponent = 0x7fff; 2992 mysignificand = mysignificand2 = 0; 2993 } else { 2994 assert(category == fcNaN && "Unknown category!"); 2995 myexponent = 0x7fff; 2996 mysignificand = significandParts()[0]; 2997 mysignificand2 = significandParts()[1]; 2998 } 2999 3000 uint64_t words[2]; 3001 words[0] = mysignificand; 3002 words[1] = ((uint64_t)(sign & 1) << 63) | 3003 ((myexponent & 0x7fff) << 48) | 3004 (mysignificand2 & 0xffffffffffffLL); 3005 3006 return APInt(128, words); 3007} 3008 3009APInt 3010APFloat::convertDoubleAPFloatToAPInt() const 3011{ 3012 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); 3013 assert(partCount()==1); 3014 3015 uint64_t myexponent, mysignificand; 3016 3017 if (isFiniteNonZero()) { 3018 myexponent = exponent+1023; //bias 3019 mysignificand = *significandParts(); 3020 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 3021 myexponent = 0; // denormal 3022 } else if (category==fcZero) { 3023 myexponent = 0; 3024 mysignificand = 0; 3025 } else if (category==fcInfinity) { 3026 myexponent = 0x7ff; 3027 mysignificand = 0; 3028 } else { 3029 assert(category == fcNaN && "Unknown category!"); 3030 myexponent = 0x7ff; 3031 mysignificand = *significandParts(); 3032 } 3033 3034 return APInt(64, ((((uint64_t)(sign & 1) << 63) | 3035 ((myexponent & 0x7ff) << 52) | 3036 (mysignificand & 0xfffffffffffffLL)))); 3037} 3038 3039APInt 3040APFloat::convertFloatAPFloatToAPInt() const 3041{ 3042 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); 3043 assert(partCount()==1); 3044 3045 uint32_t myexponent, mysignificand; 3046 3047 if (isFiniteNonZero()) { 3048 myexponent = exponent+127; //bias 3049 mysignificand = (uint32_t)*significandParts(); 3050 if (myexponent == 1 && !(mysignificand & 0x800000)) 3051 myexponent = 0; // denormal 3052 } else if (category==fcZero) { 3053 myexponent = 0; 3054 mysignificand = 0; 3055 } else if (category==fcInfinity) { 3056 myexponent = 0xff; 3057 mysignificand = 0; 3058 } else { 3059 assert(category == fcNaN && "Unknown category!"); 3060 myexponent = 0xff; 3061 mysignificand = (uint32_t)*significandParts(); 3062 } 3063 3064 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | 3065 (mysignificand & 0x7fffff))); 3066} 3067 3068APInt 3069APFloat::convertHalfAPFloatToAPInt() const 3070{ 3071 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf); 3072 assert(partCount()==1); 3073 3074 uint32_t myexponent, mysignificand; 3075 3076 if (isFiniteNonZero()) { 3077 myexponent = exponent+15; //bias 3078 mysignificand = (uint32_t)*significandParts(); 3079 if (myexponent == 1 && !(mysignificand & 0x400)) 3080 myexponent = 0; // denormal 3081 } else if (category==fcZero) { 3082 myexponent = 0; 3083 mysignificand = 0; 3084 } else if (category==fcInfinity) { 3085 myexponent = 0x1f; 3086 mysignificand = 0; 3087 } else { 3088 assert(category == fcNaN && "Unknown category!"); 3089 myexponent = 0x1f; 3090 mysignificand = (uint32_t)*significandParts(); 3091 } 3092 3093 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) | 3094 (mysignificand & 0x3ff))); 3095} 3096 3097// This function creates an APInt that is just a bit map of the floating 3098// point constant as it would appear in memory. It is not a conversion, 3099// and treating the result as a normal integer is unlikely to be useful. 3100 3101APInt 3102APFloat::bitcastToAPInt() const 3103{ 3104 if (semantics == (const llvm::fltSemantics*)&IEEEhalf) 3105 return convertHalfAPFloatToAPInt(); 3106 3107 if (semantics == (const llvm::fltSemantics*)&IEEEsingle) 3108 return convertFloatAPFloatToAPInt(); 3109 3110 if (semantics == (const llvm::fltSemantics*)&IEEEdouble) 3111 return convertDoubleAPFloatToAPInt(); 3112 3113 if (semantics == (const llvm::fltSemantics*)&IEEEquad) 3114 return convertQuadrupleAPFloatToAPInt(); 3115 3116 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble) 3117 return convertPPCDoubleDoubleAPFloatToAPInt(); 3118 3119 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended && 3120 "unknown format!"); 3121 return convertF80LongDoubleAPFloatToAPInt(); 3122} 3123 3124float 3125APFloat::convertToFloat() const 3126{ 3127 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle && 3128 "Float semantics are not IEEEsingle"); 3129 APInt api = bitcastToAPInt(); 3130 return api.bitsToFloat(); 3131} 3132 3133double 3134APFloat::convertToDouble() const 3135{ 3136 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble && 3137 "Float semantics are not IEEEdouble"); 3138 APInt api = bitcastToAPInt(); 3139 return api.bitsToDouble(); 3140} 3141 3142/// Integer bit is explicit in this format. Intel hardware (387 and later) 3143/// does not support these bit patterns: 3144/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3145/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3146/// exponent = 0, integer bit 1 ("pseudodenormal") 3147/// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3148/// At the moment, the first two are treated as NaNs, the second two as Normal. 3149void 3150APFloat::initFromF80LongDoubleAPInt(const APInt &api) 3151{ 3152 assert(api.getBitWidth()==80); 3153 uint64_t i1 = api.getRawData()[0]; 3154 uint64_t i2 = api.getRawData()[1]; 3155 uint64_t myexponent = (i2 & 0x7fff); 3156 uint64_t mysignificand = i1; 3157 3158 initialize(&APFloat::x87DoubleExtended); 3159 assert(partCount()==2); 3160 3161 sign = static_cast<unsigned int>(i2>>15); 3162 if (myexponent==0 && mysignificand==0) { 3163 // exponent, significand meaningless 3164 category = fcZero; 3165 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 3166 // exponent, significand meaningless 3167 category = fcInfinity; 3168 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) { 3169 // exponent meaningless 3170 category = fcNaN; 3171 significandParts()[0] = mysignificand; 3172 significandParts()[1] = 0; 3173 } else { 3174 category = fcNormal; 3175 exponent = myexponent - 16383; 3176 significandParts()[0] = mysignificand; 3177 significandParts()[1] = 0; 3178 if (myexponent==0) // denormal 3179 exponent = -16382; 3180 } 3181} 3182 3183void 3184APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) 3185{ 3186 assert(api.getBitWidth()==128); 3187 uint64_t i1 = api.getRawData()[0]; 3188 uint64_t i2 = api.getRawData()[1]; 3189 opStatus fs; 3190 bool losesInfo; 3191 3192 // Get the first double and convert to our format. 3193 initFromDoubleAPInt(APInt(64, i1)); 3194 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); 3195 assert(fs == opOK && !losesInfo); 3196 (void)fs; 3197 3198 // Unless we have a special case, add in second double. 3199 if (isFiniteNonZero()) { 3200 APFloat v(IEEEdouble, APInt(64, i2)); 3201 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); 3202 assert(fs == opOK && !losesInfo); 3203 (void)fs; 3204 3205 add(v, rmNearestTiesToEven); 3206 } 3207} 3208 3209void 3210APFloat::initFromQuadrupleAPInt(const APInt &api) 3211{ 3212 assert(api.getBitWidth()==128); 3213 uint64_t i1 = api.getRawData()[0]; 3214 uint64_t i2 = api.getRawData()[1]; 3215 uint64_t myexponent = (i2 >> 48) & 0x7fff; 3216 uint64_t mysignificand = i1; 3217 uint64_t mysignificand2 = i2 & 0xffffffffffffLL; 3218 3219 initialize(&APFloat::IEEEquad); 3220 assert(partCount()==2); 3221 3222 sign = static_cast<unsigned int>(i2>>63); 3223 if (myexponent==0 && 3224 (mysignificand==0 && mysignificand2==0)) { 3225 // exponent, significand meaningless 3226 category = fcZero; 3227 } else if (myexponent==0x7fff && 3228 (mysignificand==0 && mysignificand2==0)) { 3229 // exponent, significand meaningless 3230 category = fcInfinity; 3231 } else if (myexponent==0x7fff && 3232 (mysignificand!=0 || mysignificand2 !=0)) { 3233 // exponent meaningless 3234 category = fcNaN; 3235 significandParts()[0] = mysignificand; 3236 significandParts()[1] = mysignificand2; 3237 } else { 3238 category = fcNormal; 3239 exponent = myexponent - 16383; 3240 significandParts()[0] = mysignificand; 3241 significandParts()[1] = mysignificand2; 3242 if (myexponent==0) // denormal 3243 exponent = -16382; 3244 else 3245 significandParts()[1] |= 0x1000000000000LL; // integer bit 3246 } 3247} 3248 3249void 3250APFloat::initFromDoubleAPInt(const APInt &api) 3251{ 3252 assert(api.getBitWidth()==64); 3253 uint64_t i = *api.getRawData(); 3254 uint64_t myexponent = (i >> 52) & 0x7ff; 3255 uint64_t mysignificand = i & 0xfffffffffffffLL; 3256 3257 initialize(&APFloat::IEEEdouble); 3258 assert(partCount()==1); 3259 3260 sign = static_cast<unsigned int>(i>>63); 3261 if (myexponent==0 && mysignificand==0) { 3262 // exponent, significand meaningless 3263 category = fcZero; 3264 } else if (myexponent==0x7ff && mysignificand==0) { 3265 // exponent, significand meaningless 3266 category = fcInfinity; 3267 } else if (myexponent==0x7ff && mysignificand!=0) { 3268 // exponent meaningless 3269 category = fcNaN; 3270 *significandParts() = mysignificand; 3271 } else { 3272 category = fcNormal; 3273 exponent = myexponent - 1023; 3274 *significandParts() = mysignificand; 3275 if (myexponent==0) // denormal 3276 exponent = -1022; 3277 else 3278 *significandParts() |= 0x10000000000000LL; // integer bit 3279 } 3280} 3281 3282void 3283APFloat::initFromFloatAPInt(const APInt & api) 3284{ 3285 assert(api.getBitWidth()==32); 3286 uint32_t i = (uint32_t)*api.getRawData(); 3287 uint32_t myexponent = (i >> 23) & 0xff; 3288 uint32_t mysignificand = i & 0x7fffff; 3289 3290 initialize(&APFloat::IEEEsingle); 3291 assert(partCount()==1); 3292 3293 sign = i >> 31; 3294 if (myexponent==0 && mysignificand==0) { 3295 // exponent, significand meaningless 3296 category = fcZero; 3297 } else if (myexponent==0xff && mysignificand==0) { 3298 // exponent, significand meaningless 3299 category = fcInfinity; 3300 } else if (myexponent==0xff && mysignificand!=0) { 3301 // sign, exponent, significand meaningless 3302 category = fcNaN; 3303 *significandParts() = mysignificand; 3304 } else { 3305 category = fcNormal; 3306 exponent = myexponent - 127; //bias 3307 *significandParts() = mysignificand; 3308 if (myexponent==0) // denormal 3309 exponent = -126; 3310 else 3311 *significandParts() |= 0x800000; // integer bit 3312 } 3313} 3314 3315void 3316APFloat::initFromHalfAPInt(const APInt & api) 3317{ 3318 assert(api.getBitWidth()==16); 3319 uint32_t i = (uint32_t)*api.getRawData(); 3320 uint32_t myexponent = (i >> 10) & 0x1f; 3321 uint32_t mysignificand = i & 0x3ff; 3322 3323 initialize(&APFloat::IEEEhalf); 3324 assert(partCount()==1); 3325 3326 sign = i >> 15; 3327 if (myexponent==0 && mysignificand==0) { 3328 // exponent, significand meaningless 3329 category = fcZero; 3330 } else if (myexponent==0x1f && mysignificand==0) { 3331 // exponent, significand meaningless 3332 category = fcInfinity; 3333 } else if (myexponent==0x1f && mysignificand!=0) { 3334 // sign, exponent, significand meaningless 3335 category = fcNaN; 3336 *significandParts() = mysignificand; 3337 } else { 3338 category = fcNormal; 3339 exponent = myexponent - 15; //bias 3340 *significandParts() = mysignificand; 3341 if (myexponent==0) // denormal 3342 exponent = -14; 3343 else 3344 *significandParts() |= 0x400; // integer bit 3345 } 3346} 3347 3348/// Treat api as containing the bits of a floating point number. Currently 3349/// we infer the floating point type from the size of the APInt. The 3350/// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful 3351/// when the size is anything else). 3352void 3353APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api) 3354{ 3355 if (Sem == &IEEEhalf) 3356 return initFromHalfAPInt(api); 3357 if (Sem == &IEEEsingle) 3358 return initFromFloatAPInt(api); 3359 if (Sem == &IEEEdouble) 3360 return initFromDoubleAPInt(api); 3361 if (Sem == &x87DoubleExtended) 3362 return initFromF80LongDoubleAPInt(api); 3363 if (Sem == &IEEEquad) 3364 return initFromQuadrupleAPInt(api); 3365 if (Sem == &PPCDoubleDouble) 3366 return initFromPPCDoubleDoubleAPInt(api); 3367 3368 llvm_unreachable(nullptr); 3369} 3370 3371APFloat 3372APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) 3373{ 3374 switch (BitWidth) { 3375 case 16: 3376 return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth)); 3377 case 32: 3378 return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth)); 3379 case 64: 3380 return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth)); 3381 case 80: 3382 return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth)); 3383 case 128: 3384 if (isIEEE) 3385 return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth)); 3386 return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth)); 3387 default: 3388 llvm_unreachable("Unknown floating bit width"); 3389 } 3390} 3391 3392unsigned APFloat::getSizeInBits(const fltSemantics &Sem) { 3393 return Sem.sizeInBits; 3394} 3395 3396/// Make this number the largest magnitude normal number in the given 3397/// semantics. 3398void APFloat::makeLargest(bool Negative) { 3399 // We want (in interchange format): 3400 // sign = {Negative} 3401 // exponent = 1..10 3402 // significand = 1..1 3403 category = fcNormal; 3404 sign = Negative; 3405 exponent = semantics->maxExponent; 3406 3407 // Use memset to set all but the highest integerPart to all ones. 3408 integerPart *significand = significandParts(); 3409 unsigned PartCount = partCount(); 3410 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); 3411 3412 // Set the high integerPart especially setting all unused top bits for 3413 // internal consistency. 3414 const unsigned NumUnusedHighBits = 3415 PartCount*integerPartWidth - semantics->precision; 3416 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth) 3417 ? (~integerPart(0) >> NumUnusedHighBits) 3418 : 0; 3419} 3420 3421/// Make this number the smallest magnitude denormal number in the given 3422/// semantics. 3423void APFloat::makeSmallest(bool Negative) { 3424 // We want (in interchange format): 3425 // sign = {Negative} 3426 // exponent = 0..0 3427 // significand = 0..01 3428 category = fcNormal; 3429 sign = Negative; 3430 exponent = semantics->minExponent; 3431 APInt::tcSet(significandParts(), 1, partCount()); 3432} 3433 3434 3435APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) { 3436 // We want (in interchange format): 3437 // sign = {Negative} 3438 // exponent = 1..10 3439 // significand = 1..1 3440 APFloat Val(Sem, uninitialized); 3441 Val.makeLargest(Negative); 3442 return Val; 3443} 3444 3445APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) { 3446 // We want (in interchange format): 3447 // sign = {Negative} 3448 // exponent = 0..0 3449 // significand = 0..01 3450 APFloat Val(Sem, uninitialized); 3451 Val.makeSmallest(Negative); 3452 return Val; 3453} 3454 3455APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) { 3456 APFloat Val(Sem, uninitialized); 3457 3458 // We want (in interchange format): 3459 // sign = {Negative} 3460 // exponent = 0..0 3461 // significand = 10..0 3462 3463 Val.category = fcNormal; 3464 Val.zeroSignificand(); 3465 Val.sign = Negative; 3466 Val.exponent = Sem.minExponent; 3467 Val.significandParts()[partCountForBits(Sem.precision)-1] |= 3468 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth)); 3469 3470 return Val; 3471} 3472 3473APFloat::APFloat(const fltSemantics &Sem, const APInt &API) { 3474 initFromAPInt(&Sem, API); 3475} 3476 3477APFloat::APFloat(float f) { 3478 initFromAPInt(&IEEEsingle, APInt::floatToBits(f)); 3479} 3480 3481APFloat::APFloat(double d) { 3482 initFromAPInt(&IEEEdouble, APInt::doubleToBits(d)); 3483} 3484 3485namespace { 3486 void append(SmallVectorImpl<char> &Buffer, StringRef Str) { 3487 Buffer.append(Str.begin(), Str.end()); 3488 } 3489 3490 /// Removes data from the given significand until it is no more 3491 /// precise than is required for the desired precision. 3492 void AdjustToPrecision(APInt &significand, 3493 int &exp, unsigned FormatPrecision) { 3494 unsigned bits = significand.getActiveBits(); 3495 3496 // 196/59 is a very slight overestimate of lg_2(10). 3497 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59; 3498 3499 if (bits <= bitsRequired) return; 3500 3501 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196; 3502 if (!tensRemovable) return; 3503 3504 exp += tensRemovable; 3505 3506 APInt divisor(significand.getBitWidth(), 1); 3507 APInt powten(significand.getBitWidth(), 10); 3508 while (true) { 3509 if (tensRemovable & 1) 3510 divisor *= powten; 3511 tensRemovable >>= 1; 3512 if (!tensRemovable) break; 3513 powten *= powten; 3514 } 3515 3516 significand = significand.udiv(divisor); 3517 3518 // Truncate the significand down to its active bit count. 3519 significand = significand.trunc(significand.getActiveBits()); 3520 } 3521 3522 3523 void AdjustToPrecision(SmallVectorImpl<char> &buffer, 3524 int &exp, unsigned FormatPrecision) { 3525 unsigned N = buffer.size(); 3526 if (N <= FormatPrecision) return; 3527 3528 // The most significant figures are the last ones in the buffer. 3529 unsigned FirstSignificant = N - FormatPrecision; 3530 3531 // Round. 3532 // FIXME: this probably shouldn't use 'round half up'. 3533 3534 // Rounding down is just a truncation, except we also want to drop 3535 // trailing zeros from the new result. 3536 if (buffer[FirstSignificant - 1] < '5') { 3537 while (FirstSignificant < N && buffer[FirstSignificant] == '0') 3538 FirstSignificant++; 3539 3540 exp += FirstSignificant; 3541 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3542 return; 3543 } 3544 3545 // Rounding up requires a decimal add-with-carry. If we continue 3546 // the carry, the newly-introduced zeros will just be truncated. 3547 for (unsigned I = FirstSignificant; I != N; ++I) { 3548 if (buffer[I] == '9') { 3549 FirstSignificant++; 3550 } else { 3551 buffer[I]++; 3552 break; 3553 } 3554 } 3555 3556 // If we carried through, we have exactly one digit of precision. 3557 if (FirstSignificant == N) { 3558 exp += FirstSignificant; 3559 buffer.clear(); 3560 buffer.push_back('1'); 3561 return; 3562 } 3563 3564 exp += FirstSignificant; 3565 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3566 } 3567} 3568 3569void APFloat::toString(SmallVectorImpl<char> &Str, 3570 unsigned FormatPrecision, 3571 unsigned FormatMaxPadding) const { 3572 switch (category) { 3573 case fcInfinity: 3574 if (isNegative()) 3575 return append(Str, "-Inf"); 3576 else 3577 return append(Str, "+Inf"); 3578 3579 case fcNaN: return append(Str, "NaN"); 3580 3581 case fcZero: 3582 if (isNegative()) 3583 Str.push_back('-'); 3584 3585 if (!FormatMaxPadding) 3586 append(Str, "0.0E+0"); 3587 else 3588 Str.push_back('0'); 3589 return; 3590 3591 case fcNormal: 3592 break; 3593 } 3594 3595 if (isNegative()) 3596 Str.push_back('-'); 3597 3598 // Decompose the number into an APInt and an exponent. 3599 int exp = exponent - ((int) semantics->precision - 1); 3600 APInt significand(semantics->precision, 3601 makeArrayRef(significandParts(), 3602 partCountForBits(semantics->precision))); 3603 3604 // Set FormatPrecision if zero. We want to do this before we 3605 // truncate trailing zeros, as those are part of the precision. 3606 if (!FormatPrecision) { 3607 // We use enough digits so the number can be round-tripped back to an 3608 // APFloat. The formula comes from "How to Print Floating-Point Numbers 3609 // Accurately" by Steele and White. 3610 // FIXME: Using a formula based purely on the precision is conservative; 3611 // we can print fewer digits depending on the actual value being printed. 3612 3613 // FormatPrecision = 2 + floor(significandBits / lg_2(10)) 3614 FormatPrecision = 2 + semantics->precision * 59 / 196; 3615 } 3616 3617 // Ignore trailing binary zeros. 3618 int trailingZeros = significand.countTrailingZeros(); 3619 exp += trailingZeros; 3620 significand = significand.lshr(trailingZeros); 3621 3622 // Change the exponent from 2^e to 10^e. 3623 if (exp == 0) { 3624 // Nothing to do. 3625 } else if (exp > 0) { 3626 // Just shift left. 3627 significand = significand.zext(semantics->precision + exp); 3628 significand <<= exp; 3629 exp = 0; 3630 } else { /* exp < 0 */ 3631 int texp = -exp; 3632 3633 // We transform this using the identity: 3634 // (N)(2^-e) == (N)(5^e)(10^-e) 3635 // This means we have to multiply N (the significand) by 5^e. 3636 // To avoid overflow, we have to operate on numbers large 3637 // enough to store N * 5^e: 3638 // log2(N * 5^e) == log2(N) + e * log2(5) 3639 // <= semantics->precision + e * 137 / 59 3640 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59) 3641 3642 unsigned precision = semantics->precision + (137 * texp + 136) / 59; 3643 3644 // Multiply significand by 5^e. 3645 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) 3646 significand = significand.zext(precision); 3647 APInt five_to_the_i(precision, 5); 3648 while (true) { 3649 if (texp & 1) significand *= five_to_the_i; 3650 3651 texp >>= 1; 3652 if (!texp) break; 3653 five_to_the_i *= five_to_the_i; 3654 } 3655 } 3656 3657 AdjustToPrecision(significand, exp, FormatPrecision); 3658 3659 SmallVector<char, 256> buffer; 3660 3661 // Fill the buffer. 3662 unsigned precision = significand.getBitWidth(); 3663 APInt ten(precision, 10); 3664 APInt digit(precision, 0); 3665 3666 bool inTrail = true; 3667 while (significand != 0) { 3668 // digit <- significand % 10 3669 // significand <- significand / 10 3670 APInt::udivrem(significand, ten, significand, digit); 3671 3672 unsigned d = digit.getZExtValue(); 3673 3674 // Drop trailing zeros. 3675 if (inTrail && !d) exp++; 3676 else { 3677 buffer.push_back((char) ('0' + d)); 3678 inTrail = false; 3679 } 3680 } 3681 3682 assert(!buffer.empty() && "no characters in buffer!"); 3683 3684 // Drop down to FormatPrecision. 3685 // TODO: don't do more precise calculations above than are required. 3686 AdjustToPrecision(buffer, exp, FormatPrecision); 3687 3688 unsigned NDigits = buffer.size(); 3689 3690 // Check whether we should use scientific notation. 3691 bool FormatScientific; 3692 if (!FormatMaxPadding) 3693 FormatScientific = true; 3694 else { 3695 if (exp >= 0) { 3696 // 765e3 --> 765000 3697 // ^^^ 3698 // But we shouldn't make the number look more precise than it is. 3699 FormatScientific = ((unsigned) exp > FormatMaxPadding || 3700 NDigits + (unsigned) exp > FormatPrecision); 3701 } else { 3702 // Power of the most significant digit. 3703 int MSD = exp + (int) (NDigits - 1); 3704 if (MSD >= 0) { 3705 // 765e-2 == 7.65 3706 FormatScientific = false; 3707 } else { 3708 // 765e-5 == 0.00765 3709 // ^ ^^ 3710 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding; 3711 } 3712 } 3713 } 3714 3715 // Scientific formatting is pretty straightforward. 3716 if (FormatScientific) { 3717 exp += (NDigits - 1); 3718 3719 Str.push_back(buffer[NDigits-1]); 3720 Str.push_back('.'); 3721 if (NDigits == 1) 3722 Str.push_back('0'); 3723 else 3724 for (unsigned I = 1; I != NDigits; ++I) 3725 Str.push_back(buffer[NDigits-1-I]); 3726 Str.push_back('E'); 3727 3728 Str.push_back(exp >= 0 ? '+' : '-'); 3729 if (exp < 0) exp = -exp; 3730 SmallVector<char, 6> expbuf; 3731 do { 3732 expbuf.push_back((char) ('0' + (exp % 10))); 3733 exp /= 10; 3734 } while (exp); 3735 for (unsigned I = 0, E = expbuf.size(); I != E; ++I) 3736 Str.push_back(expbuf[E-1-I]); 3737 return; 3738 } 3739 3740 // Non-scientific, positive exponents. 3741 if (exp >= 0) { 3742 for (unsigned I = 0; I != NDigits; ++I) 3743 Str.push_back(buffer[NDigits-1-I]); 3744 for (unsigned I = 0; I != (unsigned) exp; ++I) 3745 Str.push_back('0'); 3746 return; 3747 } 3748 3749 // Non-scientific, negative exponents. 3750 3751 // The number of digits to the left of the decimal point. 3752 int NWholeDigits = exp + (int) NDigits; 3753 3754 unsigned I = 0; 3755 if (NWholeDigits > 0) { 3756 for (; I != (unsigned) NWholeDigits; ++I) 3757 Str.push_back(buffer[NDigits-I-1]); 3758 Str.push_back('.'); 3759 } else { 3760 unsigned NZeros = 1 + (unsigned) -NWholeDigits; 3761 3762 Str.push_back('0'); 3763 Str.push_back('.'); 3764 for (unsigned Z = 1; Z != NZeros; ++Z) 3765 Str.push_back('0'); 3766 } 3767 3768 for (; I != NDigits; ++I) 3769 Str.push_back(buffer[NDigits-I-1]); 3770} 3771 3772bool APFloat::getExactInverse(APFloat *inv) const { 3773 // Special floats and denormals have no exact inverse. 3774 if (!isFiniteNonZero()) 3775 return false; 3776 3777 // Check that the number is a power of two by making sure that only the 3778 // integer bit is set in the significand. 3779 if (significandLSB() != semantics->precision - 1) 3780 return false; 3781 3782 // Get the inverse. 3783 APFloat reciprocal(*semantics, 1ULL); 3784 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK) 3785 return false; 3786 3787 // Avoid multiplication with a denormal, it is not safe on all platforms and 3788 // may be slower than a normal division. 3789 if (reciprocal.isDenormal()) 3790 return false; 3791 3792 assert(reciprocal.isFiniteNonZero() && 3793 reciprocal.significandLSB() == reciprocal.semantics->precision - 1); 3794 3795 if (inv) 3796 *inv = reciprocal; 3797 3798 return true; 3799} 3800 3801bool APFloat::isSignaling() const { 3802 if (!isNaN()) 3803 return false; 3804 3805 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the 3806 // first bit of the trailing significand being 0. 3807 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2); 3808} 3809 3810/// IEEE-754R 2008 5.3.1: nextUp/nextDown. 3811/// 3812/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with 3813/// appropriate sign switching before/after the computation. 3814APFloat::opStatus APFloat::next(bool nextDown) { 3815 // If we are performing nextDown, swap sign so we have -x. 3816 if (nextDown) 3817 changeSign(); 3818 3819 // Compute nextUp(x) 3820 opStatus result = opOK; 3821 3822 // Handle each float category separately. 3823 switch (category) { 3824 case fcInfinity: 3825 // nextUp(+inf) = +inf 3826 if (!isNegative()) 3827 break; 3828 // nextUp(-inf) = -getLargest() 3829 makeLargest(true); 3830 break; 3831 case fcNaN: 3832 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. 3833 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not 3834 // change the payload. 3835 if (isSignaling()) { 3836 result = opInvalidOp; 3837 // For consistency, propagate the sign of the sNaN to the qNaN. 3838 makeNaN(false, isNegative(), nullptr); 3839 } 3840 break; 3841 case fcZero: 3842 // nextUp(pm 0) = +getSmallest() 3843 makeSmallest(false); 3844 break; 3845 case fcNormal: 3846 // nextUp(-getSmallest()) = -0 3847 if (isSmallest() && isNegative()) { 3848 APInt::tcSet(significandParts(), 0, partCount()); 3849 category = fcZero; 3850 exponent = 0; 3851 break; 3852 } 3853 3854 // nextUp(getLargest()) == INFINITY 3855 if (isLargest() && !isNegative()) { 3856 APInt::tcSet(significandParts(), 0, partCount()); 3857 category = fcInfinity; 3858 exponent = semantics->maxExponent + 1; 3859 break; 3860 } 3861 3862 // nextUp(normal) == normal + inc. 3863 if (isNegative()) { 3864 // If we are negative, we need to decrement the significand. 3865 3866 // We only cross a binade boundary that requires adjusting the exponent 3867 // if: 3868 // 1. exponent != semantics->minExponent. This implies we are not in the 3869 // smallest binade or are dealing with denormals. 3870 // 2. Our significand excluding the integral bit is all zeros. 3871 bool WillCrossBinadeBoundary = 3872 exponent != semantics->minExponent && isSignificandAllZeros(); 3873 3874 // Decrement the significand. 3875 // 3876 // We always do this since: 3877 // 1. If we are dealing with a non-binade decrement, by definition we 3878 // just decrement the significand. 3879 // 2. If we are dealing with a normal -> normal binade decrement, since 3880 // we have an explicit integral bit the fact that all bits but the 3881 // integral bit are zero implies that subtracting one will yield a 3882 // significand with 0 integral bit and 1 in all other spots. Thus we 3883 // must just adjust the exponent and set the integral bit to 1. 3884 // 3. If we are dealing with a normal -> denormal binade decrement, 3885 // since we set the integral bit to 0 when we represent denormals, we 3886 // just decrement the significand. 3887 integerPart *Parts = significandParts(); 3888 APInt::tcDecrement(Parts, partCount()); 3889 3890 if (WillCrossBinadeBoundary) { 3891 // Our result is a normal number. Do the following: 3892 // 1. Set the integral bit to 1. 3893 // 2. Decrement the exponent. 3894 APInt::tcSetBit(Parts, semantics->precision - 1); 3895 exponent--; 3896 } 3897 } else { 3898 // If we are positive, we need to increment the significand. 3899 3900 // We only cross a binade boundary that requires adjusting the exponent if 3901 // the input is not a denormal and all of said input's significand bits 3902 // are set. If all of said conditions are true: clear the significand, set 3903 // the integral bit to 1, and increment the exponent. If we have a 3904 // denormal always increment since moving denormals and the numbers in the 3905 // smallest normal binade have the same exponent in our representation. 3906 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes(); 3907 3908 if (WillCrossBinadeBoundary) { 3909 integerPart *Parts = significandParts(); 3910 APInt::tcSet(Parts, 0, partCount()); 3911 APInt::tcSetBit(Parts, semantics->precision - 1); 3912 assert(exponent != semantics->maxExponent && 3913 "We can not increment an exponent beyond the maxExponent allowed" 3914 " by the given floating point semantics."); 3915 exponent++; 3916 } else { 3917 incrementSignificand(); 3918 } 3919 } 3920 break; 3921 } 3922 3923 // If we are performing nextDown, swap sign so we have -nextUp(-x) 3924 if (nextDown) 3925 changeSign(); 3926 3927 return result; 3928} 3929 3930void 3931APFloat::makeInf(bool Negative) { 3932 category = fcInfinity; 3933 sign = Negative; 3934 exponent = semantics->maxExponent + 1; 3935 APInt::tcSet(significandParts(), 0, partCount()); 3936} 3937 3938void 3939APFloat::makeZero(bool Negative) { 3940 category = fcZero; 3941 sign = Negative; 3942 exponent = semantics->minExponent-1; 3943 APInt::tcSet(significandParts(), 0, partCount()); 3944} 3945 3946APFloat llvm::scalbn(APFloat X, int Exp) { 3947 if (X.isInfinity() || X.isZero() || X.isNaN()) 3948 return X; 3949 3950 auto MaxExp = X.getSemantics().maxExponent; 3951 auto MinExp = X.getSemantics().minExponent; 3952 if (Exp > (MaxExp - X.exponent)) 3953 // Overflow saturates to infinity. 3954 return APFloat::getInf(X.getSemantics(), X.isNegative()); 3955 if (Exp < (MinExp - X.exponent)) 3956 // Underflow saturates to zero. 3957 return APFloat::getZero(X.getSemantics(), X.isNegative()); 3958 3959 X.exponent += Exp; 3960 return X; 3961} 3962