APFloat.cpp revision 43a4b28e94598ea8accd7a97f8e1b5902731f340
1//===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Neil Booth and is distributed under the 6// University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements a class to represent arbitrary precision floating 11// point values and provide a variety of arithmetic operations on them. 12// 13//===----------------------------------------------------------------------===// 14 15#include <cassert> 16#include <cstring> 17#include "llvm/ADT/APFloat.h" 18#include "llvm/Support/MathExtras.h" 19 20using namespace llvm; 21 22#define convolve(lhs, rhs) ((lhs) * 4 + (rhs)) 23 24/* Assumed in hexadecimal significand parsing, and conversion to 25 hexadecimal strings. */ 26COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0); 27 28namespace llvm { 29 30 /* Represents floating point arithmetic semantics. */ 31 struct fltSemantics { 32 /* The largest E such that 2^E is representable; this matches the 33 definition of IEEE 754. */ 34 exponent_t maxExponent; 35 36 /* The smallest E such that 2^E is a normalized number; this 37 matches the definition of IEEE 754. */ 38 exponent_t minExponent; 39 40 /* Number of bits in the significand. This includes the integer 41 bit. */ 42 unsigned int precision; 43 44 /* True if arithmetic is supported. */ 45 unsigned int arithmeticOK; 46 }; 47 48 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true }; 49 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true }; 50 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true }; 51 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true }; 52 const fltSemantics APFloat::Bogus = { 0, 0, 0, true }; 53 54 // The PowerPC format consists of two doubles. It does not map cleanly 55 // onto the usual format above. For now only storage of constants of 56 // this type is supported, no arithmetic. 57 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false }; 58 59 /* A tight upper bound on number of parts required to hold the value 60 pow(5, power) is 61 62 power * 815 / (351 * integerPartWidth) + 1 63 64 However, whilst the result may require only this many parts, 65 because we are multiplying two values to get it, the 66 multiplication may require an extra part with the excess part 67 being zero (consider the trivial case of 1 * 1, tcFullMultiply 68 requires two parts to hold the single-part result). So we add an 69 extra one to guarantee enough space whilst multiplying. */ 70 const unsigned int maxExponent = 16383; 71 const unsigned int maxPrecision = 113; 72 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 73 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) 74 / (351 * integerPartWidth)); 75} 76 77/* Put a bunch of private, handy routines in an anonymous namespace. */ 78namespace { 79 80 inline unsigned int 81 partCountForBits(unsigned int bits) 82 { 83 return ((bits) + integerPartWidth - 1) / integerPartWidth; 84 } 85 86 /* Returns 0U-9U. Return values >= 10U are not digits. */ 87 inline unsigned int 88 decDigitValue(unsigned int c) 89 { 90 return c - '0'; 91 } 92 93 unsigned int 94 hexDigitValue(unsigned int c) 95 { 96 unsigned int r; 97 98 r = c - '0'; 99 if(r <= 9) 100 return r; 101 102 r = c - 'A'; 103 if(r <= 5) 104 return r + 10; 105 106 r = c - 'a'; 107 if(r <= 5) 108 return r + 10; 109 110 return -1U; 111 } 112 113 inline void 114 assertArithmeticOK(const llvm::fltSemantics &semantics) { 115 assert(semantics.arithmeticOK 116 && "Compile-time arithmetic does not support these semantics"); 117 } 118 119 /* Return the value of a decimal exponent of the form 120 [+-]ddddddd. 121 122 If the exponent overflows, returns a large exponent with the 123 appropriate sign. */ 124 static int 125 readExponent(const char *p) 126 { 127 bool isNegative; 128 unsigned int absExponent; 129 const unsigned int overlargeExponent = 24000; /* FIXME. */ 130 131 isNegative = (*p == '-'); 132 if (*p == '-' || *p == '+') 133 p++; 134 135 absExponent = decDigitValue(*p++); 136 assert (absExponent < 10U); 137 138 for (;;) { 139 unsigned int value; 140 141 value = decDigitValue(*p); 142 if (value >= 10U) 143 break; 144 145 p++; 146 value += absExponent * 10; 147 if (absExponent >= overlargeExponent) { 148 absExponent = overlargeExponent; 149 break; 150 } 151 absExponent = value; 152 } 153 154 if (isNegative) 155 return -(int) absExponent; 156 else 157 return (int) absExponent; 158 } 159 160 /* This is ugly and needs cleaning up, but I don't immediately see 161 how whilst remaining safe. */ 162 static int 163 totalExponent(const char *p, int exponentAdjustment) 164 { 165 integerPart unsignedExponent; 166 bool negative, overflow; 167 long exponent; 168 169 /* Move past the exponent letter and sign to the digits. */ 170 p++; 171 negative = *p == '-'; 172 if(*p == '-' || *p == '+') 173 p++; 174 175 unsignedExponent = 0; 176 overflow = false; 177 for(;;) { 178 unsigned int value; 179 180 value = decDigitValue(*p); 181 if(value >= 10U) 182 break; 183 184 p++; 185 unsignedExponent = unsignedExponent * 10 + value; 186 if(unsignedExponent > 65535) 187 overflow = true; 188 } 189 190 if(exponentAdjustment > 65535 || exponentAdjustment < -65536) 191 overflow = true; 192 193 if(!overflow) { 194 exponent = unsignedExponent; 195 if(negative) 196 exponent = -exponent; 197 exponent += exponentAdjustment; 198 if(exponent > 65535 || exponent < -65536) 199 overflow = true; 200 } 201 202 if(overflow) 203 exponent = negative ? -65536: 65535; 204 205 return exponent; 206 } 207 208 const char * 209 skipLeadingZeroesAndAnyDot(const char *p, const char **dot) 210 { 211 *dot = 0; 212 while(*p == '0') 213 p++; 214 215 if(*p == '.') { 216 *dot = p++; 217 while(*p == '0') 218 p++; 219 } 220 221 return p; 222 } 223 224 /* Given a normal decimal floating point number of the form 225 226 dddd.dddd[eE][+-]ddd 227 228 where the decimal point and exponent are optional, fill out the 229 structure D. Exponent is appropriate if the significand is 230 treated as an integer, and normalizedExponent if the significand 231 is taken to have the decimal point after a single leading 232 non-zero digit. 233 234 If the value is zero, V->firstSigDigit points to a zero, and the 235 return exponent is zero. 236 */ 237 struct decimalInfo { 238 const char *firstSigDigit; 239 const char *lastSigDigit; 240 int exponent; 241 int normalizedExponent; 242 }; 243 244 void 245 interpretDecimal(const char *p, decimalInfo *D) 246 { 247 const char *dot; 248 249 p = skipLeadingZeroesAndAnyDot (p, &dot); 250 251 D->firstSigDigit = p; 252 D->exponent = 0; 253 D->normalizedExponent = 0; 254 255 for (;;) { 256 if (*p == '.') { 257 assert(dot == 0); 258 dot = p++; 259 } 260 if (decDigitValue(*p) >= 10U) 261 break; 262 p++; 263 } 264 265 /* If number is all zerooes accept any exponent. */ 266 if (p != D->firstSigDigit) { 267 if (*p == 'e' || *p == 'E') 268 D->exponent = readExponent(p + 1); 269 270 /* Implied decimal point? */ 271 if (!dot) 272 dot = p; 273 274 /* Drop insignificant trailing zeroes. */ 275 do 276 do 277 p--; 278 while (*p == '0'); 279 while (*p == '.'); 280 281 /* Adjust the exponents for any decimal point. */ 282 D->exponent += (dot - p) - (dot > p); 283 D->normalizedExponent = (D->exponent + (p - D->firstSigDigit) 284 - (dot > D->firstSigDigit && dot < p)); 285 } 286 287 D->lastSigDigit = p; 288 } 289 290 /* Return the trailing fraction of a hexadecimal number. 291 DIGITVALUE is the first hex digit of the fraction, P points to 292 the next digit. */ 293 lostFraction 294 trailingHexadecimalFraction(const char *p, unsigned int digitValue) 295 { 296 unsigned int hexDigit; 297 298 /* If the first trailing digit isn't 0 or 8 we can work out the 299 fraction immediately. */ 300 if(digitValue > 8) 301 return lfMoreThanHalf; 302 else if(digitValue < 8 && digitValue > 0) 303 return lfLessThanHalf; 304 305 /* Otherwise we need to find the first non-zero digit. */ 306 while(*p == '0') 307 p++; 308 309 hexDigit = hexDigitValue(*p); 310 311 /* If we ran off the end it is exactly zero or one-half, otherwise 312 a little more. */ 313 if(hexDigit == -1U) 314 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 315 else 316 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 317 } 318 319 /* Return the fraction lost were a bignum truncated losing the least 320 significant BITS bits. */ 321 lostFraction 322 lostFractionThroughTruncation(const integerPart *parts, 323 unsigned int partCount, 324 unsigned int bits) 325 { 326 unsigned int lsb; 327 328 lsb = APInt::tcLSB(parts, partCount); 329 330 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ 331 if(bits <= lsb) 332 return lfExactlyZero; 333 if(bits == lsb + 1) 334 return lfExactlyHalf; 335 if(bits <= partCount * integerPartWidth 336 && APInt::tcExtractBit(parts, bits - 1)) 337 return lfMoreThanHalf; 338 339 return lfLessThanHalf; 340 } 341 342 /* Shift DST right BITS bits noting lost fraction. */ 343 lostFraction 344 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits) 345 { 346 lostFraction lost_fraction; 347 348 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 349 350 APInt::tcShiftRight(dst, parts, bits); 351 352 return lost_fraction; 353 } 354 355 /* Combine the effect of two lost fractions. */ 356 lostFraction 357 combineLostFractions(lostFraction moreSignificant, 358 lostFraction lessSignificant) 359 { 360 if(lessSignificant != lfExactlyZero) { 361 if(moreSignificant == lfExactlyZero) 362 moreSignificant = lfLessThanHalf; 363 else if(moreSignificant == lfExactlyHalf) 364 moreSignificant = lfMoreThanHalf; 365 } 366 367 return moreSignificant; 368 } 369 370 /* The error from the true value, in half-ulps, on multiplying two 371 floating point numbers, which differ from the value they 372 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 373 than the returned value. 374 375 See "How to Read Floating Point Numbers Accurately" by William D 376 Clinger. */ 377 unsigned int 378 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 379 { 380 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 381 382 if (HUerr1 + HUerr2 == 0) 383 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 384 else 385 return inexactMultiply + 2 * (HUerr1 + HUerr2); 386 } 387 388 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 389 when the least significant BITS are truncated. BITS cannot be 390 zero. */ 391 integerPart 392 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) 393 { 394 unsigned int count, partBits; 395 integerPart part, boundary; 396 397 assert (bits != 0); 398 399 bits--; 400 count = bits / integerPartWidth; 401 partBits = bits % integerPartWidth + 1; 402 403 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits)); 404 405 if (isNearest) 406 boundary = (integerPart) 1 << (partBits - 1); 407 else 408 boundary = 0; 409 410 if (count == 0) { 411 if (part - boundary <= boundary - part) 412 return part - boundary; 413 else 414 return boundary - part; 415 } 416 417 if (part == boundary) { 418 while (--count) 419 if (parts[count]) 420 return ~(integerPart) 0; /* A lot. */ 421 422 return parts[0]; 423 } else if (part == boundary - 1) { 424 while (--count) 425 if (~parts[count]) 426 return ~(integerPart) 0; /* A lot. */ 427 428 return -parts[0]; 429 } 430 431 return ~(integerPart) 0; /* A lot. */ 432 } 433 434 /* Place pow(5, power) in DST, and return the number of parts used. 435 DST must be at least one part larger than size of the answer. */ 436 static unsigned int 437 powerOf5(integerPart *dst, unsigned int power) 438 { 439 static integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 440 15625, 78125 }; 441 static integerPart pow5s[maxPowerOfFiveParts * 2 + 5] = { 78125 * 5 }; 442 static unsigned int partsCount[16] = { 1 }; 443 444 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; 445 unsigned int result; 446 447 assert(power <= maxExponent); 448 449 p1 = dst; 450 p2 = scratch; 451 452 *p1 = firstEightPowers[power & 7]; 453 power >>= 3; 454 455 result = 1; 456 pow5 = pow5s; 457 458 for (unsigned int n = 0; power; power >>= 1, n++) { 459 unsigned int pc; 460 461 pc = partsCount[n]; 462 463 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ 464 if (pc == 0) { 465 pc = partsCount[n - 1]; 466 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); 467 pc *= 2; 468 if (pow5[pc - 1] == 0) 469 pc--; 470 partsCount[n] = pc; 471 } 472 473 if (power & 1) { 474 integerPart *tmp; 475 476 APInt::tcFullMultiply(p2, p1, pow5, result, pc); 477 result += pc; 478 if (p2[result - 1] == 0) 479 result--; 480 481 /* Now result is in p1 with partsCount parts and p2 is scratch 482 space. */ 483 tmp = p1, p1 = p2, p2 = tmp; 484 } 485 486 pow5 += pc; 487 } 488 489 if (p1 != dst) 490 APInt::tcAssign(dst, p1, result); 491 492 return result; 493 } 494 495 /* Zero at the end to avoid modular arithmetic when adding one; used 496 when rounding up during hexadecimal output. */ 497 static const char hexDigitsLower[] = "0123456789abcdef0"; 498 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 499 static const char infinityL[] = "infinity"; 500 static const char infinityU[] = "INFINITY"; 501 static const char NaNL[] = "nan"; 502 static const char NaNU[] = "NAN"; 503 504 /* Write out an integerPart in hexadecimal, starting with the most 505 significant nibble. Write out exactly COUNT hexdigits, return 506 COUNT. */ 507 static unsigned int 508 partAsHex (char *dst, integerPart part, unsigned int count, 509 const char *hexDigitChars) 510 { 511 unsigned int result = count; 512 513 assert (count != 0 && count <= integerPartWidth / 4); 514 515 part >>= (integerPartWidth - 4 * count); 516 while (count--) { 517 dst[count] = hexDigitChars[part & 0xf]; 518 part >>= 4; 519 } 520 521 return result; 522 } 523 524 /* Write out an unsigned decimal integer. */ 525 static char * 526 writeUnsignedDecimal (char *dst, unsigned int n) 527 { 528 char buff[40], *p; 529 530 p = buff; 531 do 532 *p++ = '0' + n % 10; 533 while (n /= 10); 534 535 do 536 *dst++ = *--p; 537 while (p != buff); 538 539 return dst; 540 } 541 542 /* Write out a signed decimal integer. */ 543 static char * 544 writeSignedDecimal (char *dst, int value) 545 { 546 if (value < 0) { 547 *dst++ = '-'; 548 dst = writeUnsignedDecimal(dst, -(unsigned) value); 549 } else 550 dst = writeUnsignedDecimal(dst, value); 551 552 return dst; 553 } 554} 555 556/* Constructors. */ 557void 558APFloat::initialize(const fltSemantics *ourSemantics) 559{ 560 unsigned int count; 561 562 semantics = ourSemantics; 563 count = partCount(); 564 if(count > 1) 565 significand.parts = new integerPart[count]; 566} 567 568void 569APFloat::freeSignificand() 570{ 571 if(partCount() > 1) 572 delete [] significand.parts; 573} 574 575void 576APFloat::assign(const APFloat &rhs) 577{ 578 assert(semantics == rhs.semantics); 579 580 sign = rhs.sign; 581 category = rhs.category; 582 exponent = rhs.exponent; 583 sign2 = rhs.sign2; 584 exponent2 = rhs.exponent2; 585 if(category == fcNormal || category == fcNaN) 586 copySignificand(rhs); 587} 588 589void 590APFloat::copySignificand(const APFloat &rhs) 591{ 592 assert(category == fcNormal || category == fcNaN); 593 assert(rhs.partCount() >= partCount()); 594 595 APInt::tcAssign(significandParts(), rhs.significandParts(), 596 partCount()); 597} 598 599/* Make this number a NaN, with an arbitrary but deterministic value 600 for the significand. */ 601void 602APFloat::makeNaN(void) 603{ 604 category = fcNaN; 605 APInt::tcSet(significandParts(), ~0U, partCount()); 606} 607 608APFloat & 609APFloat::operator=(const APFloat &rhs) 610{ 611 if(this != &rhs) { 612 if(semantics != rhs.semantics) { 613 freeSignificand(); 614 initialize(rhs.semantics); 615 } 616 assign(rhs); 617 } 618 619 return *this; 620} 621 622bool 623APFloat::bitwiseIsEqual(const APFloat &rhs) const { 624 if (this == &rhs) 625 return true; 626 if (semantics != rhs.semantics || 627 category != rhs.category || 628 sign != rhs.sign) 629 return false; 630 if (semantics==(const llvm::fltSemantics* const)&PPCDoubleDouble && 631 sign2 != rhs.sign2) 632 return false; 633 if (category==fcZero || category==fcInfinity) 634 return true; 635 else if (category==fcNormal && exponent!=rhs.exponent) 636 return false; 637 else if (semantics==(const llvm::fltSemantics* const)&PPCDoubleDouble && 638 exponent2!=rhs.exponent2) 639 return false; 640 else { 641 int i= partCount(); 642 const integerPart* p=significandParts(); 643 const integerPart* q=rhs.significandParts(); 644 for (; i>0; i--, p++, q++) { 645 if (*p != *q) 646 return false; 647 } 648 return true; 649 } 650} 651 652APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) 653{ 654 assertArithmeticOK(ourSemantics); 655 initialize(&ourSemantics); 656 sign = 0; 657 zeroSignificand(); 658 exponent = ourSemantics.precision - 1; 659 significandParts()[0] = value; 660 normalize(rmNearestTiesToEven, lfExactlyZero); 661} 662 663APFloat::APFloat(const fltSemantics &ourSemantics, 664 fltCategory ourCategory, bool negative) 665{ 666 assertArithmeticOK(ourSemantics); 667 initialize(&ourSemantics); 668 category = ourCategory; 669 sign = negative; 670 if(category == fcNormal) 671 category = fcZero; 672 else if (ourCategory == fcNaN) 673 makeNaN(); 674} 675 676APFloat::APFloat(const fltSemantics &ourSemantics, const char *text) 677{ 678 assertArithmeticOK(ourSemantics); 679 initialize(&ourSemantics); 680 convertFromString(text, rmNearestTiesToEven); 681} 682 683APFloat::APFloat(const APFloat &rhs) 684{ 685 initialize(rhs.semantics); 686 assign(rhs); 687} 688 689APFloat::~APFloat() 690{ 691 freeSignificand(); 692} 693 694unsigned int 695APFloat::partCount() const 696{ 697 return partCountForBits(semantics->precision + 1); 698} 699 700unsigned int 701APFloat::semanticsPrecision(const fltSemantics &semantics) 702{ 703 return semantics.precision; 704} 705 706const integerPart * 707APFloat::significandParts() const 708{ 709 return const_cast<APFloat *>(this)->significandParts(); 710} 711 712integerPart * 713APFloat::significandParts() 714{ 715 assert(category == fcNormal || category == fcNaN); 716 717 if(partCount() > 1) 718 return significand.parts; 719 else 720 return &significand.part; 721} 722 723void 724APFloat::zeroSignificand() 725{ 726 category = fcNormal; 727 APInt::tcSet(significandParts(), 0, partCount()); 728} 729 730/* Increment an fcNormal floating point number's significand. */ 731void 732APFloat::incrementSignificand() 733{ 734 integerPart carry; 735 736 carry = APInt::tcIncrement(significandParts(), partCount()); 737 738 /* Our callers should never cause us to overflow. */ 739 assert(carry == 0); 740} 741 742/* Add the significand of the RHS. Returns the carry flag. */ 743integerPart 744APFloat::addSignificand(const APFloat &rhs) 745{ 746 integerPart *parts; 747 748 parts = significandParts(); 749 750 assert(semantics == rhs.semantics); 751 assert(exponent == rhs.exponent); 752 753 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 754} 755 756/* Subtract the significand of the RHS with a borrow flag. Returns 757 the borrow flag. */ 758integerPart 759APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow) 760{ 761 integerPart *parts; 762 763 parts = significandParts(); 764 765 assert(semantics == rhs.semantics); 766 assert(exponent == rhs.exponent); 767 768 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 769 partCount()); 770} 771 772/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 773 on to the full-precision result of the multiplication. Returns the 774 lost fraction. */ 775lostFraction 776APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) 777{ 778 unsigned int omsb; // One, not zero, based MSB. 779 unsigned int partsCount, newPartsCount, precision; 780 integerPart *lhsSignificand; 781 integerPart scratch[4]; 782 integerPart *fullSignificand; 783 lostFraction lost_fraction; 784 785 assert(semantics == rhs.semantics); 786 787 precision = semantics->precision; 788 newPartsCount = partCountForBits(precision * 2); 789 790 if(newPartsCount > 4) 791 fullSignificand = new integerPart[newPartsCount]; 792 else 793 fullSignificand = scratch; 794 795 lhsSignificand = significandParts(); 796 partsCount = partCount(); 797 798 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 799 rhs.significandParts(), partsCount, partsCount); 800 801 lost_fraction = lfExactlyZero; 802 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 803 exponent += rhs.exponent; 804 805 if(addend) { 806 Significand savedSignificand = significand; 807 const fltSemantics *savedSemantics = semantics; 808 fltSemantics extendedSemantics; 809 opStatus status; 810 unsigned int extendedPrecision; 811 812 /* Normalize our MSB. */ 813 extendedPrecision = precision + precision - 1; 814 if(omsb != extendedPrecision) 815 { 816 APInt::tcShiftLeft(fullSignificand, newPartsCount, 817 extendedPrecision - omsb); 818 exponent -= extendedPrecision - omsb; 819 } 820 821 /* Create new semantics. */ 822 extendedSemantics = *semantics; 823 extendedSemantics.precision = extendedPrecision; 824 825 if(newPartsCount == 1) 826 significand.part = fullSignificand[0]; 827 else 828 significand.parts = fullSignificand; 829 semantics = &extendedSemantics; 830 831 APFloat extendedAddend(*addend); 832 status = extendedAddend.convert(extendedSemantics, rmTowardZero); 833 assert(status == opOK); 834 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 835 836 /* Restore our state. */ 837 if(newPartsCount == 1) 838 fullSignificand[0] = significand.part; 839 significand = savedSignificand; 840 semantics = savedSemantics; 841 842 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 843 } 844 845 exponent -= (precision - 1); 846 847 if(omsb > precision) { 848 unsigned int bits, significantParts; 849 lostFraction lf; 850 851 bits = omsb - precision; 852 significantParts = partCountForBits(omsb); 853 lf = shiftRight(fullSignificand, significantParts, bits); 854 lost_fraction = combineLostFractions(lf, lost_fraction); 855 exponent += bits; 856 } 857 858 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 859 860 if(newPartsCount > 4) 861 delete [] fullSignificand; 862 863 return lost_fraction; 864} 865 866/* Multiply the significands of LHS and RHS to DST. */ 867lostFraction 868APFloat::divideSignificand(const APFloat &rhs) 869{ 870 unsigned int bit, i, partsCount; 871 const integerPart *rhsSignificand; 872 integerPart *lhsSignificand, *dividend, *divisor; 873 integerPart scratch[4]; 874 lostFraction lost_fraction; 875 876 assert(semantics == rhs.semantics); 877 878 lhsSignificand = significandParts(); 879 rhsSignificand = rhs.significandParts(); 880 partsCount = partCount(); 881 882 if(partsCount > 2) 883 dividend = new integerPart[partsCount * 2]; 884 else 885 dividend = scratch; 886 887 divisor = dividend + partsCount; 888 889 /* Copy the dividend and divisor as they will be modified in-place. */ 890 for(i = 0; i < partsCount; i++) { 891 dividend[i] = lhsSignificand[i]; 892 divisor[i] = rhsSignificand[i]; 893 lhsSignificand[i] = 0; 894 } 895 896 exponent -= rhs.exponent; 897 898 unsigned int precision = semantics->precision; 899 900 /* Normalize the divisor. */ 901 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 902 if(bit) { 903 exponent += bit; 904 APInt::tcShiftLeft(divisor, partsCount, bit); 905 } 906 907 /* Normalize the dividend. */ 908 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 909 if(bit) { 910 exponent -= bit; 911 APInt::tcShiftLeft(dividend, partsCount, bit); 912 } 913 914 /* Ensure the dividend >= divisor initially for the loop below. 915 Incidentally, this means that the division loop below is 916 guaranteed to set the integer bit to one. */ 917 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) { 918 exponent--; 919 APInt::tcShiftLeft(dividend, partsCount, 1); 920 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 921 } 922 923 /* Long division. */ 924 for(bit = precision; bit; bit -= 1) { 925 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 926 APInt::tcSubtract(dividend, divisor, 0, partsCount); 927 APInt::tcSetBit(lhsSignificand, bit - 1); 928 } 929 930 APInt::tcShiftLeft(dividend, partsCount, 1); 931 } 932 933 /* Figure out the lost fraction. */ 934 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 935 936 if(cmp > 0) 937 lost_fraction = lfMoreThanHalf; 938 else if(cmp == 0) 939 lost_fraction = lfExactlyHalf; 940 else if(APInt::tcIsZero(dividend, partsCount)) 941 lost_fraction = lfExactlyZero; 942 else 943 lost_fraction = lfLessThanHalf; 944 945 if(partsCount > 2) 946 delete [] dividend; 947 948 return lost_fraction; 949} 950 951unsigned int 952APFloat::significandMSB() const 953{ 954 return APInt::tcMSB(significandParts(), partCount()); 955} 956 957unsigned int 958APFloat::significandLSB() const 959{ 960 return APInt::tcLSB(significandParts(), partCount()); 961} 962 963/* Note that a zero result is NOT normalized to fcZero. */ 964lostFraction 965APFloat::shiftSignificandRight(unsigned int bits) 966{ 967 /* Our exponent should not overflow. */ 968 assert((exponent_t) (exponent + bits) >= exponent); 969 970 exponent += bits; 971 972 return shiftRight(significandParts(), partCount(), bits); 973} 974 975/* Shift the significand left BITS bits, subtract BITS from its exponent. */ 976void 977APFloat::shiftSignificandLeft(unsigned int bits) 978{ 979 assert(bits < semantics->precision); 980 981 if(bits) { 982 unsigned int partsCount = partCount(); 983 984 APInt::tcShiftLeft(significandParts(), partsCount, bits); 985 exponent -= bits; 986 987 assert(!APInt::tcIsZero(significandParts(), partsCount)); 988 } 989} 990 991APFloat::cmpResult 992APFloat::compareAbsoluteValue(const APFloat &rhs) const 993{ 994 int compare; 995 996 assert(semantics == rhs.semantics); 997 assert(category == fcNormal); 998 assert(rhs.category == fcNormal); 999 1000 compare = exponent - rhs.exponent; 1001 1002 /* If exponents are equal, do an unsigned bignum comparison of the 1003 significands. */ 1004 if(compare == 0) 1005 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 1006 partCount()); 1007 1008 if(compare > 0) 1009 return cmpGreaterThan; 1010 else if(compare < 0) 1011 return cmpLessThan; 1012 else 1013 return cmpEqual; 1014} 1015 1016/* Handle overflow. Sign is preserved. We either become infinity or 1017 the largest finite number. */ 1018APFloat::opStatus 1019APFloat::handleOverflow(roundingMode rounding_mode) 1020{ 1021 /* Infinity? */ 1022 if(rounding_mode == rmNearestTiesToEven 1023 || rounding_mode == rmNearestTiesToAway 1024 || (rounding_mode == rmTowardPositive && !sign) 1025 || (rounding_mode == rmTowardNegative && sign)) 1026 { 1027 category = fcInfinity; 1028 return (opStatus) (opOverflow | opInexact); 1029 } 1030 1031 /* Otherwise we become the largest finite number. */ 1032 category = fcNormal; 1033 exponent = semantics->maxExponent; 1034 APInt::tcSetLeastSignificantBits(significandParts(), partCount(), 1035 semantics->precision); 1036 1037 return opInexact; 1038} 1039 1040/* Returns TRUE if, when truncating the current number, with BIT the 1041 new LSB, with the given lost fraction and rounding mode, the result 1042 would need to be rounded away from zero (i.e., by increasing the 1043 signficand). This routine must work for fcZero of both signs, and 1044 fcNormal numbers. */ 1045bool 1046APFloat::roundAwayFromZero(roundingMode rounding_mode, 1047 lostFraction lost_fraction, 1048 unsigned int bit) const 1049{ 1050 /* NaNs and infinities should not have lost fractions. */ 1051 assert(category == fcNormal || category == fcZero); 1052 1053 /* Current callers never pass this so we don't handle it. */ 1054 assert(lost_fraction != lfExactlyZero); 1055 1056 switch(rounding_mode) { 1057 default: 1058 assert(0); 1059 1060 case rmNearestTiesToAway: 1061 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1062 1063 case rmNearestTiesToEven: 1064 if(lost_fraction == lfMoreThanHalf) 1065 return true; 1066 1067 /* Our zeroes don't have a significand to test. */ 1068 if(lost_fraction == lfExactlyHalf && category != fcZero) 1069 return APInt::tcExtractBit(significandParts(), bit); 1070 1071 return false; 1072 1073 case rmTowardZero: 1074 return false; 1075 1076 case rmTowardPositive: 1077 return sign == false; 1078 1079 case rmTowardNegative: 1080 return sign == true; 1081 } 1082} 1083 1084APFloat::opStatus 1085APFloat::normalize(roundingMode rounding_mode, 1086 lostFraction lost_fraction) 1087{ 1088 unsigned int omsb; /* One, not zero, based MSB. */ 1089 int exponentChange; 1090 1091 if(category != fcNormal) 1092 return opOK; 1093 1094 /* Before rounding normalize the exponent of fcNormal numbers. */ 1095 omsb = significandMSB() + 1; 1096 1097 if(omsb) { 1098 /* OMSB is numbered from 1. We want to place it in the integer 1099 bit numbered PRECISON if possible, with a compensating change in 1100 the exponent. */ 1101 exponentChange = omsb - semantics->precision; 1102 1103 /* If the resulting exponent is too high, overflow according to 1104 the rounding mode. */ 1105 if(exponent + exponentChange > semantics->maxExponent) 1106 return handleOverflow(rounding_mode); 1107 1108 /* Subnormal numbers have exponent minExponent, and their MSB 1109 is forced based on that. */ 1110 if(exponent + exponentChange < semantics->minExponent) 1111 exponentChange = semantics->minExponent - exponent; 1112 1113 /* Shifting left is easy as we don't lose precision. */ 1114 if(exponentChange < 0) { 1115 assert(lost_fraction == lfExactlyZero); 1116 1117 shiftSignificandLeft(-exponentChange); 1118 1119 return opOK; 1120 } 1121 1122 if(exponentChange > 0) { 1123 lostFraction lf; 1124 1125 /* Shift right and capture any new lost fraction. */ 1126 lf = shiftSignificandRight(exponentChange); 1127 1128 lost_fraction = combineLostFractions(lf, lost_fraction); 1129 1130 /* Keep OMSB up-to-date. */ 1131 if(omsb > (unsigned) exponentChange) 1132 omsb -= exponentChange; 1133 else 1134 omsb = 0; 1135 } 1136 } 1137 1138 /* Now round the number according to rounding_mode given the lost 1139 fraction. */ 1140 1141 /* As specified in IEEE 754, since we do not trap we do not report 1142 underflow for exact results. */ 1143 if(lost_fraction == lfExactlyZero) { 1144 /* Canonicalize zeroes. */ 1145 if(omsb == 0) 1146 category = fcZero; 1147 1148 return opOK; 1149 } 1150 1151 /* Increment the significand if we're rounding away from zero. */ 1152 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1153 if(omsb == 0) 1154 exponent = semantics->minExponent; 1155 1156 incrementSignificand(); 1157 omsb = significandMSB() + 1; 1158 1159 /* Did the significand increment overflow? */ 1160 if(omsb == (unsigned) semantics->precision + 1) { 1161 /* Renormalize by incrementing the exponent and shifting our 1162 significand right one. However if we already have the 1163 maximum exponent we overflow to infinity. */ 1164 if(exponent == semantics->maxExponent) { 1165 category = fcInfinity; 1166 1167 return (opStatus) (opOverflow | opInexact); 1168 } 1169 1170 shiftSignificandRight(1); 1171 1172 return opInexact; 1173 } 1174 } 1175 1176 /* The normal case - we were and are not denormal, and any 1177 significand increment above didn't overflow. */ 1178 if(omsb == semantics->precision) 1179 return opInexact; 1180 1181 /* We have a non-zero denormal. */ 1182 assert(omsb < semantics->precision); 1183 1184 /* Canonicalize zeroes. */ 1185 if(omsb == 0) 1186 category = fcZero; 1187 1188 /* The fcZero case is a denormal that underflowed to zero. */ 1189 return (opStatus) (opUnderflow | opInexact); 1190} 1191 1192APFloat::opStatus 1193APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) 1194{ 1195 switch(convolve(category, rhs.category)) { 1196 default: 1197 assert(0); 1198 1199 case convolve(fcNaN, fcZero): 1200 case convolve(fcNaN, fcNormal): 1201 case convolve(fcNaN, fcInfinity): 1202 case convolve(fcNaN, fcNaN): 1203 case convolve(fcNormal, fcZero): 1204 case convolve(fcInfinity, fcNormal): 1205 case convolve(fcInfinity, fcZero): 1206 return opOK; 1207 1208 case convolve(fcZero, fcNaN): 1209 case convolve(fcNormal, fcNaN): 1210 case convolve(fcInfinity, fcNaN): 1211 category = fcNaN; 1212 copySignificand(rhs); 1213 return opOK; 1214 1215 case convolve(fcNormal, fcInfinity): 1216 case convolve(fcZero, fcInfinity): 1217 category = fcInfinity; 1218 sign = rhs.sign ^ subtract; 1219 return opOK; 1220 1221 case convolve(fcZero, fcNormal): 1222 assign(rhs); 1223 sign = rhs.sign ^ subtract; 1224 return opOK; 1225 1226 case convolve(fcZero, fcZero): 1227 /* Sign depends on rounding mode; handled by caller. */ 1228 return opOK; 1229 1230 case convolve(fcInfinity, fcInfinity): 1231 /* Differently signed infinities can only be validly 1232 subtracted. */ 1233 if((sign ^ rhs.sign) != subtract) { 1234 makeNaN(); 1235 return opInvalidOp; 1236 } 1237 1238 return opOK; 1239 1240 case convolve(fcNormal, fcNormal): 1241 return opDivByZero; 1242 } 1243} 1244 1245/* Add or subtract two normal numbers. */ 1246lostFraction 1247APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) 1248{ 1249 integerPart carry; 1250 lostFraction lost_fraction; 1251 int bits; 1252 1253 /* Determine if the operation on the absolute values is effectively 1254 an addition or subtraction. */ 1255 subtract ^= (sign ^ rhs.sign) ? true : false; 1256 1257 /* Are we bigger exponent-wise than the RHS? */ 1258 bits = exponent - rhs.exponent; 1259 1260 /* Subtraction is more subtle than one might naively expect. */ 1261 if(subtract) { 1262 APFloat temp_rhs(rhs); 1263 bool reverse; 1264 1265 if (bits == 0) { 1266 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan; 1267 lost_fraction = lfExactlyZero; 1268 } else if (bits > 0) { 1269 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1270 shiftSignificandLeft(1); 1271 reverse = false; 1272 } else { 1273 lost_fraction = shiftSignificandRight(-bits - 1); 1274 temp_rhs.shiftSignificandLeft(1); 1275 reverse = true; 1276 } 1277 1278 if (reverse) { 1279 carry = temp_rhs.subtractSignificand 1280 (*this, lost_fraction != lfExactlyZero); 1281 copySignificand(temp_rhs); 1282 sign = !sign; 1283 } else { 1284 carry = subtractSignificand 1285 (temp_rhs, lost_fraction != lfExactlyZero); 1286 } 1287 1288 /* Invert the lost fraction - it was on the RHS and 1289 subtracted. */ 1290 if(lost_fraction == lfLessThanHalf) 1291 lost_fraction = lfMoreThanHalf; 1292 else if(lost_fraction == lfMoreThanHalf) 1293 lost_fraction = lfLessThanHalf; 1294 1295 /* The code above is intended to ensure that no borrow is 1296 necessary. */ 1297 assert(!carry); 1298 } else { 1299 if(bits > 0) { 1300 APFloat temp_rhs(rhs); 1301 1302 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1303 carry = addSignificand(temp_rhs); 1304 } else { 1305 lost_fraction = shiftSignificandRight(-bits); 1306 carry = addSignificand(rhs); 1307 } 1308 1309 /* We have a guard bit; generating a carry cannot happen. */ 1310 assert(!carry); 1311 } 1312 1313 return lost_fraction; 1314} 1315 1316APFloat::opStatus 1317APFloat::multiplySpecials(const APFloat &rhs) 1318{ 1319 switch(convolve(category, rhs.category)) { 1320 default: 1321 assert(0); 1322 1323 case convolve(fcNaN, fcZero): 1324 case convolve(fcNaN, fcNormal): 1325 case convolve(fcNaN, fcInfinity): 1326 case convolve(fcNaN, fcNaN): 1327 return opOK; 1328 1329 case convolve(fcZero, fcNaN): 1330 case convolve(fcNormal, fcNaN): 1331 case convolve(fcInfinity, fcNaN): 1332 category = fcNaN; 1333 copySignificand(rhs); 1334 return opOK; 1335 1336 case convolve(fcNormal, fcInfinity): 1337 case convolve(fcInfinity, fcNormal): 1338 case convolve(fcInfinity, fcInfinity): 1339 category = fcInfinity; 1340 return opOK; 1341 1342 case convolve(fcZero, fcNormal): 1343 case convolve(fcNormal, fcZero): 1344 case convolve(fcZero, fcZero): 1345 category = fcZero; 1346 return opOK; 1347 1348 case convolve(fcZero, fcInfinity): 1349 case convolve(fcInfinity, fcZero): 1350 makeNaN(); 1351 return opInvalidOp; 1352 1353 case convolve(fcNormal, fcNormal): 1354 return opOK; 1355 } 1356} 1357 1358APFloat::opStatus 1359APFloat::divideSpecials(const APFloat &rhs) 1360{ 1361 switch(convolve(category, rhs.category)) { 1362 default: 1363 assert(0); 1364 1365 case convolve(fcNaN, fcZero): 1366 case convolve(fcNaN, fcNormal): 1367 case convolve(fcNaN, fcInfinity): 1368 case convolve(fcNaN, fcNaN): 1369 case convolve(fcInfinity, fcZero): 1370 case convolve(fcInfinity, fcNormal): 1371 case convolve(fcZero, fcInfinity): 1372 case convolve(fcZero, fcNormal): 1373 return opOK; 1374 1375 case convolve(fcZero, fcNaN): 1376 case convolve(fcNormal, fcNaN): 1377 case convolve(fcInfinity, fcNaN): 1378 category = fcNaN; 1379 copySignificand(rhs); 1380 return opOK; 1381 1382 case convolve(fcNormal, fcInfinity): 1383 category = fcZero; 1384 return opOK; 1385 1386 case convolve(fcNormal, fcZero): 1387 category = fcInfinity; 1388 return opDivByZero; 1389 1390 case convolve(fcInfinity, fcInfinity): 1391 case convolve(fcZero, fcZero): 1392 makeNaN(); 1393 return opInvalidOp; 1394 1395 case convolve(fcNormal, fcNormal): 1396 return opOK; 1397 } 1398} 1399 1400/* Change sign. */ 1401void 1402APFloat::changeSign() 1403{ 1404 /* Look mummy, this one's easy. */ 1405 sign = !sign; 1406} 1407 1408void 1409APFloat::clearSign() 1410{ 1411 /* So is this one. */ 1412 sign = 0; 1413} 1414 1415void 1416APFloat::copySign(const APFloat &rhs) 1417{ 1418 /* And this one. */ 1419 sign = rhs.sign; 1420} 1421 1422/* Normalized addition or subtraction. */ 1423APFloat::opStatus 1424APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode, 1425 bool subtract) 1426{ 1427 opStatus fs; 1428 1429 assertArithmeticOK(*semantics); 1430 1431 fs = addOrSubtractSpecials(rhs, subtract); 1432 1433 /* This return code means it was not a simple case. */ 1434 if(fs == opDivByZero) { 1435 lostFraction lost_fraction; 1436 1437 lost_fraction = addOrSubtractSignificand(rhs, subtract); 1438 fs = normalize(rounding_mode, lost_fraction); 1439 1440 /* Can only be zero if we lost no fraction. */ 1441 assert(category != fcZero || lost_fraction == lfExactlyZero); 1442 } 1443 1444 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1445 positive zero unless rounding to minus infinity, except that 1446 adding two like-signed zeroes gives that zero. */ 1447 if(category == fcZero) { 1448 if(rhs.category != fcZero || (sign == rhs.sign) == subtract) 1449 sign = (rounding_mode == rmTowardNegative); 1450 } 1451 1452 return fs; 1453} 1454 1455/* Normalized addition. */ 1456APFloat::opStatus 1457APFloat::add(const APFloat &rhs, roundingMode rounding_mode) 1458{ 1459 return addOrSubtract(rhs, rounding_mode, false); 1460} 1461 1462/* Normalized subtraction. */ 1463APFloat::opStatus 1464APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode) 1465{ 1466 return addOrSubtract(rhs, rounding_mode, true); 1467} 1468 1469/* Normalized multiply. */ 1470APFloat::opStatus 1471APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode) 1472{ 1473 opStatus fs; 1474 1475 assertArithmeticOK(*semantics); 1476 sign ^= rhs.sign; 1477 fs = multiplySpecials(rhs); 1478 1479 if(category == fcNormal) { 1480 lostFraction lost_fraction = multiplySignificand(rhs, 0); 1481 fs = normalize(rounding_mode, lost_fraction); 1482 if(lost_fraction != lfExactlyZero) 1483 fs = (opStatus) (fs | opInexact); 1484 } 1485 1486 return fs; 1487} 1488 1489/* Normalized divide. */ 1490APFloat::opStatus 1491APFloat::divide(const APFloat &rhs, roundingMode rounding_mode) 1492{ 1493 opStatus fs; 1494 1495 assertArithmeticOK(*semantics); 1496 sign ^= rhs.sign; 1497 fs = divideSpecials(rhs); 1498 1499 if(category == fcNormal) { 1500 lostFraction lost_fraction = divideSignificand(rhs); 1501 fs = normalize(rounding_mode, lost_fraction); 1502 if(lost_fraction != lfExactlyZero) 1503 fs = (opStatus) (fs | opInexact); 1504 } 1505 1506 return fs; 1507} 1508 1509/* Normalized remainder. This is not currently doing TRT. */ 1510APFloat::opStatus 1511APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) 1512{ 1513 opStatus fs; 1514 APFloat V = *this; 1515 unsigned int origSign = sign; 1516 1517 assertArithmeticOK(*semantics); 1518 fs = V.divide(rhs, rmNearestTiesToEven); 1519 if (fs == opDivByZero) 1520 return fs; 1521 1522 int parts = partCount(); 1523 integerPart *x = new integerPart[parts]; 1524 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1525 rmNearestTiesToEven); 1526 if (fs==opInvalidOp) 1527 return fs; 1528 1529 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1530 rmNearestTiesToEven); 1531 assert(fs==opOK); // should always work 1532 1533 fs = V.multiply(rhs, rounding_mode); 1534 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1535 1536 fs = subtract(V, rounding_mode); 1537 assert(fs==opOK || fs==opInexact); // likewise 1538 1539 if (isZero()) 1540 sign = origSign; // IEEE754 requires this 1541 delete[] x; 1542 return fs; 1543} 1544 1545/* Normalized fused-multiply-add. */ 1546APFloat::opStatus 1547APFloat::fusedMultiplyAdd(const APFloat &multiplicand, 1548 const APFloat &addend, 1549 roundingMode rounding_mode) 1550{ 1551 opStatus fs; 1552 1553 assertArithmeticOK(*semantics); 1554 1555 /* Post-multiplication sign, before addition. */ 1556 sign ^= multiplicand.sign; 1557 1558 /* If and only if all arguments are normal do we need to do an 1559 extended-precision calculation. */ 1560 if(category == fcNormal 1561 && multiplicand.category == fcNormal 1562 && addend.category == fcNormal) { 1563 lostFraction lost_fraction; 1564 1565 lost_fraction = multiplySignificand(multiplicand, &addend); 1566 fs = normalize(rounding_mode, lost_fraction); 1567 if(lost_fraction != lfExactlyZero) 1568 fs = (opStatus) (fs | opInexact); 1569 1570 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1571 positive zero unless rounding to minus infinity, except that 1572 adding two like-signed zeroes gives that zero. */ 1573 if(category == fcZero && sign != addend.sign) 1574 sign = (rounding_mode == rmTowardNegative); 1575 } else { 1576 fs = multiplySpecials(multiplicand); 1577 1578 /* FS can only be opOK or opInvalidOp. There is no more work 1579 to do in the latter case. The IEEE-754R standard says it is 1580 implementation-defined in this case whether, if ADDEND is a 1581 quiet NaN, we raise invalid op; this implementation does so. 1582 1583 If we need to do the addition we can do so with normal 1584 precision. */ 1585 if(fs == opOK) 1586 fs = addOrSubtract(addend, rounding_mode, false); 1587 } 1588 1589 return fs; 1590} 1591 1592/* Comparison requires normalized numbers. */ 1593APFloat::cmpResult 1594APFloat::compare(const APFloat &rhs) const 1595{ 1596 cmpResult result; 1597 1598 assertArithmeticOK(*semantics); 1599 assert(semantics == rhs.semantics); 1600 1601 switch(convolve(category, rhs.category)) { 1602 default: 1603 assert(0); 1604 1605 case convolve(fcNaN, fcZero): 1606 case convolve(fcNaN, fcNormal): 1607 case convolve(fcNaN, fcInfinity): 1608 case convolve(fcNaN, fcNaN): 1609 case convolve(fcZero, fcNaN): 1610 case convolve(fcNormal, fcNaN): 1611 case convolve(fcInfinity, fcNaN): 1612 return cmpUnordered; 1613 1614 case convolve(fcInfinity, fcNormal): 1615 case convolve(fcInfinity, fcZero): 1616 case convolve(fcNormal, fcZero): 1617 if(sign) 1618 return cmpLessThan; 1619 else 1620 return cmpGreaterThan; 1621 1622 case convolve(fcNormal, fcInfinity): 1623 case convolve(fcZero, fcInfinity): 1624 case convolve(fcZero, fcNormal): 1625 if(rhs.sign) 1626 return cmpGreaterThan; 1627 else 1628 return cmpLessThan; 1629 1630 case convolve(fcInfinity, fcInfinity): 1631 if(sign == rhs.sign) 1632 return cmpEqual; 1633 else if(sign) 1634 return cmpLessThan; 1635 else 1636 return cmpGreaterThan; 1637 1638 case convolve(fcZero, fcZero): 1639 return cmpEqual; 1640 1641 case convolve(fcNormal, fcNormal): 1642 break; 1643 } 1644 1645 /* Two normal numbers. Do they have the same sign? */ 1646 if(sign != rhs.sign) { 1647 if(sign) 1648 result = cmpLessThan; 1649 else 1650 result = cmpGreaterThan; 1651 } else { 1652 /* Compare absolute values; invert result if negative. */ 1653 result = compareAbsoluteValue(rhs); 1654 1655 if(sign) { 1656 if(result == cmpLessThan) 1657 result = cmpGreaterThan; 1658 else if(result == cmpGreaterThan) 1659 result = cmpLessThan; 1660 } 1661 } 1662 1663 return result; 1664} 1665 1666APFloat::opStatus 1667APFloat::convert(const fltSemantics &toSemantics, 1668 roundingMode rounding_mode) 1669{ 1670 lostFraction lostFraction; 1671 unsigned int newPartCount, oldPartCount; 1672 opStatus fs; 1673 1674 assertArithmeticOK(*semantics); 1675 lostFraction = lfExactlyZero; 1676 newPartCount = partCountForBits(toSemantics.precision + 1); 1677 oldPartCount = partCount(); 1678 1679 /* Handle storage complications. If our new form is wider, 1680 re-allocate our bit pattern into wider storage. If it is 1681 narrower, we ignore the excess parts, but if narrowing to a 1682 single part we need to free the old storage. 1683 Be careful not to reference significandParts for zeroes 1684 and infinities, since it aborts. */ 1685 if (newPartCount > oldPartCount) { 1686 integerPart *newParts; 1687 newParts = new integerPart[newPartCount]; 1688 APInt::tcSet(newParts, 0, newPartCount); 1689 if (category==fcNormal || category==fcNaN) 1690 APInt::tcAssign(newParts, significandParts(), oldPartCount); 1691 freeSignificand(); 1692 significand.parts = newParts; 1693 } else if (newPartCount < oldPartCount) { 1694 /* Capture any lost fraction through truncation of parts so we get 1695 correct rounding whilst normalizing. */ 1696 if (category==fcNormal) 1697 lostFraction = lostFractionThroughTruncation 1698 (significandParts(), oldPartCount, toSemantics.precision); 1699 if (newPartCount == 1) { 1700 integerPart newPart = 0; 1701 if (category==fcNormal || category==fcNaN) 1702 newPart = significandParts()[0]; 1703 freeSignificand(); 1704 significand.part = newPart; 1705 } 1706 } 1707 1708 if(category == fcNormal) { 1709 /* Re-interpret our bit-pattern. */ 1710 exponent += toSemantics.precision - semantics->precision; 1711 semantics = &toSemantics; 1712 fs = normalize(rounding_mode, lostFraction); 1713 } else if (category == fcNaN) { 1714 int shift = toSemantics.precision - semantics->precision; 1715 // No normalization here, just truncate 1716 if (shift>0) 1717 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 1718 else if (shift < 0) 1719 APInt::tcShiftRight(significandParts(), newPartCount, -shift); 1720 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) 1721 // does not give you back the same bits. This is dubious, and we 1722 // don't currently do it. You're really supposed to get 1723 // an invalid operation signal at runtime, but nobody does that. 1724 semantics = &toSemantics; 1725 fs = opOK; 1726 } else { 1727 semantics = &toSemantics; 1728 fs = opOK; 1729 } 1730 1731 return fs; 1732} 1733 1734/* Convert a floating point number to an integer according to the 1735 rounding mode. If the rounded integer value is out of range this 1736 returns an invalid operation exception and the contents of the 1737 destination parts are unspecified. If the rounded value is in 1738 range but the floating point number is not the exact integer, the C 1739 standard doesn't require an inexact exception to be raised. IEEE 1740 854 does require it so we do that. 1741 1742 Note that for conversions to integer type the C standard requires 1743 round-to-zero to always be used. */ 1744APFloat::opStatus 1745APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width, 1746 bool isSigned, 1747 roundingMode rounding_mode) const 1748{ 1749 lostFraction lost_fraction; 1750 const integerPart *src; 1751 unsigned int dstPartsCount, truncatedBits; 1752 1753 /* Handle the three special cases first. */ 1754 if(category == fcInfinity || category == fcNaN) 1755 return opInvalidOp; 1756 1757 dstPartsCount = partCountForBits(width); 1758 1759 if(category == fcZero) { 1760 APInt::tcSet(parts, 0, dstPartsCount); 1761 return opOK; 1762 } 1763 1764 src = significandParts(); 1765 1766 /* Step 1: place our absolute value, with any fraction truncated, in 1767 the destination. */ 1768 if (exponent < 0) { 1769 /* Our absolute value is less than one; truncate everything. */ 1770 APInt::tcSet(parts, 0, dstPartsCount); 1771 truncatedBits = semantics->precision; 1772 } else { 1773 /* We want the most significant (exponent + 1) bits; the rest are 1774 truncated. */ 1775 unsigned int bits = exponent + 1U; 1776 1777 /* Hopelessly large in magnitude? */ 1778 if (bits > width) 1779 return opInvalidOp; 1780 1781 if (bits < semantics->precision) { 1782 /* We truncate (semantics->precision - bits) bits. */ 1783 truncatedBits = semantics->precision - bits; 1784 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits); 1785 } else { 1786 /* We want at least as many bits as are available. */ 1787 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0); 1788 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision); 1789 truncatedBits = 0; 1790 } 1791 } 1792 1793 /* Step 2: work out any lost fraction, and increment the absolute 1794 value if we would round away from zero. */ 1795 if (truncatedBits) { 1796 lost_fraction = lostFractionThroughTruncation(src, partCount(), 1797 truncatedBits); 1798 if (lost_fraction != lfExactlyZero 1799 && roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 1800 if (APInt::tcIncrement(parts, dstPartsCount)) 1801 return opInvalidOp; /* Overflow. */ 1802 } 1803 } else { 1804 lost_fraction = lfExactlyZero; 1805 } 1806 1807 /* Step 3: check if we fit in the destination. */ 1808 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1; 1809 1810 if (sign) { 1811 if (!isSigned) { 1812 /* Negative numbers cannot be represented as unsigned. */ 1813 if (omsb != 0) 1814 return opInvalidOp; 1815 } else { 1816 /* It takes omsb bits to represent the unsigned integer value. 1817 We lose a bit for the sign, but care is needed as the 1818 maximally negative integer is a special case. */ 1819 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb) 1820 return opInvalidOp; 1821 1822 /* This case can happen because of rounding. */ 1823 if (omsb > width) 1824 return opInvalidOp; 1825 } 1826 1827 APInt::tcNegate (parts, dstPartsCount); 1828 } else { 1829 if (omsb >= width + !isSigned) 1830 return opInvalidOp; 1831 } 1832 1833 if (lost_fraction == lfExactlyZero) 1834 return opOK; 1835 else 1836 return opInexact; 1837} 1838 1839/* Same as convertToSignExtendedInteger, except we provide 1840 deterministic values in case of an invalid operation exception, 1841 namely zero for NaNs and the minimal or maximal value respectively 1842 for underflow or overflow. */ 1843APFloat::opStatus 1844APFloat::convertToInteger(integerPart *parts, unsigned int width, 1845 bool isSigned, 1846 roundingMode rounding_mode) const 1847{ 1848 opStatus fs; 1849 1850 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode); 1851 1852 if (fs == opInvalidOp) { 1853 unsigned int bits, dstPartsCount; 1854 1855 dstPartsCount = partCountForBits(width); 1856 1857 if (category == fcNaN) 1858 bits = 0; 1859 else if (sign) 1860 bits = isSigned; 1861 else 1862 bits = width - isSigned; 1863 1864 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits); 1865 if (sign && isSigned) 1866 APInt::tcShiftLeft(parts, dstPartsCount, width - 1); 1867 } 1868 1869 return fs; 1870} 1871 1872/* Convert an unsigned integer SRC to a floating point number, 1873 rounding according to ROUNDING_MODE. The sign of the floating 1874 point number is not modified. */ 1875APFloat::opStatus 1876APFloat::convertFromUnsignedParts(const integerPart *src, 1877 unsigned int srcCount, 1878 roundingMode rounding_mode) 1879{ 1880 unsigned int omsb, precision, dstCount; 1881 integerPart *dst; 1882 lostFraction lost_fraction; 1883 1884 assertArithmeticOK(*semantics); 1885 category = fcNormal; 1886 omsb = APInt::tcMSB(src, srcCount) + 1; 1887 dst = significandParts(); 1888 dstCount = partCount(); 1889 precision = semantics->precision; 1890 1891 /* We want the most significant PRECISON bits of SRC. There may not 1892 be that many; extract what we can. */ 1893 if (precision <= omsb) { 1894 exponent = omsb - 1; 1895 lost_fraction = lostFractionThroughTruncation(src, srcCount, 1896 omsb - precision); 1897 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 1898 } else { 1899 exponent = precision - 1; 1900 lost_fraction = lfExactlyZero; 1901 APInt::tcExtract(dst, dstCount, src, omsb, 0); 1902 } 1903 1904 return normalize(rounding_mode, lost_fraction); 1905} 1906 1907/* Convert a two's complement integer SRC to a floating point number, 1908 rounding according to ROUNDING_MODE. ISSIGNED is true if the 1909 integer is signed, in which case it must be sign-extended. */ 1910APFloat::opStatus 1911APFloat::convertFromSignExtendedInteger(const integerPart *src, 1912 unsigned int srcCount, 1913 bool isSigned, 1914 roundingMode rounding_mode) 1915{ 1916 opStatus status; 1917 1918 assertArithmeticOK(*semantics); 1919 if (isSigned 1920 && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 1921 integerPart *copy; 1922 1923 /* If we're signed and negative negate a copy. */ 1924 sign = true; 1925 copy = new integerPart[srcCount]; 1926 APInt::tcAssign(copy, src, srcCount); 1927 APInt::tcNegate(copy, srcCount); 1928 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 1929 delete [] copy; 1930 } else { 1931 sign = false; 1932 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 1933 } 1934 1935 return status; 1936} 1937 1938/* FIXME: should this just take a const APInt reference? */ 1939APFloat::opStatus 1940APFloat::convertFromZeroExtendedInteger(const integerPart *parts, 1941 unsigned int width, bool isSigned, 1942 roundingMode rounding_mode) 1943{ 1944 unsigned int partCount = partCountForBits(width); 1945 APInt api = APInt(width, partCount, parts); 1946 1947 sign = false; 1948 if(isSigned && APInt::tcExtractBit(parts, width - 1)) { 1949 sign = true; 1950 api = -api; 1951 } 1952 1953 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 1954} 1955 1956APFloat::opStatus 1957APFloat::convertFromHexadecimalString(const char *p, 1958 roundingMode rounding_mode) 1959{ 1960 lostFraction lost_fraction; 1961 integerPart *significand; 1962 unsigned int bitPos, partsCount; 1963 const char *dot, *firstSignificantDigit; 1964 1965 zeroSignificand(); 1966 exponent = 0; 1967 category = fcNormal; 1968 1969 significand = significandParts(); 1970 partsCount = partCount(); 1971 bitPos = partsCount * integerPartWidth; 1972 1973 /* Skip leading zeroes and any (hexa)decimal point. */ 1974 p = skipLeadingZeroesAndAnyDot(p, &dot); 1975 firstSignificantDigit = p; 1976 1977 for(;;) { 1978 integerPart hex_value; 1979 1980 if(*p == '.') { 1981 assert(dot == 0); 1982 dot = p++; 1983 } 1984 1985 hex_value = hexDigitValue(*p); 1986 if(hex_value == -1U) { 1987 lost_fraction = lfExactlyZero; 1988 break; 1989 } 1990 1991 p++; 1992 1993 /* Store the number whilst 4-bit nibbles remain. */ 1994 if(bitPos) { 1995 bitPos -= 4; 1996 hex_value <<= bitPos % integerPartWidth; 1997 significand[bitPos / integerPartWidth] |= hex_value; 1998 } else { 1999 lost_fraction = trailingHexadecimalFraction(p, hex_value); 2000 while(hexDigitValue(*p) != -1U) 2001 p++; 2002 break; 2003 } 2004 } 2005 2006 /* Hex floats require an exponent but not a hexadecimal point. */ 2007 assert(*p == 'p' || *p == 'P'); 2008 2009 /* Ignore the exponent if we are zero. */ 2010 if(p != firstSignificantDigit) { 2011 int expAdjustment; 2012 2013 /* Implicit hexadecimal point? */ 2014 if(!dot) 2015 dot = p; 2016 2017 /* Calculate the exponent adjustment implicit in the number of 2018 significant digits. */ 2019 expAdjustment = dot - firstSignificantDigit; 2020 if(expAdjustment < 0) 2021 expAdjustment++; 2022 expAdjustment = expAdjustment * 4 - 1; 2023 2024 /* Adjust for writing the significand starting at the most 2025 significant nibble. */ 2026 expAdjustment += semantics->precision; 2027 expAdjustment -= partsCount * integerPartWidth; 2028 2029 /* Adjust for the given exponent. */ 2030 exponent = totalExponent(p, expAdjustment); 2031 } 2032 2033 return normalize(rounding_mode, lost_fraction); 2034} 2035 2036APFloat::opStatus 2037APFloat::roundSignificandWithExponent(const integerPart *decSigParts, 2038 unsigned sigPartCount, int exp, 2039 roundingMode rounding_mode) 2040{ 2041 unsigned int parts, pow5PartCount; 2042 fltSemantics calcSemantics = { 32767, -32767, 0, true }; 2043 integerPart pow5Parts[maxPowerOfFiveParts]; 2044 bool isNearest; 2045 2046 isNearest = (rounding_mode == rmNearestTiesToEven 2047 || rounding_mode == rmNearestTiesToAway); 2048 2049 parts = partCountForBits(semantics->precision + 11); 2050 2051 /* Calculate pow(5, abs(exp)). */ 2052 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 2053 2054 for (;; parts *= 2) { 2055 opStatus sigStatus, powStatus; 2056 unsigned int excessPrecision, truncatedBits; 2057 2058 calcSemantics.precision = parts * integerPartWidth - 1; 2059 excessPrecision = calcSemantics.precision - semantics->precision; 2060 truncatedBits = excessPrecision; 2061 2062 APFloat decSig(calcSemantics, fcZero, sign); 2063 APFloat pow5(calcSemantics, fcZero, false); 2064 2065 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 2066 rmNearestTiesToEven); 2067 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 2068 rmNearestTiesToEven); 2069 /* Add exp, as 10^n = 5^n * 2^n. */ 2070 decSig.exponent += exp; 2071 2072 lostFraction calcLostFraction; 2073 integerPart HUerr, HUdistance, powHUerr; 2074 2075 if (exp >= 0) { 2076 /* multiplySignificand leaves the precision-th bit set to 1. */ 2077 calcLostFraction = decSig.multiplySignificand(pow5, NULL); 2078 powHUerr = powStatus != opOK; 2079 } else { 2080 calcLostFraction = decSig.divideSignificand(pow5); 2081 /* Denormal numbers have less precision. */ 2082 if (decSig.exponent < semantics->minExponent) { 2083 excessPrecision += (semantics->minExponent - decSig.exponent); 2084 truncatedBits = excessPrecision; 2085 if (excessPrecision > calcSemantics.precision) 2086 excessPrecision = calcSemantics.precision; 2087 } 2088 /* Extra half-ulp lost in reciprocal of exponent. */ 2089 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0: 2; 2090 } 2091 2092 /* Both multiplySignificand and divideSignificand return the 2093 result with the integer bit set. */ 2094 assert (APInt::tcExtractBit 2095 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 2096 2097 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 2098 powHUerr); 2099 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 2100 excessPrecision, isNearest); 2101 2102 /* Are we guaranteed to round correctly if we truncate? */ 2103 if (HUdistance >= HUerr) { 2104 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 2105 calcSemantics.precision - excessPrecision, 2106 excessPrecision); 2107 /* Take the exponent of decSig. If we tcExtract-ed less bits 2108 above we must adjust our exponent to compensate for the 2109 implicit right shift. */ 2110 exponent = (decSig.exponent + semantics->precision 2111 - (calcSemantics.precision - excessPrecision)); 2112 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 2113 decSig.partCount(), 2114 truncatedBits); 2115 return normalize(rounding_mode, calcLostFraction); 2116 } 2117 } 2118} 2119 2120APFloat::opStatus 2121APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode) 2122{ 2123 decimalInfo D; 2124 opStatus fs; 2125 2126 /* Scan the text. */ 2127 interpretDecimal(p, &D); 2128 2129 /* Handle the quick cases. First the case of no significant digits, 2130 i.e. zero, and then exponents that are obviously too large or too 2131 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 2132 definitely overflows if 2133 2134 (exp - 1) * L >= maxExponent 2135 2136 and definitely underflows to zero where 2137 2138 (exp + 1) * L <= minExponent - precision 2139 2140 With integer arithmetic the tightest bounds for L are 2141 2142 93/28 < L < 196/59 [ numerator <= 256 ] 2143 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 2144 */ 2145 2146 if (*D.firstSigDigit == '0') { 2147 category = fcZero; 2148 fs = opOK; 2149 } else if ((D.normalizedExponent + 1) * 28738 2150 <= 8651 * (semantics->minExponent - (int) semantics->precision)) { 2151 /* Underflow to zero and round. */ 2152 zeroSignificand(); 2153 fs = normalize(rounding_mode, lfLessThanHalf); 2154 } else if ((D.normalizedExponent - 1) * 42039 2155 >= 12655 * semantics->maxExponent) { 2156 /* Overflow and round. */ 2157 fs = handleOverflow(rounding_mode); 2158 } else { 2159 integerPart *decSignificand; 2160 unsigned int partCount; 2161 2162 /* A tight upper bound on number of bits required to hold an 2163 N-digit decimal integer is N * 196 / 59. Allocate enough space 2164 to hold the full significand, and an extra part required by 2165 tcMultiplyPart. */ 2166 partCount = (D.lastSigDigit - D.firstSigDigit) + 1; 2167 partCount = partCountForBits(1 + 196 * partCount / 59); 2168 decSignificand = new integerPart[partCount + 1]; 2169 partCount = 0; 2170 2171 /* Convert to binary efficiently - we do almost all multiplication 2172 in an integerPart. When this would overflow do we do a single 2173 bignum multiplication, and then revert again to multiplication 2174 in an integerPart. */ 2175 do { 2176 integerPart decValue, val, multiplier; 2177 2178 val = 0; 2179 multiplier = 1; 2180 2181 do { 2182 if (*p == '.') 2183 p++; 2184 2185 decValue = decDigitValue(*p++); 2186 multiplier *= 10; 2187 val = val * 10 + decValue; 2188 /* The maximum number that can be multiplied by ten with any 2189 digit added without overflowing an integerPart. */ 2190 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 2191 2192 /* Multiply out the current part. */ 2193 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 2194 partCount, partCount + 1, false); 2195 2196 /* If we used another part (likely but not guaranteed), increase 2197 the count. */ 2198 if (decSignificand[partCount]) 2199 partCount++; 2200 } while (p <= D.lastSigDigit); 2201 2202 category = fcNormal; 2203 fs = roundSignificandWithExponent(decSignificand, partCount, 2204 D.exponent, rounding_mode); 2205 2206 delete [] decSignificand; 2207 } 2208 2209 return fs; 2210} 2211 2212APFloat::opStatus 2213APFloat::convertFromString(const char *p, roundingMode rounding_mode) 2214{ 2215 assertArithmeticOK(*semantics); 2216 2217 /* Handle a leading minus sign. */ 2218 if(*p == '-') 2219 sign = 1, p++; 2220 else 2221 sign = 0; 2222 2223 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) 2224 return convertFromHexadecimalString(p + 2, rounding_mode); 2225 else 2226 return convertFromDecimalString(p, rounding_mode); 2227} 2228 2229/* Write out a hexadecimal representation of the floating point value 2230 to DST, which must be of sufficient size, in the C99 form 2231 [-]0xh.hhhhp[+-]d. Return the number of characters written, 2232 excluding the terminating NUL. 2233 2234 If UPPERCASE, the output is in upper case, otherwise in lower case. 2235 2236 HEXDIGITS digits appear altogether, rounding the value if 2237 necessary. If HEXDIGITS is 0, the minimal precision to display the 2238 number precisely is used instead. If nothing would appear after 2239 the decimal point it is suppressed. 2240 2241 The decimal exponent is always printed and has at least one digit. 2242 Zero values display an exponent of zero. Infinities and NaNs 2243 appear as "infinity" or "nan" respectively. 2244 2245 The above rules are as specified by C99. There is ambiguity about 2246 what the leading hexadecimal digit should be. This implementation 2247 uses whatever is necessary so that the exponent is displayed as 2248 stored. This implies the exponent will fall within the IEEE format 2249 range, and the leading hexadecimal digit will be 0 (for denormals), 2250 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 2251 any other digits zero). 2252*/ 2253unsigned int 2254APFloat::convertToHexString(char *dst, unsigned int hexDigits, 2255 bool upperCase, roundingMode rounding_mode) const 2256{ 2257 char *p; 2258 2259 assertArithmeticOK(*semantics); 2260 2261 p = dst; 2262 if (sign) 2263 *dst++ = '-'; 2264 2265 switch (category) { 2266 case fcInfinity: 2267 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 2268 dst += sizeof infinityL - 1; 2269 break; 2270 2271 case fcNaN: 2272 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 2273 dst += sizeof NaNU - 1; 2274 break; 2275 2276 case fcZero: 2277 *dst++ = '0'; 2278 *dst++ = upperCase ? 'X': 'x'; 2279 *dst++ = '0'; 2280 if (hexDigits > 1) { 2281 *dst++ = '.'; 2282 memset (dst, '0', hexDigits - 1); 2283 dst += hexDigits - 1; 2284 } 2285 *dst++ = upperCase ? 'P': 'p'; 2286 *dst++ = '0'; 2287 break; 2288 2289 case fcNormal: 2290 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 2291 break; 2292 } 2293 2294 *dst = 0; 2295 2296 return dst - p; 2297} 2298 2299/* Does the hard work of outputting the correctly rounded hexadecimal 2300 form of a normal floating point number with the specified number of 2301 hexadecimal digits. If HEXDIGITS is zero the minimum number of 2302 digits necessary to print the value precisely is output. */ 2303char * 2304APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 2305 bool upperCase, 2306 roundingMode rounding_mode) const 2307{ 2308 unsigned int count, valueBits, shift, partsCount, outputDigits; 2309 const char *hexDigitChars; 2310 const integerPart *significand; 2311 char *p; 2312 bool roundUp; 2313 2314 *dst++ = '0'; 2315 *dst++ = upperCase ? 'X': 'x'; 2316 2317 roundUp = false; 2318 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 2319 2320 significand = significandParts(); 2321 partsCount = partCount(); 2322 2323 /* +3 because the first digit only uses the single integer bit, so 2324 we have 3 virtual zero most-significant-bits. */ 2325 valueBits = semantics->precision + 3; 2326 shift = integerPartWidth - valueBits % integerPartWidth; 2327 2328 /* The natural number of digits required ignoring trailing 2329 insignificant zeroes. */ 2330 outputDigits = (valueBits - significandLSB () + 3) / 4; 2331 2332 /* hexDigits of zero means use the required number for the 2333 precision. Otherwise, see if we are truncating. If we are, 2334 find out if we need to round away from zero. */ 2335 if (hexDigits) { 2336 if (hexDigits < outputDigits) { 2337 /* We are dropping non-zero bits, so need to check how to round. 2338 "bits" is the number of dropped bits. */ 2339 unsigned int bits; 2340 lostFraction fraction; 2341 2342 bits = valueBits - hexDigits * 4; 2343 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 2344 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 2345 } 2346 outputDigits = hexDigits; 2347 } 2348 2349 /* Write the digits consecutively, and start writing in the location 2350 of the hexadecimal point. We move the most significant digit 2351 left and add the hexadecimal point later. */ 2352 p = ++dst; 2353 2354 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 2355 2356 while (outputDigits && count) { 2357 integerPart part; 2358 2359 /* Put the most significant integerPartWidth bits in "part". */ 2360 if (--count == partsCount) 2361 part = 0; /* An imaginary higher zero part. */ 2362 else 2363 part = significand[count] << shift; 2364 2365 if (count && shift) 2366 part |= significand[count - 1] >> (integerPartWidth - shift); 2367 2368 /* Convert as much of "part" to hexdigits as we can. */ 2369 unsigned int curDigits = integerPartWidth / 4; 2370 2371 if (curDigits > outputDigits) 2372 curDigits = outputDigits; 2373 dst += partAsHex (dst, part, curDigits, hexDigitChars); 2374 outputDigits -= curDigits; 2375 } 2376 2377 if (roundUp) { 2378 char *q = dst; 2379 2380 /* Note that hexDigitChars has a trailing '0'. */ 2381 do { 2382 q--; 2383 *q = hexDigitChars[hexDigitValue (*q) + 1]; 2384 } while (*q == '0'); 2385 assert (q >= p); 2386 } else { 2387 /* Add trailing zeroes. */ 2388 memset (dst, '0', outputDigits); 2389 dst += outputDigits; 2390 } 2391 2392 /* Move the most significant digit to before the point, and if there 2393 is something after the decimal point add it. This must come 2394 after rounding above. */ 2395 p[-1] = p[0]; 2396 if (dst -1 == p) 2397 dst--; 2398 else 2399 p[0] = '.'; 2400 2401 /* Finally output the exponent. */ 2402 *dst++ = upperCase ? 'P': 'p'; 2403 2404 return writeSignedDecimal (dst, exponent); 2405} 2406 2407// For good performance it is desirable for different APFloats 2408// to produce different integers. 2409uint32_t 2410APFloat::getHashValue() const 2411{ 2412 if (category==fcZero) return sign<<8 | semantics->precision ; 2413 else if (category==fcInfinity) return sign<<9 | semantics->precision; 2414 else if (category==fcNaN) return 1<<10 | semantics->precision; 2415 else { 2416 uint32_t hash = sign<<11 | semantics->precision | exponent<<12; 2417 const integerPart* p = significandParts(); 2418 for (int i=partCount(); i>0; i--, p++) 2419 hash ^= ((uint32_t)*p) ^ (*p)>>32; 2420 return hash; 2421 } 2422} 2423 2424// Conversion from APFloat to/from host float/double. It may eventually be 2425// possible to eliminate these and have everybody deal with APFloats, but that 2426// will take a while. This approach will not easily extend to long double. 2427// Current implementation requires integerPartWidth==64, which is correct at 2428// the moment but could be made more general. 2429 2430// Denormals have exponent minExponent in APFloat, but minExponent-1 in 2431// the actual IEEE respresentations. We compensate for that here. 2432 2433APInt 2434APFloat::convertF80LongDoubleAPFloatToAPInt() const 2435{ 2436 assert(semantics == (const llvm::fltSemantics* const)&x87DoubleExtended); 2437 assert (partCount()==2); 2438 2439 uint64_t myexponent, mysignificand; 2440 2441 if (category==fcNormal) { 2442 myexponent = exponent+16383; //bias 2443 mysignificand = significandParts()[0]; 2444 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 2445 myexponent = 0; // denormal 2446 } else if (category==fcZero) { 2447 myexponent = 0; 2448 mysignificand = 0; 2449 } else if (category==fcInfinity) { 2450 myexponent = 0x7fff; 2451 mysignificand = 0x8000000000000000ULL; 2452 } else { 2453 assert(category == fcNaN && "Unknown category"); 2454 myexponent = 0x7fff; 2455 mysignificand = significandParts()[0]; 2456 } 2457 2458 uint64_t words[2]; 2459 words[0] = (((uint64_t)sign & 1) << 63) | 2460 ((myexponent & 0x7fff) << 48) | 2461 ((mysignificand >>16) & 0xffffffffffffLL); 2462 words[1] = mysignificand & 0xffff; 2463 return APInt(80, 2, words); 2464} 2465 2466APInt 2467APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const 2468{ 2469 assert(semantics == (const llvm::fltSemantics* const)&PPCDoubleDouble); 2470 assert (partCount()==2); 2471 2472 uint64_t myexponent, mysignificand, myexponent2, mysignificand2; 2473 2474 if (category==fcNormal) { 2475 myexponent = exponent + 1023; //bias 2476 myexponent2 = exponent2 + 1023; 2477 mysignificand = significandParts()[0]; 2478 mysignificand2 = significandParts()[1]; 2479 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 2480 myexponent = 0; // denormal 2481 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL)) 2482 myexponent2 = 0; // denormal 2483 } else if (category==fcZero) { 2484 myexponent = 0; 2485 mysignificand = 0; 2486 myexponent2 = 0; 2487 mysignificand2 = 0; 2488 } else if (category==fcInfinity) { 2489 myexponent = 0x7ff; 2490 myexponent2 = 0; 2491 mysignificand = 0; 2492 mysignificand2 = 0; 2493 } else { 2494 assert(category == fcNaN && "Unknown category"); 2495 myexponent = 0x7ff; 2496 mysignificand = significandParts()[0]; 2497 myexponent2 = exponent2; 2498 mysignificand2 = significandParts()[1]; 2499 } 2500 2501 uint64_t words[2]; 2502 words[0] = (((uint64_t)sign & 1) << 63) | 2503 ((myexponent & 0x7ff) << 52) | 2504 (mysignificand & 0xfffffffffffffLL); 2505 words[1] = (((uint64_t)sign2 & 1) << 63) | 2506 ((myexponent2 & 0x7ff) << 52) | 2507 (mysignificand2 & 0xfffffffffffffLL); 2508 return APInt(128, 2, words); 2509} 2510 2511APInt 2512APFloat::convertDoubleAPFloatToAPInt() const 2513{ 2514 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); 2515 assert (partCount()==1); 2516 2517 uint64_t myexponent, mysignificand; 2518 2519 if (category==fcNormal) { 2520 myexponent = exponent+1023; //bias 2521 mysignificand = *significandParts(); 2522 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 2523 myexponent = 0; // denormal 2524 } else if (category==fcZero) { 2525 myexponent = 0; 2526 mysignificand = 0; 2527 } else if (category==fcInfinity) { 2528 myexponent = 0x7ff; 2529 mysignificand = 0; 2530 } else { 2531 assert(category == fcNaN && "Unknown category!"); 2532 myexponent = 0x7ff; 2533 mysignificand = *significandParts(); 2534 } 2535 2536 return APInt(64, (((((uint64_t)sign & 1) << 63) | 2537 ((myexponent & 0x7ff) << 52) | 2538 (mysignificand & 0xfffffffffffffLL)))); 2539} 2540 2541APInt 2542APFloat::convertFloatAPFloatToAPInt() const 2543{ 2544 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); 2545 assert (partCount()==1); 2546 2547 uint32_t myexponent, mysignificand; 2548 2549 if (category==fcNormal) { 2550 myexponent = exponent+127; //bias 2551 mysignificand = *significandParts(); 2552 if (myexponent == 1 && !(mysignificand & 0x400000)) 2553 myexponent = 0; // denormal 2554 } else if (category==fcZero) { 2555 myexponent = 0; 2556 mysignificand = 0; 2557 } else if (category==fcInfinity) { 2558 myexponent = 0xff; 2559 mysignificand = 0; 2560 } else { 2561 assert(category == fcNaN && "Unknown category!"); 2562 myexponent = 0xff; 2563 mysignificand = *significandParts(); 2564 } 2565 2566 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | 2567 (mysignificand & 0x7fffff))); 2568} 2569 2570// This function creates an APInt that is just a bit map of the floating 2571// point constant as it would appear in memory. It is not a conversion, 2572// and treating the result as a normal integer is unlikely to be useful. 2573 2574APInt 2575APFloat::convertToAPInt() const 2576{ 2577 if (semantics == (const llvm::fltSemantics* const)&IEEEsingle) 2578 return convertFloatAPFloatToAPInt(); 2579 2580 if (semantics == (const llvm::fltSemantics* const)&IEEEdouble) 2581 return convertDoubleAPFloatToAPInt(); 2582 2583 if (semantics == (const llvm::fltSemantics* const)&PPCDoubleDouble) 2584 return convertPPCDoubleDoubleAPFloatToAPInt(); 2585 2586 assert(semantics == (const llvm::fltSemantics* const)&x87DoubleExtended && 2587 "unknown format!"); 2588 return convertF80LongDoubleAPFloatToAPInt(); 2589} 2590 2591float 2592APFloat::convertToFloat() const 2593{ 2594 assert(semantics == (const llvm::fltSemantics* const)&IEEEsingle); 2595 APInt api = convertToAPInt(); 2596 return api.bitsToFloat(); 2597} 2598 2599double 2600APFloat::convertToDouble() const 2601{ 2602 assert(semantics == (const llvm::fltSemantics* const)&IEEEdouble); 2603 APInt api = convertToAPInt(); 2604 return api.bitsToDouble(); 2605} 2606 2607/// Integer bit is explicit in this format. Current Intel book does not 2608/// define meaning of: 2609/// exponent = all 1's, integer bit not set. 2610/// exponent = 0, integer bit set. (formerly "psuedodenormals") 2611/// exponent!=0 nor all 1's, integer bit not set. (formerly "unnormals") 2612void 2613APFloat::initFromF80LongDoubleAPInt(const APInt &api) 2614{ 2615 assert(api.getBitWidth()==80); 2616 uint64_t i1 = api.getRawData()[0]; 2617 uint64_t i2 = api.getRawData()[1]; 2618 uint64_t myexponent = (i1 >> 48) & 0x7fff; 2619 uint64_t mysignificand = ((i1 << 16) & 0xffffffffffff0000ULL) | 2620 (i2 & 0xffff); 2621 2622 initialize(&APFloat::x87DoubleExtended); 2623 assert(partCount()==2); 2624 2625 sign = i1>>63; 2626 if (myexponent==0 && mysignificand==0) { 2627 // exponent, significand meaningless 2628 category = fcZero; 2629 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 2630 // exponent, significand meaningless 2631 category = fcInfinity; 2632 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) { 2633 // exponent meaningless 2634 category = fcNaN; 2635 significandParts()[0] = mysignificand; 2636 significandParts()[1] = 0; 2637 } else { 2638 category = fcNormal; 2639 exponent = myexponent - 16383; 2640 significandParts()[0] = mysignificand; 2641 significandParts()[1] = 0; 2642 if (myexponent==0) // denormal 2643 exponent = -16382; 2644 } 2645} 2646 2647void 2648APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) 2649{ 2650 assert(api.getBitWidth()==128); 2651 uint64_t i1 = api.getRawData()[0]; 2652 uint64_t i2 = api.getRawData()[1]; 2653 uint64_t myexponent = (i1 >> 52) & 0x7ff; 2654 uint64_t mysignificand = i1 & 0xfffffffffffffLL; 2655 uint64_t myexponent2 = (i2 >> 52) & 0x7ff; 2656 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL; 2657 2658 initialize(&APFloat::PPCDoubleDouble); 2659 assert(partCount()==2); 2660 2661 sign = i1>>63; 2662 sign2 = i2>>63; 2663 if (myexponent==0 && mysignificand==0) { 2664 // exponent, significand meaningless 2665 // exponent2 and significand2 are required to be 0; we don't check 2666 category = fcZero; 2667 } else if (myexponent==0x7ff && mysignificand==0) { 2668 // exponent, significand meaningless 2669 // exponent2 and significand2 are required to be 0; we don't check 2670 category = fcInfinity; 2671 } else if (myexponent==0x7ff && mysignificand!=0) { 2672 // exponent meaningless. So is the whole second word, but keep it 2673 // for determinism. 2674 category = fcNaN; 2675 exponent2 = myexponent2; 2676 significandParts()[0] = mysignificand; 2677 significandParts()[1] = mysignificand2; 2678 } else { 2679 category = fcNormal; 2680 // Note there is no category2; the second word is treated as if it is 2681 // fcNormal, although it might be something else considered by itself. 2682 exponent = myexponent - 1023; 2683 exponent2 = myexponent2 - 1023; 2684 significandParts()[0] = mysignificand; 2685 significandParts()[1] = mysignificand2; 2686 if (myexponent==0) // denormal 2687 exponent = -1022; 2688 else 2689 significandParts()[0] |= 0x10000000000000LL; // integer bit 2690 if (myexponent2==0) 2691 exponent2 = -1022; 2692 else 2693 significandParts()[1] |= 0x10000000000000LL; // integer bit 2694 } 2695} 2696 2697void 2698APFloat::initFromDoubleAPInt(const APInt &api) 2699{ 2700 assert(api.getBitWidth()==64); 2701 uint64_t i = *api.getRawData(); 2702 uint64_t myexponent = (i >> 52) & 0x7ff; 2703 uint64_t mysignificand = i & 0xfffffffffffffLL; 2704 2705 initialize(&APFloat::IEEEdouble); 2706 assert(partCount()==1); 2707 2708 sign = i>>63; 2709 if (myexponent==0 && mysignificand==0) { 2710 // exponent, significand meaningless 2711 category = fcZero; 2712 } else if (myexponent==0x7ff && mysignificand==0) { 2713 // exponent, significand meaningless 2714 category = fcInfinity; 2715 } else if (myexponent==0x7ff && mysignificand!=0) { 2716 // exponent meaningless 2717 category = fcNaN; 2718 *significandParts() = mysignificand; 2719 } else { 2720 category = fcNormal; 2721 exponent = myexponent - 1023; 2722 *significandParts() = mysignificand; 2723 if (myexponent==0) // denormal 2724 exponent = -1022; 2725 else 2726 *significandParts() |= 0x10000000000000LL; // integer bit 2727 } 2728} 2729 2730void 2731APFloat::initFromFloatAPInt(const APInt & api) 2732{ 2733 assert(api.getBitWidth()==32); 2734 uint32_t i = (uint32_t)*api.getRawData(); 2735 uint32_t myexponent = (i >> 23) & 0xff; 2736 uint32_t mysignificand = i & 0x7fffff; 2737 2738 initialize(&APFloat::IEEEsingle); 2739 assert(partCount()==1); 2740 2741 sign = i >> 31; 2742 if (myexponent==0 && mysignificand==0) { 2743 // exponent, significand meaningless 2744 category = fcZero; 2745 } else if (myexponent==0xff && mysignificand==0) { 2746 // exponent, significand meaningless 2747 category = fcInfinity; 2748 } else if (myexponent==0xff && mysignificand!=0) { 2749 // sign, exponent, significand meaningless 2750 category = fcNaN; 2751 *significandParts() = mysignificand; 2752 } else { 2753 category = fcNormal; 2754 exponent = myexponent - 127; //bias 2755 *significandParts() = mysignificand; 2756 if (myexponent==0) // denormal 2757 exponent = -126; 2758 else 2759 *significandParts() |= 0x800000; // integer bit 2760 } 2761} 2762 2763/// Treat api as containing the bits of a floating point number. Currently 2764/// we infer the floating point type from the size of the APInt. The 2765/// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful 2766/// when the size is anything else). 2767void 2768APFloat::initFromAPInt(const APInt& api, bool isIEEE) 2769{ 2770 if (api.getBitWidth() == 32) 2771 return initFromFloatAPInt(api); 2772 else if (api.getBitWidth()==64) 2773 return initFromDoubleAPInt(api); 2774 else if (api.getBitWidth()==80) 2775 return initFromF80LongDoubleAPInt(api); 2776 else if (api.getBitWidth()==128 && !isIEEE) 2777 return initFromPPCDoubleDoubleAPInt(api); 2778 else 2779 assert(0); 2780} 2781 2782APFloat::APFloat(const APInt& api, bool isIEEE) 2783{ 2784 initFromAPInt(api, isIEEE); 2785} 2786 2787APFloat::APFloat(float f) 2788{ 2789 APInt api = APInt(32, 0); 2790 initFromAPInt(api.floatToBits(f)); 2791} 2792 2793APFloat::APFloat(double d) 2794{ 2795 APInt api = APInt(64, 0); 2796 initFromAPInt(api.doubleToBits(d)); 2797} 2798