APFloat.cpp revision e3d936ac9ce56acfeb058f573e6c6ce0682ed781
1//===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Neil Booth and is distributed under the 6// University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements a class to represent arbitrary precision floating 11// point values and provide a variety of arithmetic operations on them. 12// 13//===----------------------------------------------------------------------===// 14 15#include <cassert> 16#include <cstring> 17#include "llvm/ADT/APFloat.h" 18#include "llvm/Support/MathExtras.h" 19 20using namespace llvm; 21 22#define convolve(lhs, rhs) ((lhs) * 4 + (rhs)) 23 24/* Assumed in hexadecimal significand parsing, and conversion to 25 hexadecimal strings. */ 26COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0); 27 28namespace llvm { 29 30 /* Represents floating point arithmetic semantics. */ 31 struct fltSemantics { 32 /* The largest E such that 2^E is representable; this matches the 33 definition of IEEE 754. */ 34 exponent_t maxExponent; 35 36 /* The smallest E such that 2^E is a normalized number; this 37 matches the definition of IEEE 754. */ 38 exponent_t minExponent; 39 40 /* Number of bits in the significand. This includes the integer 41 bit. */ 42 unsigned int precision; 43 44 /* True if arithmetic is supported. */ 45 unsigned int arithmeticOK; 46 }; 47 48 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true }; 49 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true }; 50 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true }; 51 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true }; 52 const fltSemantics APFloat::Bogus = { 0, 0, 0, true }; 53 54 // The PowerPC format consists of two doubles. It does not map cleanly 55 // onto the usual format above. For now only storage of constants of 56 // this type is supported, no arithmetic. 57 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false }; 58 59 /* A tight upper bound on number of parts required to hold the value 60 pow(5, power) is 61 62 power * 815 / (351 * integerPartWidth) + 1 63 64 However, whilst the result may require only this many parts, 65 because we are multiplying two values to get it, the 66 multiplication may require an extra part with the excess part 67 being zero (consider the trivial case of 1 * 1, tcFullMultiply 68 requires two parts to hold the single-part result). So we add an 69 extra one to guarantee enough space whilst multiplying. */ 70 const unsigned int maxExponent = 16383; 71 const unsigned int maxPrecision = 113; 72 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 73 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) 74 / (351 * integerPartWidth)); 75} 76 77/* Put a bunch of private, handy routines in an anonymous namespace. */ 78namespace { 79 80 inline unsigned int 81 partCountForBits(unsigned int bits) 82 { 83 return ((bits) + integerPartWidth - 1) / integerPartWidth; 84 } 85 86 /* Returns 0U-9U. Return values >= 10U are not digits. */ 87 inline unsigned int 88 decDigitValue(unsigned int c) 89 { 90 return c - '0'; 91 } 92 93 unsigned int 94 hexDigitValue(unsigned int c) 95 { 96 unsigned int r; 97 98 r = c - '0'; 99 if(r <= 9) 100 return r; 101 102 r = c - 'A'; 103 if(r <= 5) 104 return r + 10; 105 106 r = c - 'a'; 107 if(r <= 5) 108 return r + 10; 109 110 return -1U; 111 } 112 113 inline void 114 assertArithmeticOK(const llvm::fltSemantics &semantics) { 115 assert(semantics.arithmeticOK 116 && "Compile-time arithmetic does not support these semantics"); 117 } 118 119 /* Return the value of a decimal exponent of the form 120 [+-]ddddddd. 121 122 If the exponent overflows, returns a large exponent with the 123 appropriate sign. */ 124 int 125 readExponent(const char *p) 126 { 127 bool isNegative; 128 unsigned int absExponent; 129 const unsigned int overlargeExponent = 24000; /* FIXME. */ 130 131 isNegative = (*p == '-'); 132 if (*p == '-' || *p == '+') 133 p++; 134 135 absExponent = decDigitValue(*p++); 136 assert (absExponent < 10U); 137 138 for (;;) { 139 unsigned int value; 140 141 value = decDigitValue(*p); 142 if (value >= 10U) 143 break; 144 145 p++; 146 value += absExponent * 10; 147 if (absExponent >= overlargeExponent) { 148 absExponent = overlargeExponent; 149 break; 150 } 151 absExponent = value; 152 } 153 154 if (isNegative) 155 return -(int) absExponent; 156 else 157 return (int) absExponent; 158 } 159 160 /* This is ugly and needs cleaning up, but I don't immediately see 161 how whilst remaining safe. */ 162 int 163 totalExponent(const char *p, int exponentAdjustment) 164 { 165 integerPart unsignedExponent; 166 bool negative, overflow; 167 long exponent; 168 169 /* Move past the exponent letter and sign to the digits. */ 170 p++; 171 negative = *p == '-'; 172 if(*p == '-' || *p == '+') 173 p++; 174 175 unsignedExponent = 0; 176 overflow = false; 177 for(;;) { 178 unsigned int value; 179 180 value = decDigitValue(*p); 181 if(value >= 10U) 182 break; 183 184 p++; 185 unsignedExponent = unsignedExponent * 10 + value; 186 if(unsignedExponent > 65535) 187 overflow = true; 188 } 189 190 if(exponentAdjustment > 65535 || exponentAdjustment < -65536) 191 overflow = true; 192 193 if(!overflow) { 194 exponent = unsignedExponent; 195 if(negative) 196 exponent = -exponent; 197 exponent += exponentAdjustment; 198 if(exponent > 65535 || exponent < -65536) 199 overflow = true; 200 } 201 202 if(overflow) 203 exponent = negative ? -65536: 65535; 204 205 return exponent; 206 } 207 208 const char * 209 skipLeadingZeroesAndAnyDot(const char *p, const char **dot) 210 { 211 *dot = 0; 212 while(*p == '0') 213 p++; 214 215 if(*p == '.') { 216 *dot = p++; 217 while(*p == '0') 218 p++; 219 } 220 221 return p; 222 } 223 224 /* Given a normal decimal floating point number of the form 225 226 dddd.dddd[eE][+-]ddd 227 228 where the decimal point and exponent are optional, fill out the 229 structure D. Exponent is appropriate if the significand is 230 treated as an integer, and normalizedExponent if the significand 231 is taken to have the decimal point after a single leading 232 non-zero digit. 233 234 If the value is zero, V->firstSigDigit points to a zero, and the 235 return exponent is zero. 236 */ 237 struct decimalInfo { 238 const char *firstSigDigit; 239 const char *lastSigDigit; 240 int exponent; 241 int normalizedExponent; 242 }; 243 244 void 245 interpretDecimal(const char *p, decimalInfo *D) 246 { 247 const char *dot; 248 249 p = skipLeadingZeroesAndAnyDot (p, &dot); 250 251 D->firstSigDigit = p; 252 D->exponent = 0; 253 D->normalizedExponent = 0; 254 255 for (;;) { 256 if (*p == '.') { 257 assert(dot == 0); 258 dot = p++; 259 } 260 if (decDigitValue(*p) >= 10U) 261 break; 262 p++; 263 } 264 265 /* If number is all zerooes accept any exponent. */ 266 if (p != D->firstSigDigit) { 267 if (*p == 'e' || *p == 'E') 268 D->exponent = readExponent(p + 1); 269 270 /* Implied decimal point? */ 271 if (!dot) 272 dot = p; 273 274 /* Drop insignificant trailing zeroes. */ 275 do 276 do 277 p--; 278 while (*p == '0'); 279 while (*p == '.'); 280 281 /* Adjust the exponents for any decimal point. */ 282 D->exponent += (dot - p) - (dot > p); 283 D->normalizedExponent = (D->exponent + (p - D->firstSigDigit) 284 - (dot > D->firstSigDigit && dot < p)); 285 } 286 287 D->lastSigDigit = p; 288 } 289 290 /* Return the trailing fraction of a hexadecimal number. 291 DIGITVALUE is the first hex digit of the fraction, P points to 292 the next digit. */ 293 lostFraction 294 trailingHexadecimalFraction(const char *p, unsigned int digitValue) 295 { 296 unsigned int hexDigit; 297 298 /* If the first trailing digit isn't 0 or 8 we can work out the 299 fraction immediately. */ 300 if(digitValue > 8) 301 return lfMoreThanHalf; 302 else if(digitValue < 8 && digitValue > 0) 303 return lfLessThanHalf; 304 305 /* Otherwise we need to find the first non-zero digit. */ 306 while(*p == '0') 307 p++; 308 309 hexDigit = hexDigitValue(*p); 310 311 /* If we ran off the end it is exactly zero or one-half, otherwise 312 a little more. */ 313 if(hexDigit == -1U) 314 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 315 else 316 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 317 } 318 319 /* Return the fraction lost were a bignum truncated losing the least 320 significant BITS bits. */ 321 lostFraction 322 lostFractionThroughTruncation(const integerPart *parts, 323 unsigned int partCount, 324 unsigned int bits) 325 { 326 unsigned int lsb; 327 328 lsb = APInt::tcLSB(parts, partCount); 329 330 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ 331 if(bits <= lsb) 332 return lfExactlyZero; 333 if(bits == lsb + 1) 334 return lfExactlyHalf; 335 if(bits <= partCount * integerPartWidth 336 && APInt::tcExtractBit(parts, bits - 1)) 337 return lfMoreThanHalf; 338 339 return lfLessThanHalf; 340 } 341 342 /* Shift DST right BITS bits noting lost fraction. */ 343 lostFraction 344 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits) 345 { 346 lostFraction lost_fraction; 347 348 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 349 350 APInt::tcShiftRight(dst, parts, bits); 351 352 return lost_fraction; 353 } 354 355 /* Combine the effect of two lost fractions. */ 356 lostFraction 357 combineLostFractions(lostFraction moreSignificant, 358 lostFraction lessSignificant) 359 { 360 if(lessSignificant != lfExactlyZero) { 361 if(moreSignificant == lfExactlyZero) 362 moreSignificant = lfLessThanHalf; 363 else if(moreSignificant == lfExactlyHalf) 364 moreSignificant = lfMoreThanHalf; 365 } 366 367 return moreSignificant; 368 } 369 370 /* The error from the true value, in half-ulps, on multiplying two 371 floating point numbers, which differ from the value they 372 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 373 than the returned value. 374 375 See "How to Read Floating Point Numbers Accurately" by William D 376 Clinger. */ 377 unsigned int 378 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 379 { 380 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 381 382 if (HUerr1 + HUerr2 == 0) 383 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 384 else 385 return inexactMultiply + 2 * (HUerr1 + HUerr2); 386 } 387 388 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 389 when the least significant BITS are truncated. BITS cannot be 390 zero. */ 391 integerPart 392 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) 393 { 394 unsigned int count, partBits; 395 integerPart part, boundary; 396 397 assert (bits != 0); 398 399 bits--; 400 count = bits / integerPartWidth; 401 partBits = bits % integerPartWidth + 1; 402 403 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits)); 404 405 if (isNearest) 406 boundary = (integerPart) 1 << (partBits - 1); 407 else 408 boundary = 0; 409 410 if (count == 0) { 411 if (part - boundary <= boundary - part) 412 return part - boundary; 413 else 414 return boundary - part; 415 } 416 417 if (part == boundary) { 418 while (--count) 419 if (parts[count]) 420 return ~(integerPart) 0; /* A lot. */ 421 422 return parts[0]; 423 } else if (part == boundary - 1) { 424 while (--count) 425 if (~parts[count]) 426 return ~(integerPart) 0; /* A lot. */ 427 428 return -parts[0]; 429 } 430 431 return ~(integerPart) 0; /* A lot. */ 432 } 433 434 /* Place pow(5, power) in DST, and return the number of parts used. 435 DST must be at least one part larger than size of the answer. */ 436 unsigned int 437 powerOf5(integerPart *dst, unsigned int power) 438 { 439 static integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 440 15625, 78125 }; 441 static integerPart pow5s[maxPowerOfFiveParts * 2 + 5] = { 78125 * 5 }; 442 static unsigned int partsCount[16] = { 1 }; 443 444 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; 445 unsigned int result; 446 447 assert(power <= maxExponent); 448 449 p1 = dst; 450 p2 = scratch; 451 452 *p1 = firstEightPowers[power & 7]; 453 power >>= 3; 454 455 result = 1; 456 pow5 = pow5s; 457 458 for (unsigned int n = 0; power; power >>= 1, n++) { 459 unsigned int pc; 460 461 pc = partsCount[n]; 462 463 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ 464 if (pc == 0) { 465 pc = partsCount[n - 1]; 466 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); 467 pc *= 2; 468 if (pow5[pc - 1] == 0) 469 pc--; 470 partsCount[n] = pc; 471 } 472 473 if (power & 1) { 474 integerPart *tmp; 475 476 APInt::tcFullMultiply(p2, p1, pow5, result, pc); 477 result += pc; 478 if (p2[result - 1] == 0) 479 result--; 480 481 /* Now result is in p1 with partsCount parts and p2 is scratch 482 space. */ 483 tmp = p1, p1 = p2, p2 = tmp; 484 } 485 486 pow5 += pc; 487 } 488 489 if (p1 != dst) 490 APInt::tcAssign(dst, p1, result); 491 492 return result; 493 } 494 495 /* Zero at the end to avoid modular arithmetic when adding one; used 496 when rounding up during hexadecimal output. */ 497 static const char hexDigitsLower[] = "0123456789abcdef0"; 498 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 499 static const char infinityL[] = "infinity"; 500 static const char infinityU[] = "INFINITY"; 501 static const char NaNL[] = "nan"; 502 static const char NaNU[] = "NAN"; 503 504 /* Write out an integerPart in hexadecimal, starting with the most 505 significant nibble. Write out exactly COUNT hexdigits, return 506 COUNT. */ 507 unsigned int 508 partAsHex (char *dst, integerPart part, unsigned int count, 509 const char *hexDigitChars) 510 { 511 unsigned int result = count; 512 513 assert (count != 0 && count <= integerPartWidth / 4); 514 515 part >>= (integerPartWidth - 4 * count); 516 while (count--) { 517 dst[count] = hexDigitChars[part & 0xf]; 518 part >>= 4; 519 } 520 521 return result; 522 } 523 524 /* Write out an unsigned decimal integer. */ 525 char * 526 writeUnsignedDecimal (char *dst, unsigned int n) 527 { 528 char buff[40], *p; 529 530 p = buff; 531 do 532 *p++ = '0' + n % 10; 533 while (n /= 10); 534 535 do 536 *dst++ = *--p; 537 while (p != buff); 538 539 return dst; 540 } 541 542 /* Write out a signed decimal integer. */ 543 char * 544 writeSignedDecimal (char *dst, int value) 545 { 546 if (value < 0) { 547 *dst++ = '-'; 548 dst = writeUnsignedDecimal(dst, -(unsigned) value); 549 } else 550 dst = writeUnsignedDecimal(dst, value); 551 552 return dst; 553 } 554} 555 556/* Constructors. */ 557void 558APFloat::initialize(const fltSemantics *ourSemantics) 559{ 560 unsigned int count; 561 562 semantics = ourSemantics; 563 count = partCount(); 564 if(count > 1) 565 significand.parts = new integerPart[count]; 566} 567 568void 569APFloat::freeSignificand() 570{ 571 if(partCount() > 1) 572 delete [] significand.parts; 573} 574 575void 576APFloat::assign(const APFloat &rhs) 577{ 578 assert(semantics == rhs.semantics); 579 580 sign = rhs.sign; 581 category = rhs.category; 582 exponent = rhs.exponent; 583 sign2 = rhs.sign2; 584 exponent2 = rhs.exponent2; 585 if(category == fcNormal || category == fcNaN) 586 copySignificand(rhs); 587} 588 589void 590APFloat::copySignificand(const APFloat &rhs) 591{ 592 assert(category == fcNormal || category == fcNaN); 593 assert(rhs.partCount() >= partCount()); 594 595 APInt::tcAssign(significandParts(), rhs.significandParts(), 596 partCount()); 597} 598 599/* Make this number a NaN, with an arbitrary but deterministic value 600 for the significand. */ 601void 602APFloat::makeNaN(void) 603{ 604 category = fcNaN; 605 APInt::tcSet(significandParts(), ~0U, partCount()); 606} 607 608APFloat & 609APFloat::operator=(const APFloat &rhs) 610{ 611 if(this != &rhs) { 612 if(semantics != rhs.semantics) { 613 freeSignificand(); 614 initialize(rhs.semantics); 615 } 616 assign(rhs); 617 } 618 619 return *this; 620} 621 622bool 623APFloat::bitwiseIsEqual(const APFloat &rhs) const { 624 if (this == &rhs) 625 return true; 626 if (semantics != rhs.semantics || 627 category != rhs.category || 628 sign != rhs.sign) 629 return false; 630 if (semantics==(const llvm::fltSemantics* const)&PPCDoubleDouble && 631 sign2 != rhs.sign2) 632 return false; 633 if (category==fcZero || category==fcInfinity) 634 return true; 635 else if (category==fcNormal && exponent!=rhs.exponent) 636 return false; 637 else if (semantics==(const llvm::fltSemantics* const)&PPCDoubleDouble && 638 exponent2!=rhs.exponent2) 639 return false; 640 else { 641 int i= partCount(); 642 const integerPart* p=significandParts(); 643 const integerPart* q=rhs.significandParts(); 644 for (; i>0; i--, p++, q++) { 645 if (*p != *q) 646 return false; 647 } 648 return true; 649 } 650} 651 652APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) 653{ 654 assertArithmeticOK(ourSemantics); 655 initialize(&ourSemantics); 656 sign = 0; 657 zeroSignificand(); 658 exponent = ourSemantics.precision - 1; 659 significandParts()[0] = value; 660 normalize(rmNearestTiesToEven, lfExactlyZero); 661} 662 663APFloat::APFloat(const fltSemantics &ourSemantics, 664 fltCategory ourCategory, bool negative) 665{ 666 assertArithmeticOK(ourSemantics); 667 initialize(&ourSemantics); 668 category = ourCategory; 669 sign = negative; 670 if(category == fcNormal) 671 category = fcZero; 672 else if (ourCategory == fcNaN) 673 makeNaN(); 674} 675 676APFloat::APFloat(const fltSemantics &ourSemantics, const char *text) 677{ 678 assertArithmeticOK(ourSemantics); 679 initialize(&ourSemantics); 680 convertFromString(text, rmNearestTiesToEven); 681} 682 683APFloat::APFloat(const APFloat &rhs) 684{ 685 initialize(rhs.semantics); 686 assign(rhs); 687} 688 689APFloat::~APFloat() 690{ 691 freeSignificand(); 692} 693 694unsigned int 695APFloat::partCount() const 696{ 697 return partCountForBits(semantics->precision + 1); 698} 699 700unsigned int 701APFloat::semanticsPrecision(const fltSemantics &semantics) 702{ 703 return semantics.precision; 704} 705 706const integerPart * 707APFloat::significandParts() const 708{ 709 return const_cast<APFloat *>(this)->significandParts(); 710} 711 712integerPart * 713APFloat::significandParts() 714{ 715 assert(category == fcNormal || category == fcNaN); 716 717 if(partCount() > 1) 718 return significand.parts; 719 else 720 return &significand.part; 721} 722 723void 724APFloat::zeroSignificand() 725{ 726 category = fcNormal; 727 APInt::tcSet(significandParts(), 0, partCount()); 728} 729 730/* Increment an fcNormal floating point number's significand. */ 731void 732APFloat::incrementSignificand() 733{ 734 integerPart carry; 735 736 carry = APInt::tcIncrement(significandParts(), partCount()); 737 738 /* Our callers should never cause us to overflow. */ 739 assert(carry == 0); 740} 741 742/* Add the significand of the RHS. Returns the carry flag. */ 743integerPart 744APFloat::addSignificand(const APFloat &rhs) 745{ 746 integerPart *parts; 747 748 parts = significandParts(); 749 750 assert(semantics == rhs.semantics); 751 assert(exponent == rhs.exponent); 752 753 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 754} 755 756/* Subtract the significand of the RHS with a borrow flag. Returns 757 the borrow flag. */ 758integerPart 759APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow) 760{ 761 integerPart *parts; 762 763 parts = significandParts(); 764 765 assert(semantics == rhs.semantics); 766 assert(exponent == rhs.exponent); 767 768 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 769 partCount()); 770} 771 772/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 773 on to the full-precision result of the multiplication. Returns the 774 lost fraction. */ 775lostFraction 776APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) 777{ 778 unsigned int omsb; // One, not zero, based MSB. 779 unsigned int partsCount, newPartsCount, precision; 780 integerPart *lhsSignificand; 781 integerPart scratch[4]; 782 integerPart *fullSignificand; 783 lostFraction lost_fraction; 784 785 assert(semantics == rhs.semantics); 786 787 precision = semantics->precision; 788 newPartsCount = partCountForBits(precision * 2); 789 790 if(newPartsCount > 4) 791 fullSignificand = new integerPart[newPartsCount]; 792 else 793 fullSignificand = scratch; 794 795 lhsSignificand = significandParts(); 796 partsCount = partCount(); 797 798 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 799 rhs.significandParts(), partsCount, partsCount); 800 801 lost_fraction = lfExactlyZero; 802 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 803 exponent += rhs.exponent; 804 805 if(addend) { 806 Significand savedSignificand = significand; 807 const fltSemantics *savedSemantics = semantics; 808 fltSemantics extendedSemantics; 809 opStatus status; 810 unsigned int extendedPrecision; 811 812 /* Normalize our MSB. */ 813 extendedPrecision = precision + precision - 1; 814 if(omsb != extendedPrecision) 815 { 816 APInt::tcShiftLeft(fullSignificand, newPartsCount, 817 extendedPrecision - omsb); 818 exponent -= extendedPrecision - omsb; 819 } 820 821 /* Create new semantics. */ 822 extendedSemantics = *semantics; 823 extendedSemantics.precision = extendedPrecision; 824 825 if(newPartsCount == 1) 826 significand.part = fullSignificand[0]; 827 else 828 significand.parts = fullSignificand; 829 semantics = &extendedSemantics; 830 831 APFloat extendedAddend(*addend); 832 status = extendedAddend.convert(extendedSemantics, rmTowardZero); 833 assert(status == opOK); 834 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 835 836 /* Restore our state. */ 837 if(newPartsCount == 1) 838 fullSignificand[0] = significand.part; 839 significand = savedSignificand; 840 semantics = savedSemantics; 841 842 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 843 } 844 845 exponent -= (precision - 1); 846 847 if(omsb > precision) { 848 unsigned int bits, significantParts; 849 lostFraction lf; 850 851 bits = omsb - precision; 852 significantParts = partCountForBits(omsb); 853 lf = shiftRight(fullSignificand, significantParts, bits); 854 lost_fraction = combineLostFractions(lf, lost_fraction); 855 exponent += bits; 856 } 857 858 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 859 860 if(newPartsCount > 4) 861 delete [] fullSignificand; 862 863 return lost_fraction; 864} 865 866/* Multiply the significands of LHS and RHS to DST. */ 867lostFraction 868APFloat::divideSignificand(const APFloat &rhs) 869{ 870 unsigned int bit, i, partsCount; 871 const integerPart *rhsSignificand; 872 integerPart *lhsSignificand, *dividend, *divisor; 873 integerPart scratch[4]; 874 lostFraction lost_fraction; 875 876 assert(semantics == rhs.semantics); 877 878 lhsSignificand = significandParts(); 879 rhsSignificand = rhs.significandParts(); 880 partsCount = partCount(); 881 882 if(partsCount > 2) 883 dividend = new integerPart[partsCount * 2]; 884 else 885 dividend = scratch; 886 887 divisor = dividend + partsCount; 888 889 /* Copy the dividend and divisor as they will be modified in-place. */ 890 for(i = 0; i < partsCount; i++) { 891 dividend[i] = lhsSignificand[i]; 892 divisor[i] = rhsSignificand[i]; 893 lhsSignificand[i] = 0; 894 } 895 896 exponent -= rhs.exponent; 897 898 unsigned int precision = semantics->precision; 899 900 /* Normalize the divisor. */ 901 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 902 if(bit) { 903 exponent += bit; 904 APInt::tcShiftLeft(divisor, partsCount, bit); 905 } 906 907 /* Normalize the dividend. */ 908 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 909 if(bit) { 910 exponent -= bit; 911 APInt::tcShiftLeft(dividend, partsCount, bit); 912 } 913 914 /* Ensure the dividend >= divisor initially for the loop below. 915 Incidentally, this means that the division loop below is 916 guaranteed to set the integer bit to one. */ 917 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) { 918 exponent--; 919 APInt::tcShiftLeft(dividend, partsCount, 1); 920 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 921 } 922 923 /* Long division. */ 924 for(bit = precision; bit; bit -= 1) { 925 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 926 APInt::tcSubtract(dividend, divisor, 0, partsCount); 927 APInt::tcSetBit(lhsSignificand, bit - 1); 928 } 929 930 APInt::tcShiftLeft(dividend, partsCount, 1); 931 } 932 933 /* Figure out the lost fraction. */ 934 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 935 936 if(cmp > 0) 937 lost_fraction = lfMoreThanHalf; 938 else if(cmp == 0) 939 lost_fraction = lfExactlyHalf; 940 else if(APInt::tcIsZero(dividend, partsCount)) 941 lost_fraction = lfExactlyZero; 942 else 943 lost_fraction = lfLessThanHalf; 944 945 if(partsCount > 2) 946 delete [] dividend; 947 948 return lost_fraction; 949} 950 951unsigned int 952APFloat::significandMSB() const 953{ 954 return APInt::tcMSB(significandParts(), partCount()); 955} 956 957unsigned int 958APFloat::significandLSB() const 959{ 960 return APInt::tcLSB(significandParts(), partCount()); 961} 962 963/* Note that a zero result is NOT normalized to fcZero. */ 964lostFraction 965APFloat::shiftSignificandRight(unsigned int bits) 966{ 967 /* Our exponent should not overflow. */ 968 assert((exponent_t) (exponent + bits) >= exponent); 969 970 exponent += bits; 971 972 return shiftRight(significandParts(), partCount(), bits); 973} 974 975/* Shift the significand left BITS bits, subtract BITS from its exponent. */ 976void 977APFloat::shiftSignificandLeft(unsigned int bits) 978{ 979 assert(bits < semantics->precision); 980 981 if(bits) { 982 unsigned int partsCount = partCount(); 983 984 APInt::tcShiftLeft(significandParts(), partsCount, bits); 985 exponent -= bits; 986 987 assert(!APInt::tcIsZero(significandParts(), partsCount)); 988 } 989} 990 991APFloat::cmpResult 992APFloat::compareAbsoluteValue(const APFloat &rhs) const 993{ 994 int compare; 995 996 assert(semantics == rhs.semantics); 997 assert(category == fcNormal); 998 assert(rhs.category == fcNormal); 999 1000 compare = exponent - rhs.exponent; 1001 1002 /* If exponents are equal, do an unsigned bignum comparison of the 1003 significands. */ 1004 if(compare == 0) 1005 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 1006 partCount()); 1007 1008 if(compare > 0) 1009 return cmpGreaterThan; 1010 else if(compare < 0) 1011 return cmpLessThan; 1012 else 1013 return cmpEqual; 1014} 1015 1016/* Handle overflow. Sign is preserved. We either become infinity or 1017 the largest finite number. */ 1018APFloat::opStatus 1019APFloat::handleOverflow(roundingMode rounding_mode) 1020{ 1021 /* Infinity? */ 1022 if(rounding_mode == rmNearestTiesToEven 1023 || rounding_mode == rmNearestTiesToAway 1024 || (rounding_mode == rmTowardPositive && !sign) 1025 || (rounding_mode == rmTowardNegative && sign)) 1026 { 1027 category = fcInfinity; 1028 return (opStatus) (opOverflow | opInexact); 1029 } 1030 1031 /* Otherwise we become the largest finite number. */ 1032 category = fcNormal; 1033 exponent = semantics->maxExponent; 1034 APInt::tcSetLeastSignificantBits(significandParts(), partCount(), 1035 semantics->precision); 1036 1037 return opInexact; 1038} 1039 1040/* Returns TRUE if, when truncating the current number, with BIT the 1041 new LSB, with the given lost fraction and rounding mode, the result 1042 would need to be rounded away from zero (i.e., by increasing the 1043 signficand). This routine must work for fcZero of both signs, and 1044 fcNormal numbers. */ 1045bool 1046APFloat::roundAwayFromZero(roundingMode rounding_mode, 1047 lostFraction lost_fraction, 1048 unsigned int bit) const 1049{ 1050 /* NaNs and infinities should not have lost fractions. */ 1051 assert(category == fcNormal || category == fcZero); 1052 1053 /* Current callers never pass this so we don't handle it. */ 1054 assert(lost_fraction != lfExactlyZero); 1055 1056 switch(rounding_mode) { 1057 default: 1058 assert(0); 1059 1060 case rmNearestTiesToAway: 1061 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1062 1063 case rmNearestTiesToEven: 1064 if(lost_fraction == lfMoreThanHalf) 1065 return true; 1066 1067 /* Our zeroes don't have a significand to test. */ 1068 if(lost_fraction == lfExactlyHalf && category != fcZero) 1069 return APInt::tcExtractBit(significandParts(), bit); 1070 1071 return false; 1072 1073 case rmTowardZero: 1074 return false; 1075 1076 case rmTowardPositive: 1077 return sign == false; 1078 1079 case rmTowardNegative: 1080 return sign == true; 1081 } 1082} 1083 1084APFloat::opStatus 1085APFloat::normalize(roundingMode rounding_mode, 1086 lostFraction lost_fraction) 1087{ 1088 unsigned int omsb; /* One, not zero, based MSB. */ 1089 int exponentChange; 1090 1091 if(category != fcNormal) 1092 return opOK; 1093 1094 /* Before rounding normalize the exponent of fcNormal numbers. */ 1095 omsb = significandMSB() + 1; 1096 1097 if(omsb) { 1098 /* OMSB is numbered from 1. We want to place it in the integer 1099 bit numbered PRECISON if possible, with a compensating change in 1100 the exponent. */ 1101 exponentChange = omsb - semantics->precision; 1102 1103 /* If the resulting exponent is too high, overflow according to 1104 the rounding mode. */ 1105 if(exponent + exponentChange > semantics->maxExponent) 1106 return handleOverflow(rounding_mode); 1107 1108 /* Subnormal numbers have exponent minExponent, and their MSB 1109 is forced based on that. */ 1110 if(exponent + exponentChange < semantics->minExponent) 1111 exponentChange = semantics->minExponent - exponent; 1112 1113 /* Shifting left is easy as we don't lose precision. */ 1114 if(exponentChange < 0) { 1115 assert(lost_fraction == lfExactlyZero); 1116 1117 shiftSignificandLeft(-exponentChange); 1118 1119 return opOK; 1120 } 1121 1122 if(exponentChange > 0) { 1123 lostFraction lf; 1124 1125 /* Shift right and capture any new lost fraction. */ 1126 lf = shiftSignificandRight(exponentChange); 1127 1128 lost_fraction = combineLostFractions(lf, lost_fraction); 1129 1130 /* Keep OMSB up-to-date. */ 1131 if(omsb > (unsigned) exponentChange) 1132 omsb -= exponentChange; 1133 else 1134 omsb = 0; 1135 } 1136 } 1137 1138 /* Now round the number according to rounding_mode given the lost 1139 fraction. */ 1140 1141 /* As specified in IEEE 754, since we do not trap we do not report 1142 underflow for exact results. */ 1143 if(lost_fraction == lfExactlyZero) { 1144 /* Canonicalize zeroes. */ 1145 if(omsb == 0) 1146 category = fcZero; 1147 1148 return opOK; 1149 } 1150 1151 /* Increment the significand if we're rounding away from zero. */ 1152 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1153 if(omsb == 0) 1154 exponent = semantics->minExponent; 1155 1156 incrementSignificand(); 1157 omsb = significandMSB() + 1; 1158 1159 /* Did the significand increment overflow? */ 1160 if(omsb == (unsigned) semantics->precision + 1) { 1161 /* Renormalize by incrementing the exponent and shifting our 1162 significand right one. However if we already have the 1163 maximum exponent we overflow to infinity. */ 1164 if(exponent == semantics->maxExponent) { 1165 category = fcInfinity; 1166 1167 return (opStatus) (opOverflow | opInexact); 1168 } 1169 1170 shiftSignificandRight(1); 1171 1172 return opInexact; 1173 } 1174 } 1175 1176 /* The normal case - we were and are not denormal, and any 1177 significand increment above didn't overflow. */ 1178 if(omsb == semantics->precision) 1179 return opInexact; 1180 1181 /* We have a non-zero denormal. */ 1182 assert(omsb < semantics->precision); 1183 1184 /* Canonicalize zeroes. */ 1185 if(omsb == 0) 1186 category = fcZero; 1187 1188 /* The fcZero case is a denormal that underflowed to zero. */ 1189 return (opStatus) (opUnderflow | opInexact); 1190} 1191 1192APFloat::opStatus 1193APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) 1194{ 1195 switch(convolve(category, rhs.category)) { 1196 default: 1197 assert(0); 1198 1199 case convolve(fcNaN, fcZero): 1200 case convolve(fcNaN, fcNormal): 1201 case convolve(fcNaN, fcInfinity): 1202 case convolve(fcNaN, fcNaN): 1203 case convolve(fcNormal, fcZero): 1204 case convolve(fcInfinity, fcNormal): 1205 case convolve(fcInfinity, fcZero): 1206 return opOK; 1207 1208 case convolve(fcZero, fcNaN): 1209 case convolve(fcNormal, fcNaN): 1210 case convolve(fcInfinity, fcNaN): 1211 category = fcNaN; 1212 copySignificand(rhs); 1213 return opOK; 1214 1215 case convolve(fcNormal, fcInfinity): 1216 case convolve(fcZero, fcInfinity): 1217 category = fcInfinity; 1218 sign = rhs.sign ^ subtract; 1219 return opOK; 1220 1221 case convolve(fcZero, fcNormal): 1222 assign(rhs); 1223 sign = rhs.sign ^ subtract; 1224 return opOK; 1225 1226 case convolve(fcZero, fcZero): 1227 /* Sign depends on rounding mode; handled by caller. */ 1228 return opOK; 1229 1230 case convolve(fcInfinity, fcInfinity): 1231 /* Differently signed infinities can only be validly 1232 subtracted. */ 1233 if((sign ^ rhs.sign) != subtract) { 1234 makeNaN(); 1235 return opInvalidOp; 1236 } 1237 1238 return opOK; 1239 1240 case convolve(fcNormal, fcNormal): 1241 return opDivByZero; 1242 } 1243} 1244 1245/* Add or subtract two normal numbers. */ 1246lostFraction 1247APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) 1248{ 1249 integerPart carry; 1250 lostFraction lost_fraction; 1251 int bits; 1252 1253 /* Determine if the operation on the absolute values is effectively 1254 an addition or subtraction. */ 1255 subtract ^= (sign ^ rhs.sign) ? true : false; 1256 1257 /* Are we bigger exponent-wise than the RHS? */ 1258 bits = exponent - rhs.exponent; 1259 1260 /* Subtraction is more subtle than one might naively expect. */ 1261 if(subtract) { 1262 APFloat temp_rhs(rhs); 1263 bool reverse; 1264 1265 if (bits == 0) { 1266 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan; 1267 lost_fraction = lfExactlyZero; 1268 } else if (bits > 0) { 1269 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1270 shiftSignificandLeft(1); 1271 reverse = false; 1272 } else { 1273 lost_fraction = shiftSignificandRight(-bits - 1); 1274 temp_rhs.shiftSignificandLeft(1); 1275 reverse = true; 1276 } 1277 1278 if (reverse) { 1279 carry = temp_rhs.subtractSignificand 1280 (*this, lost_fraction != lfExactlyZero); 1281 copySignificand(temp_rhs); 1282 sign = !sign; 1283 } else { 1284 carry = subtractSignificand 1285 (temp_rhs, lost_fraction != lfExactlyZero); 1286 } 1287 1288 /* Invert the lost fraction - it was on the RHS and 1289 subtracted. */ 1290 if(lost_fraction == lfLessThanHalf) 1291 lost_fraction = lfMoreThanHalf; 1292 else if(lost_fraction == lfMoreThanHalf) 1293 lost_fraction = lfLessThanHalf; 1294 1295 /* The code above is intended to ensure that no borrow is 1296 necessary. */ 1297 assert(!carry); 1298 } else { 1299 if(bits > 0) { 1300 APFloat temp_rhs(rhs); 1301 1302 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1303 carry = addSignificand(temp_rhs); 1304 } else { 1305 lost_fraction = shiftSignificandRight(-bits); 1306 carry = addSignificand(rhs); 1307 } 1308 1309 /* We have a guard bit; generating a carry cannot happen. */ 1310 assert(!carry); 1311 } 1312 1313 return lost_fraction; 1314} 1315 1316APFloat::opStatus 1317APFloat::multiplySpecials(const APFloat &rhs) 1318{ 1319 switch(convolve(category, rhs.category)) { 1320 default: 1321 assert(0); 1322 1323 case convolve(fcNaN, fcZero): 1324 case convolve(fcNaN, fcNormal): 1325 case convolve(fcNaN, fcInfinity): 1326 case convolve(fcNaN, fcNaN): 1327 return opOK; 1328 1329 case convolve(fcZero, fcNaN): 1330 case convolve(fcNormal, fcNaN): 1331 case convolve(fcInfinity, fcNaN): 1332 category = fcNaN; 1333 copySignificand(rhs); 1334 return opOK; 1335 1336 case convolve(fcNormal, fcInfinity): 1337 case convolve(fcInfinity, fcNormal): 1338 case convolve(fcInfinity, fcInfinity): 1339 category = fcInfinity; 1340 return opOK; 1341 1342 case convolve(fcZero, fcNormal): 1343 case convolve(fcNormal, fcZero): 1344 case convolve(fcZero, fcZero): 1345 category = fcZero; 1346 return opOK; 1347 1348 case convolve(fcZero, fcInfinity): 1349 case convolve(fcInfinity, fcZero): 1350 makeNaN(); 1351 return opInvalidOp; 1352 1353 case convolve(fcNormal, fcNormal): 1354 return opOK; 1355 } 1356} 1357 1358APFloat::opStatus 1359APFloat::divideSpecials(const APFloat &rhs) 1360{ 1361 switch(convolve(category, rhs.category)) { 1362 default: 1363 assert(0); 1364 1365 case convolve(fcNaN, fcZero): 1366 case convolve(fcNaN, fcNormal): 1367 case convolve(fcNaN, fcInfinity): 1368 case convolve(fcNaN, fcNaN): 1369 case convolve(fcInfinity, fcZero): 1370 case convolve(fcInfinity, fcNormal): 1371 case convolve(fcZero, fcInfinity): 1372 case convolve(fcZero, fcNormal): 1373 return opOK; 1374 1375 case convolve(fcZero, fcNaN): 1376 case convolve(fcNormal, fcNaN): 1377 case convolve(fcInfinity, fcNaN): 1378 category = fcNaN; 1379 copySignificand(rhs); 1380 return opOK; 1381 1382 case convolve(fcNormal, fcInfinity): 1383 category = fcZero; 1384 return opOK; 1385 1386 case convolve(fcNormal, fcZero): 1387 category = fcInfinity; 1388 return opDivByZero; 1389 1390 case convolve(fcInfinity, fcInfinity): 1391 case convolve(fcZero, fcZero): 1392 makeNaN(); 1393 return opInvalidOp; 1394 1395 case convolve(fcNormal, fcNormal): 1396 return opOK; 1397 } 1398} 1399 1400/* Change sign. */ 1401void 1402APFloat::changeSign() 1403{ 1404 /* Look mummy, this one's easy. */ 1405 sign = !sign; 1406} 1407 1408void 1409APFloat::clearSign() 1410{ 1411 /* So is this one. */ 1412 sign = 0; 1413} 1414 1415void 1416APFloat::copySign(const APFloat &rhs) 1417{ 1418 /* And this one. */ 1419 sign = rhs.sign; 1420} 1421 1422/* Normalized addition or subtraction. */ 1423APFloat::opStatus 1424APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode, 1425 bool subtract) 1426{ 1427 opStatus fs; 1428 1429 assertArithmeticOK(*semantics); 1430 1431 fs = addOrSubtractSpecials(rhs, subtract); 1432 1433 /* This return code means it was not a simple case. */ 1434 if(fs == opDivByZero) { 1435 lostFraction lost_fraction; 1436 1437 lost_fraction = addOrSubtractSignificand(rhs, subtract); 1438 fs = normalize(rounding_mode, lost_fraction); 1439 1440 /* Can only be zero if we lost no fraction. */ 1441 assert(category != fcZero || lost_fraction == lfExactlyZero); 1442 } 1443 1444 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1445 positive zero unless rounding to minus infinity, except that 1446 adding two like-signed zeroes gives that zero. */ 1447 if(category == fcZero) { 1448 if(rhs.category != fcZero || (sign == rhs.sign) == subtract) 1449 sign = (rounding_mode == rmTowardNegative); 1450 } 1451 1452 return fs; 1453} 1454 1455/* Normalized addition. */ 1456APFloat::opStatus 1457APFloat::add(const APFloat &rhs, roundingMode rounding_mode) 1458{ 1459 return addOrSubtract(rhs, rounding_mode, false); 1460} 1461 1462/* Normalized subtraction. */ 1463APFloat::opStatus 1464APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode) 1465{ 1466 return addOrSubtract(rhs, rounding_mode, true); 1467} 1468 1469/* Normalized multiply. */ 1470APFloat::opStatus 1471APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode) 1472{ 1473 opStatus fs; 1474 1475 assertArithmeticOK(*semantics); 1476 sign ^= rhs.sign; 1477 fs = multiplySpecials(rhs); 1478 1479 if(category == fcNormal) { 1480 lostFraction lost_fraction = multiplySignificand(rhs, 0); 1481 fs = normalize(rounding_mode, lost_fraction); 1482 if(lost_fraction != lfExactlyZero) 1483 fs = (opStatus) (fs | opInexact); 1484 } 1485 1486 return fs; 1487} 1488 1489/* Normalized divide. */ 1490APFloat::opStatus 1491APFloat::divide(const APFloat &rhs, roundingMode rounding_mode) 1492{ 1493 opStatus fs; 1494 1495 assertArithmeticOK(*semantics); 1496 sign ^= rhs.sign; 1497 fs = divideSpecials(rhs); 1498 1499 if(category == fcNormal) { 1500 lostFraction lost_fraction = divideSignificand(rhs); 1501 fs = normalize(rounding_mode, lost_fraction); 1502 if(lost_fraction != lfExactlyZero) 1503 fs = (opStatus) (fs | opInexact); 1504 } 1505 1506 return fs; 1507} 1508 1509/* Normalized remainder. This is not currently doing TRT. */ 1510APFloat::opStatus 1511APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) 1512{ 1513 opStatus fs; 1514 APFloat V = *this; 1515 unsigned int origSign = sign; 1516 1517 assertArithmeticOK(*semantics); 1518 fs = V.divide(rhs, rmNearestTiesToEven); 1519 if (fs == opDivByZero) 1520 return fs; 1521 1522 int parts = partCount(); 1523 integerPart *x = new integerPart[parts]; 1524 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1525 rmNearestTiesToEven); 1526 if (fs==opInvalidOp) 1527 return fs; 1528 1529 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1530 rmNearestTiesToEven); 1531 assert(fs==opOK); // should always work 1532 1533 fs = V.multiply(rhs, rounding_mode); 1534 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1535 1536 fs = subtract(V, rounding_mode); 1537 assert(fs==opOK || fs==opInexact); // likewise 1538 1539 if (isZero()) 1540 sign = origSign; // IEEE754 requires this 1541 delete[] x; 1542 return fs; 1543} 1544 1545/* Normalized fused-multiply-add. */ 1546APFloat::opStatus 1547APFloat::fusedMultiplyAdd(const APFloat &multiplicand, 1548 const APFloat &addend, 1549 roundingMode rounding_mode) 1550{ 1551 opStatus fs; 1552 1553 assertArithmeticOK(*semantics); 1554 1555 /* Post-multiplication sign, before addition. */ 1556 sign ^= multiplicand.sign; 1557 1558 /* If and only if all arguments are normal do we need to do an 1559 extended-precision calculation. */ 1560 if(category == fcNormal 1561 && multiplicand.category == fcNormal 1562 && addend.category == fcNormal) { 1563 lostFraction lost_fraction; 1564 1565 lost_fraction = multiplySignificand(multiplicand, &addend); 1566 fs = normalize(rounding_mode, lost_fraction); 1567 if(lost_fraction != lfExactlyZero) 1568 fs = (opStatus) (fs | opInexact); 1569 1570 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1571 positive zero unless rounding to minus infinity, except that 1572 adding two like-signed zeroes gives that zero. */ 1573 if(category == fcZero && sign != addend.sign) 1574 sign = (rounding_mode == rmTowardNegative); 1575 } else { 1576 fs = multiplySpecials(multiplicand); 1577 1578 /* FS can only be opOK or opInvalidOp. There is no more work 1579 to do in the latter case. The IEEE-754R standard says it is 1580 implementation-defined in this case whether, if ADDEND is a 1581 quiet NaN, we raise invalid op; this implementation does so. 1582 1583 If we need to do the addition we can do so with normal 1584 precision. */ 1585 if(fs == opOK) 1586 fs = addOrSubtract(addend, rounding_mode, false); 1587 } 1588 1589 return fs; 1590} 1591 1592/* Comparison requires normalized numbers. */ 1593APFloat::cmpResult 1594APFloat::compare(const APFloat &rhs) const 1595{ 1596 cmpResult result; 1597 1598 assertArithmeticOK(*semantics); 1599 assert(semantics == rhs.semantics); 1600 1601 switch(convolve(category, rhs.category)) { 1602 default: 1603 assert(0); 1604 1605 case convolve(fcNaN, fcZero): 1606 case convolve(fcNaN, fcNormal): 1607 case convolve(fcNaN, fcInfinity): 1608 case convolve(fcNaN, fcNaN): 1609 case convolve(fcZero, fcNaN): 1610 case convolve(fcNormal, fcNaN): 1611 case convolve(fcInfinity, fcNaN): 1612 return cmpUnordered; 1613 1614 case convolve(fcInfinity, fcNormal): 1615 case convolve(fcInfinity, fcZero): 1616 case convolve(fcNormal, fcZero): 1617 if(sign) 1618 return cmpLessThan; 1619 else 1620 return cmpGreaterThan; 1621 1622 case convolve(fcNormal, fcInfinity): 1623 case convolve(fcZero, fcInfinity): 1624 case convolve(fcZero, fcNormal): 1625 if(rhs.sign) 1626 return cmpGreaterThan; 1627 else 1628 return cmpLessThan; 1629 1630 case convolve(fcInfinity, fcInfinity): 1631 if(sign == rhs.sign) 1632 return cmpEqual; 1633 else if(sign) 1634 return cmpLessThan; 1635 else 1636 return cmpGreaterThan; 1637 1638 case convolve(fcZero, fcZero): 1639 return cmpEqual; 1640 1641 case convolve(fcNormal, fcNormal): 1642 break; 1643 } 1644 1645 /* Two normal numbers. Do they have the same sign? */ 1646 if(sign != rhs.sign) { 1647 if(sign) 1648 result = cmpLessThan; 1649 else 1650 result = cmpGreaterThan; 1651 } else { 1652 /* Compare absolute values; invert result if negative. */ 1653 result = compareAbsoluteValue(rhs); 1654 1655 if(sign) { 1656 if(result == cmpLessThan) 1657 result = cmpGreaterThan; 1658 else if(result == cmpGreaterThan) 1659 result = cmpLessThan; 1660 } 1661 } 1662 1663 return result; 1664} 1665 1666APFloat::opStatus 1667APFloat::convert(const fltSemantics &toSemantics, 1668 roundingMode rounding_mode) 1669{ 1670 lostFraction lostFraction; 1671 unsigned int newPartCount, oldPartCount; 1672 opStatus fs; 1673 1674 assertArithmeticOK(*semantics); 1675 lostFraction = lfExactlyZero; 1676 newPartCount = partCountForBits(toSemantics.precision + 1); 1677 oldPartCount = partCount(); 1678 1679 /* Handle storage complications. If our new form is wider, 1680 re-allocate our bit pattern into wider storage. If it is 1681 narrower, we ignore the excess parts, but if narrowing to a 1682 single part we need to free the old storage. 1683 Be careful not to reference significandParts for zeroes 1684 and infinities, since it aborts. */ 1685 if (newPartCount > oldPartCount) { 1686 integerPart *newParts; 1687 newParts = new integerPart[newPartCount]; 1688 APInt::tcSet(newParts, 0, newPartCount); 1689 if (category==fcNormal || category==fcNaN) 1690 APInt::tcAssign(newParts, significandParts(), oldPartCount); 1691 freeSignificand(); 1692 significand.parts = newParts; 1693 } else if (newPartCount < oldPartCount) { 1694 /* Capture any lost fraction through truncation of parts so we get 1695 correct rounding whilst normalizing. */ 1696 if (category==fcNormal) 1697 lostFraction = lostFractionThroughTruncation 1698 (significandParts(), oldPartCount, toSemantics.precision); 1699 if (newPartCount == 1) { 1700 integerPart newPart = 0; 1701 if (category==fcNormal || category==fcNaN) 1702 newPart = significandParts()[0]; 1703 freeSignificand(); 1704 significand.part = newPart; 1705 } 1706 } 1707 1708 if(category == fcNormal) { 1709 /* Re-interpret our bit-pattern. */ 1710 exponent += toSemantics.precision - semantics->precision; 1711 semantics = &toSemantics; 1712 fs = normalize(rounding_mode, lostFraction); 1713 } else if (category == fcNaN) { 1714 int shift = toSemantics.precision - semantics->precision; 1715 // No normalization here, just truncate 1716 if (shift>0) 1717 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 1718 else if (shift < 0) 1719 APInt::tcShiftRight(significandParts(), newPartCount, -shift); 1720 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) 1721 // does not give you back the same bits. This is dubious, and we 1722 // don't currently do it. You're really supposed to get 1723 // an invalid operation signal at runtime, but nobody does that. 1724 semantics = &toSemantics; 1725 fs = opOK; 1726 } else { 1727 semantics = &toSemantics; 1728 fs = opOK; 1729 } 1730 1731 return fs; 1732} 1733 1734/* Convert a floating point number to an integer according to the 1735 rounding mode. If the rounded integer value is out of range this 1736 returns an invalid operation exception and the contents of the 1737 destination parts are unspecified. If the rounded value is in 1738 range but the floating point number is not the exact integer, the C 1739 standard doesn't require an inexact exception to be raised. IEEE 1740 854 does require it so we do that. 1741 1742 Note that for conversions to integer type the C standard requires 1743 round-to-zero to always be used. */ 1744APFloat::opStatus 1745APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width, 1746 bool isSigned, 1747 roundingMode rounding_mode) const 1748{ 1749 lostFraction lost_fraction; 1750 const integerPart *src; 1751 unsigned int dstPartsCount, truncatedBits; 1752 1753 assertArithmeticOK(*semantics); 1754 1755 /* Handle the three special cases first. */ 1756 if(category == fcInfinity || category == fcNaN) 1757 return opInvalidOp; 1758 1759 dstPartsCount = partCountForBits(width); 1760 1761 if(category == fcZero) { 1762 APInt::tcSet(parts, 0, dstPartsCount); 1763 return opOK; 1764 } 1765 1766 src = significandParts(); 1767 1768 /* Step 1: place our absolute value, with any fraction truncated, in 1769 the destination. */ 1770 if (exponent < 0) { 1771 /* Our absolute value is less than one; truncate everything. */ 1772 APInt::tcSet(parts, 0, dstPartsCount); 1773 truncatedBits = semantics->precision; 1774 } else { 1775 /* We want the most significant (exponent + 1) bits; the rest are 1776 truncated. */ 1777 unsigned int bits = exponent + 1U; 1778 1779 /* Hopelessly large in magnitude? */ 1780 if (bits > width) 1781 return opInvalidOp; 1782 1783 if (bits < semantics->precision) { 1784 /* We truncate (semantics->precision - bits) bits. */ 1785 truncatedBits = semantics->precision - bits; 1786 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits); 1787 } else { 1788 /* We want at least as many bits as are available. */ 1789 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0); 1790 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision); 1791 truncatedBits = 0; 1792 } 1793 } 1794 1795 /* Step 2: work out any lost fraction, and increment the absolute 1796 value if we would round away from zero. */ 1797 if (truncatedBits) { 1798 lost_fraction = lostFractionThroughTruncation(src, partCount(), 1799 truncatedBits); 1800 if (lost_fraction != lfExactlyZero 1801 && roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 1802 if (APInt::tcIncrement(parts, dstPartsCount)) 1803 return opInvalidOp; /* Overflow. */ 1804 } 1805 } else { 1806 lost_fraction = lfExactlyZero; 1807 } 1808 1809 /* Step 3: check if we fit in the destination. */ 1810 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1; 1811 1812 if (sign) { 1813 if (!isSigned) { 1814 /* Negative numbers cannot be represented as unsigned. */ 1815 if (omsb != 0) 1816 return opInvalidOp; 1817 } else { 1818 /* It takes omsb bits to represent the unsigned integer value. 1819 We lose a bit for the sign, but care is needed as the 1820 maximally negative integer is a special case. */ 1821 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb) 1822 return opInvalidOp; 1823 1824 /* This case can happen because of rounding. */ 1825 if (omsb > width) 1826 return opInvalidOp; 1827 } 1828 1829 APInt::tcNegate (parts, dstPartsCount); 1830 } else { 1831 if (omsb >= width + !isSigned) 1832 return opInvalidOp; 1833 } 1834 1835 if (lost_fraction == lfExactlyZero) 1836 return opOK; 1837 else 1838 return opInexact; 1839} 1840 1841/* Same as convertToSignExtendedInteger, except we provide 1842 deterministic values in case of an invalid operation exception, 1843 namely zero for NaNs and the minimal or maximal value respectively 1844 for underflow or overflow. */ 1845APFloat::opStatus 1846APFloat::convertToInteger(integerPart *parts, unsigned int width, 1847 bool isSigned, 1848 roundingMode rounding_mode) const 1849{ 1850 opStatus fs; 1851 1852 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode); 1853 1854 if (fs == opInvalidOp) { 1855 unsigned int bits, dstPartsCount; 1856 1857 dstPartsCount = partCountForBits(width); 1858 1859 if (category == fcNaN) 1860 bits = 0; 1861 else if (sign) 1862 bits = isSigned; 1863 else 1864 bits = width - isSigned; 1865 1866 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits); 1867 if (sign && isSigned) 1868 APInt::tcShiftLeft(parts, dstPartsCount, width - 1); 1869 } 1870 1871 return fs; 1872} 1873 1874/* Convert an unsigned integer SRC to a floating point number, 1875 rounding according to ROUNDING_MODE. The sign of the floating 1876 point number is not modified. */ 1877APFloat::opStatus 1878APFloat::convertFromUnsignedParts(const integerPart *src, 1879 unsigned int srcCount, 1880 roundingMode rounding_mode) 1881{ 1882 unsigned int omsb, precision, dstCount; 1883 integerPart *dst; 1884 lostFraction lost_fraction; 1885 1886 assertArithmeticOK(*semantics); 1887 category = fcNormal; 1888 omsb = APInt::tcMSB(src, srcCount) + 1; 1889 dst = significandParts(); 1890 dstCount = partCount(); 1891 precision = semantics->precision; 1892 1893 /* We want the most significant PRECISON bits of SRC. There may not 1894 be that many; extract what we can. */ 1895 if (precision <= omsb) { 1896 exponent = omsb - 1; 1897 lost_fraction = lostFractionThroughTruncation(src, srcCount, 1898 omsb - precision); 1899 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 1900 } else { 1901 exponent = precision - 1; 1902 lost_fraction = lfExactlyZero; 1903 APInt::tcExtract(dst, dstCount, src, omsb, 0); 1904 } 1905 1906 return normalize(rounding_mode, lost_fraction); 1907} 1908 1909/* Convert a two's complement integer SRC to a floating point number, 1910 rounding according to ROUNDING_MODE. ISSIGNED is true if the 1911 integer is signed, in which case it must be sign-extended. */ 1912APFloat::opStatus 1913APFloat::convertFromSignExtendedInteger(const integerPart *src, 1914 unsigned int srcCount, 1915 bool isSigned, 1916 roundingMode rounding_mode) 1917{ 1918 opStatus status; 1919 1920 assertArithmeticOK(*semantics); 1921 if (isSigned 1922 && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 1923 integerPart *copy; 1924 1925 /* If we're signed and negative negate a copy. */ 1926 sign = true; 1927 copy = new integerPart[srcCount]; 1928 APInt::tcAssign(copy, src, srcCount); 1929 APInt::tcNegate(copy, srcCount); 1930 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 1931 delete [] copy; 1932 } else { 1933 sign = false; 1934 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 1935 } 1936 1937 return status; 1938} 1939 1940/* FIXME: should this just take a const APInt reference? */ 1941APFloat::opStatus 1942APFloat::convertFromZeroExtendedInteger(const integerPart *parts, 1943 unsigned int width, bool isSigned, 1944 roundingMode rounding_mode) 1945{ 1946 unsigned int partCount = partCountForBits(width); 1947 APInt api = APInt(width, partCount, parts); 1948 1949 sign = false; 1950 if(isSigned && APInt::tcExtractBit(parts, width - 1)) { 1951 sign = true; 1952 api = -api; 1953 } 1954 1955 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 1956} 1957 1958APFloat::opStatus 1959APFloat::convertFromHexadecimalString(const char *p, 1960 roundingMode rounding_mode) 1961{ 1962 lostFraction lost_fraction; 1963 integerPart *significand; 1964 unsigned int bitPos, partsCount; 1965 const char *dot, *firstSignificantDigit; 1966 1967 zeroSignificand(); 1968 exponent = 0; 1969 category = fcNormal; 1970 1971 significand = significandParts(); 1972 partsCount = partCount(); 1973 bitPos = partsCount * integerPartWidth; 1974 1975 /* Skip leading zeroes and any (hexa)decimal point. */ 1976 p = skipLeadingZeroesAndAnyDot(p, &dot); 1977 firstSignificantDigit = p; 1978 1979 for(;;) { 1980 integerPart hex_value; 1981 1982 if(*p == '.') { 1983 assert(dot == 0); 1984 dot = p++; 1985 } 1986 1987 hex_value = hexDigitValue(*p); 1988 if(hex_value == -1U) { 1989 lost_fraction = lfExactlyZero; 1990 break; 1991 } 1992 1993 p++; 1994 1995 /* Store the number whilst 4-bit nibbles remain. */ 1996 if(bitPos) { 1997 bitPos -= 4; 1998 hex_value <<= bitPos % integerPartWidth; 1999 significand[bitPos / integerPartWidth] |= hex_value; 2000 } else { 2001 lost_fraction = trailingHexadecimalFraction(p, hex_value); 2002 while(hexDigitValue(*p) != -1U) 2003 p++; 2004 break; 2005 } 2006 } 2007 2008 /* Hex floats require an exponent but not a hexadecimal point. */ 2009 assert(*p == 'p' || *p == 'P'); 2010 2011 /* Ignore the exponent if we are zero. */ 2012 if(p != firstSignificantDigit) { 2013 int expAdjustment; 2014 2015 /* Implicit hexadecimal point? */ 2016 if(!dot) 2017 dot = p; 2018 2019 /* Calculate the exponent adjustment implicit in the number of 2020 significant digits. */ 2021 expAdjustment = dot - firstSignificantDigit; 2022 if(expAdjustment < 0) 2023 expAdjustment++; 2024 expAdjustment = expAdjustment * 4 - 1; 2025 2026 /* Adjust for writing the significand starting at the most 2027 significant nibble. */ 2028 expAdjustment += semantics->precision; 2029 expAdjustment -= partsCount * integerPartWidth; 2030 2031 /* Adjust for the given exponent. */ 2032 exponent = totalExponent(p, expAdjustment); 2033 } 2034 2035 return normalize(rounding_mode, lost_fraction); 2036} 2037 2038APFloat::opStatus 2039APFloat::roundSignificandWithExponent(const integerPart *decSigParts, 2040 unsigned sigPartCount, int exp, 2041 roundingMode rounding_mode) 2042{ 2043 unsigned int parts, pow5PartCount; 2044 fltSemantics calcSemantics = { 32767, -32767, 0, true }; 2045 integerPart pow5Parts[maxPowerOfFiveParts]; 2046 bool isNearest; 2047 2048 isNearest = (rounding_mode == rmNearestTiesToEven 2049 || rounding_mode == rmNearestTiesToAway); 2050 2051 parts = partCountForBits(semantics->precision + 11); 2052 2053 /* Calculate pow(5, abs(exp)). */ 2054 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 2055 2056 for (;; parts *= 2) { 2057 opStatus sigStatus, powStatus; 2058 unsigned int excessPrecision, truncatedBits; 2059 2060 calcSemantics.precision = parts * integerPartWidth - 1; 2061 excessPrecision = calcSemantics.precision - semantics->precision; 2062 truncatedBits = excessPrecision; 2063 2064 APFloat decSig(calcSemantics, fcZero, sign); 2065 APFloat pow5(calcSemantics, fcZero, false); 2066 2067 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 2068 rmNearestTiesToEven); 2069 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 2070 rmNearestTiesToEven); 2071 /* Add exp, as 10^n = 5^n * 2^n. */ 2072 decSig.exponent += exp; 2073 2074 lostFraction calcLostFraction; 2075 integerPart HUerr, HUdistance, powHUerr; 2076 2077 if (exp >= 0) { 2078 /* multiplySignificand leaves the precision-th bit set to 1. */ 2079 calcLostFraction = decSig.multiplySignificand(pow5, NULL); 2080 powHUerr = powStatus != opOK; 2081 } else { 2082 calcLostFraction = decSig.divideSignificand(pow5); 2083 /* Denormal numbers have less precision. */ 2084 if (decSig.exponent < semantics->minExponent) { 2085 excessPrecision += (semantics->minExponent - decSig.exponent); 2086 truncatedBits = excessPrecision; 2087 if (excessPrecision > calcSemantics.precision) 2088 excessPrecision = calcSemantics.precision; 2089 } 2090 /* Extra half-ulp lost in reciprocal of exponent. */ 2091 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0: 2; 2092 } 2093 2094 /* Both multiplySignificand and divideSignificand return the 2095 result with the integer bit set. */ 2096 assert (APInt::tcExtractBit 2097 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 2098 2099 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 2100 powHUerr); 2101 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 2102 excessPrecision, isNearest); 2103 2104 /* Are we guaranteed to round correctly if we truncate? */ 2105 if (HUdistance >= HUerr) { 2106 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 2107 calcSemantics.precision - excessPrecision, 2108 excessPrecision); 2109 /* Take the exponent of decSig. If we tcExtract-ed less bits 2110 above we must adjust our exponent to compensate for the 2111 implicit right shift. */ 2112 exponent = (decSig.exponent + semantics->precision 2113 - (calcSemantics.precision - excessPrecision)); 2114 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 2115 decSig.partCount(), 2116 truncatedBits); 2117 return normalize(rounding_mode, calcLostFraction); 2118 } 2119 } 2120} 2121 2122APFloat::opStatus 2123APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode) 2124{ 2125 decimalInfo D; 2126 opStatus fs; 2127 2128 /* Scan the text. */ 2129 interpretDecimal(p, &D); 2130 2131 /* Handle the quick cases. First the case of no significant digits, 2132 i.e. zero, and then exponents that are obviously too large or too 2133 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 2134 definitely overflows if 2135 2136 (exp - 1) * L >= maxExponent 2137 2138 and definitely underflows to zero where 2139 2140 (exp + 1) * L <= minExponent - precision 2141 2142 With integer arithmetic the tightest bounds for L are 2143 2144 93/28 < L < 196/59 [ numerator <= 256 ] 2145 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 2146 */ 2147 2148 if (*D.firstSigDigit == '0') { 2149 category = fcZero; 2150 fs = opOK; 2151 } else if ((D.normalizedExponent + 1) * 28738 2152 <= 8651 * (semantics->minExponent - (int) semantics->precision)) { 2153 /* Underflow to zero and round. */ 2154 zeroSignificand(); 2155 fs = normalize(rounding_mode, lfLessThanHalf); 2156 } else if ((D.normalizedExponent - 1) * 42039 2157 >= 12655 * semantics->maxExponent) { 2158 /* Overflow and round. */ 2159 fs = handleOverflow(rounding_mode); 2160 } else { 2161 integerPart *decSignificand; 2162 unsigned int partCount; 2163 2164 /* A tight upper bound on number of bits required to hold an 2165 N-digit decimal integer is N * 196 / 59. Allocate enough space 2166 to hold the full significand, and an extra part required by 2167 tcMultiplyPart. */ 2168 partCount = (D.lastSigDigit - D.firstSigDigit) + 1; 2169 partCount = partCountForBits(1 + 196 * partCount / 59); 2170 decSignificand = new integerPart[partCount + 1]; 2171 partCount = 0; 2172 2173 /* Convert to binary efficiently - we do almost all multiplication 2174 in an integerPart. When this would overflow do we do a single 2175 bignum multiplication, and then revert again to multiplication 2176 in an integerPart. */ 2177 do { 2178 integerPart decValue, val, multiplier; 2179 2180 val = 0; 2181 multiplier = 1; 2182 2183 do { 2184 if (*p == '.') 2185 p++; 2186 2187 decValue = decDigitValue(*p++); 2188 multiplier *= 10; 2189 val = val * 10 + decValue; 2190 /* The maximum number that can be multiplied by ten with any 2191 digit added without overflowing an integerPart. */ 2192 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 2193 2194 /* Multiply out the current part. */ 2195 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 2196 partCount, partCount + 1, false); 2197 2198 /* If we used another part (likely but not guaranteed), increase 2199 the count. */ 2200 if (decSignificand[partCount]) 2201 partCount++; 2202 } while (p <= D.lastSigDigit); 2203 2204 category = fcNormal; 2205 fs = roundSignificandWithExponent(decSignificand, partCount, 2206 D.exponent, rounding_mode); 2207 2208 delete [] decSignificand; 2209 } 2210 2211 return fs; 2212} 2213 2214APFloat::opStatus 2215APFloat::convertFromString(const char *p, roundingMode rounding_mode) 2216{ 2217 assertArithmeticOK(*semantics); 2218 2219 /* Handle a leading minus sign. */ 2220 if(*p == '-') 2221 sign = 1, p++; 2222 else 2223 sign = 0; 2224 2225 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) 2226 return convertFromHexadecimalString(p + 2, rounding_mode); 2227 else 2228 return convertFromDecimalString(p, rounding_mode); 2229} 2230 2231/* Write out a hexadecimal representation of the floating point value 2232 to DST, which must be of sufficient size, in the C99 form 2233 [-]0xh.hhhhp[+-]d. Return the number of characters written, 2234 excluding the terminating NUL. 2235 2236 If UPPERCASE, the output is in upper case, otherwise in lower case. 2237 2238 HEXDIGITS digits appear altogether, rounding the value if 2239 necessary. If HEXDIGITS is 0, the minimal precision to display the 2240 number precisely is used instead. If nothing would appear after 2241 the decimal point it is suppressed. 2242 2243 The decimal exponent is always printed and has at least one digit. 2244 Zero values display an exponent of zero. Infinities and NaNs 2245 appear as "infinity" or "nan" respectively. 2246 2247 The above rules are as specified by C99. There is ambiguity about 2248 what the leading hexadecimal digit should be. This implementation 2249 uses whatever is necessary so that the exponent is displayed as 2250 stored. This implies the exponent will fall within the IEEE format 2251 range, and the leading hexadecimal digit will be 0 (for denormals), 2252 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 2253 any other digits zero). 2254*/ 2255unsigned int 2256APFloat::convertToHexString(char *dst, unsigned int hexDigits, 2257 bool upperCase, roundingMode rounding_mode) const 2258{ 2259 char *p; 2260 2261 assertArithmeticOK(*semantics); 2262 2263 p = dst; 2264 if (sign) 2265 *dst++ = '-'; 2266 2267 switch (category) { 2268 case fcInfinity: 2269 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 2270 dst += sizeof infinityL - 1; 2271 break; 2272 2273 case fcNaN: 2274 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 2275 dst += sizeof NaNU - 1; 2276 break; 2277 2278 case fcZero: 2279 *dst++ = '0'; 2280 *dst++ = upperCase ? 'X': 'x'; 2281 *dst++ = '0'; 2282 if (hexDigits > 1) { 2283 *dst++ = '.'; 2284 memset (dst, '0', hexDigits - 1); 2285 dst += hexDigits - 1; 2286 } 2287 *dst++ = upperCase ? 'P': 'p'; 2288 *dst++ = '0'; 2289 break; 2290 2291 case fcNormal: 2292 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 2293 break; 2294 } 2295 2296 *dst = 0; 2297 2298 return dst - p; 2299} 2300 2301/* Does the hard work of outputting the correctly rounded hexadecimal 2302 form of a normal floating point number with the specified number of 2303 hexadecimal digits. If HEXDIGITS is zero the minimum number of 2304 digits necessary to print the value precisely is output. */ 2305char * 2306APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 2307 bool upperCase, 2308 roundingMode rounding_mode) const 2309{ 2310 unsigned int count, valueBits, shift, partsCount, outputDigits; 2311 const char *hexDigitChars; 2312 const integerPart *significand; 2313 char *p; 2314 bool roundUp; 2315 2316 *dst++ = '0'; 2317 *dst++ = upperCase ? 'X': 'x'; 2318 2319 roundUp = false; 2320 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 2321 2322 significand = significandParts(); 2323 partsCount = partCount(); 2324 2325 /* +3 because the first digit only uses the single integer bit, so 2326 we have 3 virtual zero most-significant-bits. */ 2327 valueBits = semantics->precision + 3; 2328 shift = integerPartWidth - valueBits % integerPartWidth; 2329 2330 /* The natural number of digits required ignoring trailing 2331 insignificant zeroes. */ 2332 outputDigits = (valueBits - significandLSB () + 3) / 4; 2333 2334 /* hexDigits of zero means use the required number for the 2335 precision. Otherwise, see if we are truncating. If we are, 2336 find out if we need to round away from zero. */ 2337 if (hexDigits) { 2338 if (hexDigits < outputDigits) { 2339 /* We are dropping non-zero bits, so need to check how to round. 2340 "bits" is the number of dropped bits. */ 2341 unsigned int bits; 2342 lostFraction fraction; 2343 2344 bits = valueBits - hexDigits * 4; 2345 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 2346 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 2347 } 2348 outputDigits = hexDigits; 2349 } 2350 2351 /* Write the digits consecutively, and start writing in the location 2352 of the hexadecimal point. We move the most significant digit 2353 left and add the hexadecimal point later. */ 2354 p = ++dst; 2355 2356 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 2357 2358 while (outputDigits && count) { 2359 integerPart part; 2360 2361 /* Put the most significant integerPartWidth bits in "part". */ 2362 if (--count == partsCount) 2363 part = 0; /* An imaginary higher zero part. */ 2364 else 2365 part = significand[count] << shift; 2366 2367 if (count && shift) 2368 part |= significand[count - 1] >> (integerPartWidth - shift); 2369 2370 /* Convert as much of "part" to hexdigits as we can. */ 2371 unsigned int curDigits = integerPartWidth / 4; 2372 2373 if (curDigits > outputDigits) 2374 curDigits = outputDigits; 2375 dst += partAsHex (dst, part, curDigits, hexDigitChars); 2376 outputDigits -= curDigits; 2377 } 2378 2379 if (roundUp) { 2380 char *q = dst; 2381 2382 /* Note that hexDigitChars has a trailing '0'. */ 2383 do { 2384 q--; 2385 *q = hexDigitChars[hexDigitValue (*q) + 1]; 2386 } while (*q == '0'); 2387 assert (q >= p); 2388 } else { 2389 /* Add trailing zeroes. */ 2390 memset (dst, '0', outputDigits); 2391 dst += outputDigits; 2392 } 2393 2394 /* Move the most significant digit to before the point, and if there 2395 is something after the decimal point add it. This must come 2396 after rounding above. */ 2397 p[-1] = p[0]; 2398 if (dst -1 == p) 2399 dst--; 2400 else 2401 p[0] = '.'; 2402 2403 /* Finally output the exponent. */ 2404 *dst++ = upperCase ? 'P': 'p'; 2405 2406 return writeSignedDecimal (dst, exponent); 2407} 2408 2409// For good performance it is desirable for different APFloats 2410// to produce different integers. 2411uint32_t 2412APFloat::getHashValue() const 2413{ 2414 if (category==fcZero) return sign<<8 | semantics->precision ; 2415 else if (category==fcInfinity) return sign<<9 | semantics->precision; 2416 else if (category==fcNaN) return 1<<10 | semantics->precision; 2417 else { 2418 uint32_t hash = sign<<11 | semantics->precision | exponent<<12; 2419 const integerPart* p = significandParts(); 2420 for (int i=partCount(); i>0; i--, p++) 2421 hash ^= ((uint32_t)*p) ^ (*p)>>32; 2422 return hash; 2423 } 2424} 2425 2426// Conversion from APFloat to/from host float/double. It may eventually be 2427// possible to eliminate these and have everybody deal with APFloats, but that 2428// will take a while. This approach will not easily extend to long double. 2429// Current implementation requires integerPartWidth==64, which is correct at 2430// the moment but could be made more general. 2431 2432// Denormals have exponent minExponent in APFloat, but minExponent-1 in 2433// the actual IEEE respresentations. We compensate for that here. 2434 2435APInt 2436APFloat::convertF80LongDoubleAPFloatToAPInt() const 2437{ 2438 assert(semantics == (const llvm::fltSemantics* const)&x87DoubleExtended); 2439 assert (partCount()==2); 2440 2441 uint64_t myexponent, mysignificand; 2442 2443 if (category==fcNormal) { 2444 myexponent = exponent+16383; //bias 2445 mysignificand = significandParts()[0]; 2446 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 2447 myexponent = 0; // denormal 2448 } else if (category==fcZero) { 2449 myexponent = 0; 2450 mysignificand = 0; 2451 } else if (category==fcInfinity) { 2452 myexponent = 0x7fff; 2453 mysignificand = 0x8000000000000000ULL; 2454 } else { 2455 assert(category == fcNaN && "Unknown category"); 2456 myexponent = 0x7fff; 2457 mysignificand = significandParts()[0]; 2458 } 2459 2460 uint64_t words[2]; 2461 words[0] = (((uint64_t)sign & 1) << 63) | 2462 ((myexponent & 0x7fff) << 48) | 2463 ((mysignificand >>16) & 0xffffffffffffLL); 2464 words[1] = mysignificand & 0xffff; 2465 return APInt(80, 2, words); 2466} 2467 2468APInt 2469APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const 2470{ 2471 assert(semantics == (const llvm::fltSemantics* const)&PPCDoubleDouble); 2472 assert (partCount()==2); 2473 2474 uint64_t myexponent, mysignificand, myexponent2, mysignificand2; 2475 2476 if (category==fcNormal) { 2477 myexponent = exponent + 1023; //bias 2478 myexponent2 = exponent2 + 1023; 2479 mysignificand = significandParts()[0]; 2480 mysignificand2 = significandParts()[1]; 2481 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 2482 myexponent = 0; // denormal 2483 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL)) 2484 myexponent2 = 0; // denormal 2485 } else if (category==fcZero) { 2486 myexponent = 0; 2487 mysignificand = 0; 2488 myexponent2 = 0; 2489 mysignificand2 = 0; 2490 } else if (category==fcInfinity) { 2491 myexponent = 0x7ff; 2492 myexponent2 = 0; 2493 mysignificand = 0; 2494 mysignificand2 = 0; 2495 } else { 2496 assert(category == fcNaN && "Unknown category"); 2497 myexponent = 0x7ff; 2498 mysignificand = significandParts()[0]; 2499 myexponent2 = exponent2; 2500 mysignificand2 = significandParts()[1]; 2501 } 2502 2503 uint64_t words[2]; 2504 words[0] = (((uint64_t)sign & 1) << 63) | 2505 ((myexponent & 0x7ff) << 52) | 2506 (mysignificand & 0xfffffffffffffLL); 2507 words[1] = (((uint64_t)sign2 & 1) << 63) | 2508 ((myexponent2 & 0x7ff) << 52) | 2509 (mysignificand2 & 0xfffffffffffffLL); 2510 return APInt(128, 2, words); 2511} 2512 2513APInt 2514APFloat::convertDoubleAPFloatToAPInt() const 2515{ 2516 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); 2517 assert (partCount()==1); 2518 2519 uint64_t myexponent, mysignificand; 2520 2521 if (category==fcNormal) { 2522 myexponent = exponent+1023; //bias 2523 mysignificand = *significandParts(); 2524 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 2525 myexponent = 0; // denormal 2526 } else if (category==fcZero) { 2527 myexponent = 0; 2528 mysignificand = 0; 2529 } else if (category==fcInfinity) { 2530 myexponent = 0x7ff; 2531 mysignificand = 0; 2532 } else { 2533 assert(category == fcNaN && "Unknown category!"); 2534 myexponent = 0x7ff; 2535 mysignificand = *significandParts(); 2536 } 2537 2538 return APInt(64, (((((uint64_t)sign & 1) << 63) | 2539 ((myexponent & 0x7ff) << 52) | 2540 (mysignificand & 0xfffffffffffffLL)))); 2541} 2542 2543APInt 2544APFloat::convertFloatAPFloatToAPInt() const 2545{ 2546 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); 2547 assert (partCount()==1); 2548 2549 uint32_t myexponent, mysignificand; 2550 2551 if (category==fcNormal) { 2552 myexponent = exponent+127; //bias 2553 mysignificand = *significandParts(); 2554 if (myexponent == 1 && !(mysignificand & 0x400000)) 2555 myexponent = 0; // denormal 2556 } else if (category==fcZero) { 2557 myexponent = 0; 2558 mysignificand = 0; 2559 } else if (category==fcInfinity) { 2560 myexponent = 0xff; 2561 mysignificand = 0; 2562 } else { 2563 assert(category == fcNaN && "Unknown category!"); 2564 myexponent = 0xff; 2565 mysignificand = *significandParts(); 2566 } 2567 2568 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | 2569 (mysignificand & 0x7fffff))); 2570} 2571 2572// This function creates an APInt that is just a bit map of the floating 2573// point constant as it would appear in memory. It is not a conversion, 2574// and treating the result as a normal integer is unlikely to be useful. 2575 2576APInt 2577APFloat::convertToAPInt() const 2578{ 2579 if (semantics == (const llvm::fltSemantics* const)&IEEEsingle) 2580 return convertFloatAPFloatToAPInt(); 2581 2582 if (semantics == (const llvm::fltSemantics* const)&IEEEdouble) 2583 return convertDoubleAPFloatToAPInt(); 2584 2585 if (semantics == (const llvm::fltSemantics* const)&PPCDoubleDouble) 2586 return convertPPCDoubleDoubleAPFloatToAPInt(); 2587 2588 assert(semantics == (const llvm::fltSemantics* const)&x87DoubleExtended && 2589 "unknown format!"); 2590 return convertF80LongDoubleAPFloatToAPInt(); 2591} 2592 2593float 2594APFloat::convertToFloat() const 2595{ 2596 assert(semantics == (const llvm::fltSemantics* const)&IEEEsingle); 2597 APInt api = convertToAPInt(); 2598 return api.bitsToFloat(); 2599} 2600 2601double 2602APFloat::convertToDouble() const 2603{ 2604 assert(semantics == (const llvm::fltSemantics* const)&IEEEdouble); 2605 APInt api = convertToAPInt(); 2606 return api.bitsToDouble(); 2607} 2608 2609/// Integer bit is explicit in this format. Current Intel book does not 2610/// define meaning of: 2611/// exponent = all 1's, integer bit not set. 2612/// exponent = 0, integer bit set. (formerly "psuedodenormals") 2613/// exponent!=0 nor all 1's, integer bit not set. (formerly "unnormals") 2614void 2615APFloat::initFromF80LongDoubleAPInt(const APInt &api) 2616{ 2617 assert(api.getBitWidth()==80); 2618 uint64_t i1 = api.getRawData()[0]; 2619 uint64_t i2 = api.getRawData()[1]; 2620 uint64_t myexponent = (i1 >> 48) & 0x7fff; 2621 uint64_t mysignificand = ((i1 << 16) & 0xffffffffffff0000ULL) | 2622 (i2 & 0xffff); 2623 2624 initialize(&APFloat::x87DoubleExtended); 2625 assert(partCount()==2); 2626 2627 sign = i1>>63; 2628 if (myexponent==0 && mysignificand==0) { 2629 // exponent, significand meaningless 2630 category = fcZero; 2631 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 2632 // exponent, significand meaningless 2633 category = fcInfinity; 2634 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) { 2635 // exponent meaningless 2636 category = fcNaN; 2637 significandParts()[0] = mysignificand; 2638 significandParts()[1] = 0; 2639 } else { 2640 category = fcNormal; 2641 exponent = myexponent - 16383; 2642 significandParts()[0] = mysignificand; 2643 significandParts()[1] = 0; 2644 if (myexponent==0) // denormal 2645 exponent = -16382; 2646 } 2647} 2648 2649void 2650APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) 2651{ 2652 assert(api.getBitWidth()==128); 2653 uint64_t i1 = api.getRawData()[0]; 2654 uint64_t i2 = api.getRawData()[1]; 2655 uint64_t myexponent = (i1 >> 52) & 0x7ff; 2656 uint64_t mysignificand = i1 & 0xfffffffffffffLL; 2657 uint64_t myexponent2 = (i2 >> 52) & 0x7ff; 2658 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL; 2659 2660 initialize(&APFloat::PPCDoubleDouble); 2661 assert(partCount()==2); 2662 2663 sign = i1>>63; 2664 sign2 = i2>>63; 2665 if (myexponent==0 && mysignificand==0) { 2666 // exponent, significand meaningless 2667 // exponent2 and significand2 are required to be 0; we don't check 2668 category = fcZero; 2669 } else if (myexponent==0x7ff && mysignificand==0) { 2670 // exponent, significand meaningless 2671 // exponent2 and significand2 are required to be 0; we don't check 2672 category = fcInfinity; 2673 } else if (myexponent==0x7ff && mysignificand!=0) { 2674 // exponent meaningless. So is the whole second word, but keep it 2675 // for determinism. 2676 category = fcNaN; 2677 exponent2 = myexponent2; 2678 significandParts()[0] = mysignificand; 2679 significandParts()[1] = mysignificand2; 2680 } else { 2681 category = fcNormal; 2682 // Note there is no category2; the second word is treated as if it is 2683 // fcNormal, although it might be something else considered by itself. 2684 exponent = myexponent - 1023; 2685 exponent2 = myexponent2 - 1023; 2686 significandParts()[0] = mysignificand; 2687 significandParts()[1] = mysignificand2; 2688 if (myexponent==0) // denormal 2689 exponent = -1022; 2690 else 2691 significandParts()[0] |= 0x10000000000000LL; // integer bit 2692 if (myexponent2==0) 2693 exponent2 = -1022; 2694 else 2695 significandParts()[1] |= 0x10000000000000LL; // integer bit 2696 } 2697} 2698 2699void 2700APFloat::initFromDoubleAPInt(const APInt &api) 2701{ 2702 assert(api.getBitWidth()==64); 2703 uint64_t i = *api.getRawData(); 2704 uint64_t myexponent = (i >> 52) & 0x7ff; 2705 uint64_t mysignificand = i & 0xfffffffffffffLL; 2706 2707 initialize(&APFloat::IEEEdouble); 2708 assert(partCount()==1); 2709 2710 sign = i>>63; 2711 if (myexponent==0 && mysignificand==0) { 2712 // exponent, significand meaningless 2713 category = fcZero; 2714 } else if (myexponent==0x7ff && mysignificand==0) { 2715 // exponent, significand meaningless 2716 category = fcInfinity; 2717 } else if (myexponent==0x7ff && mysignificand!=0) { 2718 // exponent meaningless 2719 category = fcNaN; 2720 *significandParts() = mysignificand; 2721 } else { 2722 category = fcNormal; 2723 exponent = myexponent - 1023; 2724 *significandParts() = mysignificand; 2725 if (myexponent==0) // denormal 2726 exponent = -1022; 2727 else 2728 *significandParts() |= 0x10000000000000LL; // integer bit 2729 } 2730} 2731 2732void 2733APFloat::initFromFloatAPInt(const APInt & api) 2734{ 2735 assert(api.getBitWidth()==32); 2736 uint32_t i = (uint32_t)*api.getRawData(); 2737 uint32_t myexponent = (i >> 23) & 0xff; 2738 uint32_t mysignificand = i & 0x7fffff; 2739 2740 initialize(&APFloat::IEEEsingle); 2741 assert(partCount()==1); 2742 2743 sign = i >> 31; 2744 if (myexponent==0 && mysignificand==0) { 2745 // exponent, significand meaningless 2746 category = fcZero; 2747 } else if (myexponent==0xff && mysignificand==0) { 2748 // exponent, significand meaningless 2749 category = fcInfinity; 2750 } else if (myexponent==0xff && mysignificand!=0) { 2751 // sign, exponent, significand meaningless 2752 category = fcNaN; 2753 *significandParts() = mysignificand; 2754 } else { 2755 category = fcNormal; 2756 exponent = myexponent - 127; //bias 2757 *significandParts() = mysignificand; 2758 if (myexponent==0) // denormal 2759 exponent = -126; 2760 else 2761 *significandParts() |= 0x800000; // integer bit 2762 } 2763} 2764 2765/// Treat api as containing the bits of a floating point number. Currently 2766/// we infer the floating point type from the size of the APInt. The 2767/// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful 2768/// when the size is anything else). 2769void 2770APFloat::initFromAPInt(const APInt& api, bool isIEEE) 2771{ 2772 if (api.getBitWidth() == 32) 2773 return initFromFloatAPInt(api); 2774 else if (api.getBitWidth()==64) 2775 return initFromDoubleAPInt(api); 2776 else if (api.getBitWidth()==80) 2777 return initFromF80LongDoubleAPInt(api); 2778 else if (api.getBitWidth()==128 && !isIEEE) 2779 return initFromPPCDoubleDoubleAPInt(api); 2780 else 2781 assert(0); 2782} 2783 2784APFloat::APFloat(const APInt& api, bool isIEEE) 2785{ 2786 initFromAPInt(api, isIEEE); 2787} 2788 2789APFloat::APFloat(float f) 2790{ 2791 APInt api = APInt(32, 0); 2792 initFromAPInt(api.floatToBits(f)); 2793} 2794 2795APFloat::APFloat(double d) 2796{ 2797 APInt api = APInt(64, 0); 2798 initFromAPInt(api.doubleToBits(d)); 2799} 2800