APFloat.cpp revision 9f17eb0b79717d479e462f0284442adbeae903ef
1//===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements a class to represent arbitrary precision floating 11// point values and provide a variety of arithmetic operations on them. 12// 13//===----------------------------------------------------------------------===// 14 15#include "llvm/ADT/APFloat.h" 16#include "llvm/ADT/FoldingSet.h" 17#include <cassert> 18#include <cstring> 19#include "llvm/Support/MathExtras.h" 20 21using namespace llvm; 22 23#define convolve(lhs, rhs) ((lhs) * 4 + (rhs)) 24 25/* Assumed in hexadecimal significand parsing, and conversion to 26 hexadecimal strings. */ 27#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1] 28COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0); 29 30namespace llvm { 31 32 /* Represents floating point arithmetic semantics. */ 33 struct fltSemantics { 34 /* The largest E such that 2^E is representable; this matches the 35 definition of IEEE 754. */ 36 exponent_t maxExponent; 37 38 /* The smallest E such that 2^E is a normalized number; this 39 matches the definition of IEEE 754. */ 40 exponent_t minExponent; 41 42 /* Number of bits in the significand. This includes the integer 43 bit. */ 44 unsigned int precision; 45 46 /* True if arithmetic is supported. */ 47 unsigned int arithmeticOK; 48 }; 49 50 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true }; 51 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true }; 52 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true }; 53 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true }; 54 const fltSemantics APFloat::Bogus = { 0, 0, 0, true }; 55 56 // The PowerPC format consists of two doubles. It does not map cleanly 57 // onto the usual format above. For now only storage of constants of 58 // this type is supported, no arithmetic. 59 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false }; 60 61 /* A tight upper bound on number of parts required to hold the value 62 pow(5, power) is 63 64 power * 815 / (351 * integerPartWidth) + 1 65 66 However, whilst the result may require only this many parts, 67 because we are multiplying two values to get it, the 68 multiplication may require an extra part with the excess part 69 being zero (consider the trivial case of 1 * 1, tcFullMultiply 70 requires two parts to hold the single-part result). So we add an 71 extra one to guarantee enough space whilst multiplying. */ 72 const unsigned int maxExponent = 16383; 73 const unsigned int maxPrecision = 113; 74 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 75 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) 76 / (351 * integerPartWidth)); 77} 78 79/* Put a bunch of private, handy routines in an anonymous namespace. */ 80namespace { 81 82 static inline unsigned int 83 partCountForBits(unsigned int bits) 84 { 85 return ((bits) + integerPartWidth - 1) / integerPartWidth; 86 } 87 88 /* Returns 0U-9U. Return values >= 10U are not digits. */ 89 static inline unsigned int 90 decDigitValue(unsigned int c) 91 { 92 return c - '0'; 93 } 94 95 static unsigned int 96 hexDigitValue(unsigned int c) 97 { 98 unsigned int r; 99 100 r = c - '0'; 101 if(r <= 9) 102 return r; 103 104 r = c - 'A'; 105 if(r <= 5) 106 return r + 10; 107 108 r = c - 'a'; 109 if(r <= 5) 110 return r + 10; 111 112 return -1U; 113 } 114 115 static inline void 116 assertArithmeticOK(const llvm::fltSemantics &semantics) { 117 assert(semantics.arithmeticOK 118 && "Compile-time arithmetic does not support these semantics"); 119 } 120 121 /* Return the value of a decimal exponent of the form 122 [+-]ddddddd. 123 124 If the exponent overflows, returns a large exponent with the 125 appropriate sign. */ 126 static int 127 readExponent(const char *p) 128 { 129 bool isNegative; 130 unsigned int absExponent; 131 const unsigned int overlargeExponent = 24000; /* FIXME. */ 132 133 isNegative = (*p == '-'); 134 if (*p == '-' || *p == '+') 135 p++; 136 137 absExponent = decDigitValue(*p++); 138 assert (absExponent < 10U); 139 140 for (;;) { 141 unsigned int value; 142 143 value = decDigitValue(*p); 144 if (value >= 10U) 145 break; 146 147 p++; 148 value += absExponent * 10; 149 if (absExponent >= overlargeExponent) { 150 absExponent = overlargeExponent; 151 break; 152 } 153 absExponent = value; 154 } 155 156 if (isNegative) 157 return -(int) absExponent; 158 else 159 return (int) absExponent; 160 } 161 162 /* This is ugly and needs cleaning up, but I don't immediately see 163 how whilst remaining safe. */ 164 static int 165 totalExponent(const char *p, int exponentAdjustment) 166 { 167 int unsignedExponent; 168 bool negative, overflow; 169 int exponent; 170 171 /* Move past the exponent letter and sign to the digits. */ 172 p++; 173 negative = *p == '-'; 174 if(*p == '-' || *p == '+') 175 p++; 176 177 unsignedExponent = 0; 178 overflow = false; 179 for(;;) { 180 unsigned int value; 181 182 value = decDigitValue(*p); 183 if(value >= 10U) 184 break; 185 186 p++; 187 unsignedExponent = unsignedExponent * 10 + value; 188 if(unsignedExponent > 65535) 189 overflow = true; 190 } 191 192 if(exponentAdjustment > 65535 || exponentAdjustment < -65536) 193 overflow = true; 194 195 if(!overflow) { 196 exponent = unsignedExponent; 197 if(negative) 198 exponent = -exponent; 199 exponent += exponentAdjustment; 200 if(exponent > 65535 || exponent < -65536) 201 overflow = true; 202 } 203 204 if(overflow) 205 exponent = negative ? -65536: 65535; 206 207 return exponent; 208 } 209 210 static const char * 211 skipLeadingZeroesAndAnyDot(const char *p, const char **dot) 212 { 213 *dot = 0; 214 while(*p == '0') 215 p++; 216 217 if(*p == '.') { 218 *dot = p++; 219 while(*p == '0') 220 p++; 221 } 222 223 return p; 224 } 225 226 /* Given a normal decimal floating point number of the form 227 228 dddd.dddd[eE][+-]ddd 229 230 where the decimal point and exponent are optional, fill out the 231 structure D. Exponent is appropriate if the significand is 232 treated as an integer, and normalizedExponent if the significand 233 is taken to have the decimal point after a single leading 234 non-zero digit. 235 236 If the value is zero, V->firstSigDigit points to a non-digit, and 237 the return exponent is zero. 238 */ 239 struct decimalInfo { 240 const char *firstSigDigit; 241 const char *lastSigDigit; 242 int exponent; 243 int normalizedExponent; 244 }; 245 246 static void 247 interpretDecimal(const char *p, decimalInfo *D) 248 { 249 const char *dot; 250 251 p = skipLeadingZeroesAndAnyDot (p, &dot); 252 253 D->firstSigDigit = p; 254 D->exponent = 0; 255 D->normalizedExponent = 0; 256 257 for (;;) { 258 if (*p == '.') { 259 assert(dot == 0); 260 dot = p++; 261 } 262 if (decDigitValue(*p) >= 10U) 263 break; 264 p++; 265 } 266 267 /* If number is all zerooes accept any exponent. */ 268 if (p != D->firstSigDigit) { 269 if (*p == 'e' || *p == 'E') 270 D->exponent = readExponent(p + 1); 271 272 /* Implied decimal point? */ 273 if (!dot) 274 dot = p; 275 276 /* Drop insignificant trailing zeroes. */ 277 do 278 do 279 p--; 280 while (*p == '0'); 281 while (*p == '.'); 282 283 /* Adjust the exponents for any decimal point. */ 284 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p)); 285 D->normalizedExponent = (D->exponent + 286 static_cast<exponent_t>((p - D->firstSigDigit) 287 - (dot > D->firstSigDigit && dot < p))); 288 } 289 290 D->lastSigDigit = p; 291 } 292 293 /* Return the trailing fraction of a hexadecimal number. 294 DIGITVALUE is the first hex digit of the fraction, P points to 295 the next digit. */ 296 static lostFraction 297 trailingHexadecimalFraction(const char *p, unsigned int digitValue) 298 { 299 unsigned int hexDigit; 300 301 /* If the first trailing digit isn't 0 or 8 we can work out the 302 fraction immediately. */ 303 if(digitValue > 8) 304 return lfMoreThanHalf; 305 else if(digitValue < 8 && digitValue > 0) 306 return lfLessThanHalf; 307 308 /* Otherwise we need to find the first non-zero digit. */ 309 while(*p == '0') 310 p++; 311 312 hexDigit = hexDigitValue(*p); 313 314 /* If we ran off the end it is exactly zero or one-half, otherwise 315 a little more. */ 316 if(hexDigit == -1U) 317 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 318 else 319 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 320 } 321 322 /* Return the fraction lost were a bignum truncated losing the least 323 significant BITS bits. */ 324 static lostFraction 325 lostFractionThroughTruncation(const integerPart *parts, 326 unsigned int partCount, 327 unsigned int bits) 328 { 329 unsigned int lsb; 330 331 lsb = APInt::tcLSB(parts, partCount); 332 333 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ 334 if(bits <= lsb) 335 return lfExactlyZero; 336 if(bits == lsb + 1) 337 return lfExactlyHalf; 338 if(bits <= partCount * integerPartWidth 339 && APInt::tcExtractBit(parts, bits - 1)) 340 return lfMoreThanHalf; 341 342 return lfLessThanHalf; 343 } 344 345 /* Shift DST right BITS bits noting lost fraction. */ 346 static lostFraction 347 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits) 348 { 349 lostFraction lost_fraction; 350 351 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 352 353 APInt::tcShiftRight(dst, parts, bits); 354 355 return lost_fraction; 356 } 357 358 /* Combine the effect of two lost fractions. */ 359 static lostFraction 360 combineLostFractions(lostFraction moreSignificant, 361 lostFraction lessSignificant) 362 { 363 if(lessSignificant != lfExactlyZero) { 364 if(moreSignificant == lfExactlyZero) 365 moreSignificant = lfLessThanHalf; 366 else if(moreSignificant == lfExactlyHalf) 367 moreSignificant = lfMoreThanHalf; 368 } 369 370 return moreSignificant; 371 } 372 373 /* The error from the true value, in half-ulps, on multiplying two 374 floating point numbers, which differ from the value they 375 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 376 than the returned value. 377 378 See "How to Read Floating Point Numbers Accurately" by William D 379 Clinger. */ 380 static unsigned int 381 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 382 { 383 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 384 385 if (HUerr1 + HUerr2 == 0) 386 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 387 else 388 return inexactMultiply + 2 * (HUerr1 + HUerr2); 389 } 390 391 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 392 when the least significant BITS are truncated. BITS cannot be 393 zero. */ 394 static integerPart 395 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) 396 { 397 unsigned int count, partBits; 398 integerPart part, boundary; 399 400 assert (bits != 0); 401 402 bits--; 403 count = bits / integerPartWidth; 404 partBits = bits % integerPartWidth + 1; 405 406 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits)); 407 408 if (isNearest) 409 boundary = (integerPart) 1 << (partBits - 1); 410 else 411 boundary = 0; 412 413 if (count == 0) { 414 if (part - boundary <= boundary - part) 415 return part - boundary; 416 else 417 return boundary - part; 418 } 419 420 if (part == boundary) { 421 while (--count) 422 if (parts[count]) 423 return ~(integerPart) 0; /* A lot. */ 424 425 return parts[0]; 426 } else if (part == boundary - 1) { 427 while (--count) 428 if (~parts[count]) 429 return ~(integerPart) 0; /* A lot. */ 430 431 return -parts[0]; 432 } 433 434 return ~(integerPart) 0; /* A lot. */ 435 } 436 437 /* Place pow(5, power) in DST, and return the number of parts used. 438 DST must be at least one part larger than size of the answer. */ 439 static unsigned int 440 powerOf5(integerPart *dst, unsigned int power) 441 { 442 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 443 15625, 78125 }; 444 static integerPart pow5s[maxPowerOfFiveParts * 2 + 5] = { 78125 * 5 }; 445 static unsigned int partsCount[16] = { 1 }; 446 447 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; 448 unsigned int result; 449 450 assert(power <= maxExponent); 451 452 p1 = dst; 453 p2 = scratch; 454 455 *p1 = firstEightPowers[power & 7]; 456 power >>= 3; 457 458 result = 1; 459 pow5 = pow5s; 460 461 for (unsigned int n = 0; power; power >>= 1, n++) { 462 unsigned int pc; 463 464 pc = partsCount[n]; 465 466 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ 467 if (pc == 0) { 468 pc = partsCount[n - 1]; 469 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); 470 pc *= 2; 471 if (pow5[pc - 1] == 0) 472 pc--; 473 partsCount[n] = pc; 474 } 475 476 if (power & 1) { 477 integerPart *tmp; 478 479 APInt::tcFullMultiply(p2, p1, pow5, result, pc); 480 result += pc; 481 if (p2[result - 1] == 0) 482 result--; 483 484 /* Now result is in p1 with partsCount parts and p2 is scratch 485 space. */ 486 tmp = p1, p1 = p2, p2 = tmp; 487 } 488 489 pow5 += pc; 490 } 491 492 if (p1 != dst) 493 APInt::tcAssign(dst, p1, result); 494 495 return result; 496 } 497 498 /* Zero at the end to avoid modular arithmetic when adding one; used 499 when rounding up during hexadecimal output. */ 500 static const char hexDigitsLower[] = "0123456789abcdef0"; 501 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 502 static const char infinityL[] = "infinity"; 503 static const char infinityU[] = "INFINITY"; 504 static const char NaNL[] = "nan"; 505 static const char NaNU[] = "NAN"; 506 507 /* Write out an integerPart in hexadecimal, starting with the most 508 significant nibble. Write out exactly COUNT hexdigits, return 509 COUNT. */ 510 static unsigned int 511 partAsHex (char *dst, integerPart part, unsigned int count, 512 const char *hexDigitChars) 513 { 514 unsigned int result = count; 515 516 assert (count != 0 && count <= integerPartWidth / 4); 517 518 part >>= (integerPartWidth - 4 * count); 519 while (count--) { 520 dst[count] = hexDigitChars[part & 0xf]; 521 part >>= 4; 522 } 523 524 return result; 525 } 526 527 /* Write out an unsigned decimal integer. */ 528 static char * 529 writeUnsignedDecimal (char *dst, unsigned int n) 530 { 531 char buff[40], *p; 532 533 p = buff; 534 do 535 *p++ = '0' + n % 10; 536 while (n /= 10); 537 538 do 539 *dst++ = *--p; 540 while (p != buff); 541 542 return dst; 543 } 544 545 /* Write out a signed decimal integer. */ 546 static char * 547 writeSignedDecimal (char *dst, int value) 548 { 549 if (value < 0) { 550 *dst++ = '-'; 551 dst = writeUnsignedDecimal(dst, -(unsigned) value); 552 } else 553 dst = writeUnsignedDecimal(dst, value); 554 555 return dst; 556 } 557} 558 559/* Constructors. */ 560void 561APFloat::initialize(const fltSemantics *ourSemantics) 562{ 563 unsigned int count; 564 565 semantics = ourSemantics; 566 count = partCount(); 567 if(count > 1) 568 significand.parts = new integerPart[count]; 569} 570 571void 572APFloat::freeSignificand() 573{ 574 if(partCount() > 1) 575 delete [] significand.parts; 576} 577 578void 579APFloat::assign(const APFloat &rhs) 580{ 581 assert(semantics == rhs.semantics); 582 583 sign = rhs.sign; 584 category = rhs.category; 585 exponent = rhs.exponent; 586 sign2 = rhs.sign2; 587 exponent2 = rhs.exponent2; 588 if(category == fcNormal || category == fcNaN) 589 copySignificand(rhs); 590} 591 592void 593APFloat::copySignificand(const APFloat &rhs) 594{ 595 assert(category == fcNormal || category == fcNaN); 596 assert(rhs.partCount() >= partCount()); 597 598 APInt::tcAssign(significandParts(), rhs.significandParts(), 599 partCount()); 600} 601 602/* Make this number a NaN, with an arbitrary but deterministic value 603 for the significand. */ 604void 605APFloat::makeNaN(void) 606{ 607 category = fcNaN; 608 APInt::tcSet(significandParts(), ~0U, partCount()); 609} 610 611APFloat & 612APFloat::operator=(const APFloat &rhs) 613{ 614 if(this != &rhs) { 615 if(semantics != rhs.semantics) { 616 freeSignificand(); 617 initialize(rhs.semantics); 618 } 619 assign(rhs); 620 } 621 622 return *this; 623} 624 625bool 626APFloat::bitwiseIsEqual(const APFloat &rhs) const { 627 if (this == &rhs) 628 return true; 629 if (semantics != rhs.semantics || 630 category != rhs.category || 631 sign != rhs.sign) 632 return false; 633 if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble && 634 sign2 != rhs.sign2) 635 return false; 636 if (category==fcZero || category==fcInfinity) 637 return true; 638 else if (category==fcNormal && exponent!=rhs.exponent) 639 return false; 640 else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble && 641 exponent2!=rhs.exponent2) 642 return false; 643 else { 644 int i= partCount(); 645 const integerPart* p=significandParts(); 646 const integerPart* q=rhs.significandParts(); 647 for (; i>0; i--, p++, q++) { 648 if (*p != *q) 649 return false; 650 } 651 return true; 652 } 653} 654 655APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) 656{ 657 assertArithmeticOK(ourSemantics); 658 initialize(&ourSemantics); 659 sign = 0; 660 zeroSignificand(); 661 exponent = ourSemantics.precision - 1; 662 significandParts()[0] = value; 663 normalize(rmNearestTiesToEven, lfExactlyZero); 664} 665 666APFloat::APFloat(const fltSemantics &ourSemantics, 667 fltCategory ourCategory, bool negative) 668{ 669 assertArithmeticOK(ourSemantics); 670 initialize(&ourSemantics); 671 category = ourCategory; 672 sign = negative; 673 if(category == fcNormal) 674 category = fcZero; 675 else if (ourCategory == fcNaN) 676 makeNaN(); 677} 678 679APFloat::APFloat(const fltSemantics &ourSemantics, const char *text) 680{ 681 assertArithmeticOK(ourSemantics); 682 initialize(&ourSemantics); 683 convertFromString(text, rmNearestTiesToEven); 684} 685 686APFloat::APFloat(const APFloat &rhs) 687{ 688 initialize(rhs.semantics); 689 assign(rhs); 690} 691 692APFloat::~APFloat() 693{ 694 freeSignificand(); 695} 696 697// Profile - This method 'profiles' an APFloat for use with FoldingSet. 698void APFloat::Profile(FoldingSetNodeID& ID) const { 699 ID.Add(convertToAPInt()); 700} 701 702unsigned int 703APFloat::partCount() const 704{ 705 return partCountForBits(semantics->precision + 1); 706} 707 708unsigned int 709APFloat::semanticsPrecision(const fltSemantics &semantics) 710{ 711 return semantics.precision; 712} 713 714const integerPart * 715APFloat::significandParts() const 716{ 717 return const_cast<APFloat *>(this)->significandParts(); 718} 719 720integerPart * 721APFloat::significandParts() 722{ 723 assert(category == fcNormal || category == fcNaN); 724 725 if(partCount() > 1) 726 return significand.parts; 727 else 728 return &significand.part; 729} 730 731void 732APFloat::zeroSignificand() 733{ 734 category = fcNormal; 735 APInt::tcSet(significandParts(), 0, partCount()); 736} 737 738/* Increment an fcNormal floating point number's significand. */ 739void 740APFloat::incrementSignificand() 741{ 742 integerPart carry; 743 744 carry = APInt::tcIncrement(significandParts(), partCount()); 745 746 /* Our callers should never cause us to overflow. */ 747 assert(carry == 0); 748} 749 750/* Add the significand of the RHS. Returns the carry flag. */ 751integerPart 752APFloat::addSignificand(const APFloat &rhs) 753{ 754 integerPart *parts; 755 756 parts = significandParts(); 757 758 assert(semantics == rhs.semantics); 759 assert(exponent == rhs.exponent); 760 761 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 762} 763 764/* Subtract the significand of the RHS with a borrow flag. Returns 765 the borrow flag. */ 766integerPart 767APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow) 768{ 769 integerPart *parts; 770 771 parts = significandParts(); 772 773 assert(semantics == rhs.semantics); 774 assert(exponent == rhs.exponent); 775 776 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 777 partCount()); 778} 779 780/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 781 on to the full-precision result of the multiplication. Returns the 782 lost fraction. */ 783lostFraction 784APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) 785{ 786 unsigned int omsb; // One, not zero, based MSB. 787 unsigned int partsCount, newPartsCount, precision; 788 integerPart *lhsSignificand; 789 integerPart scratch[4]; 790 integerPart *fullSignificand; 791 lostFraction lost_fraction; 792 793 assert(semantics == rhs.semantics); 794 795 precision = semantics->precision; 796 newPartsCount = partCountForBits(precision * 2); 797 798 if(newPartsCount > 4) 799 fullSignificand = new integerPart[newPartsCount]; 800 else 801 fullSignificand = scratch; 802 803 lhsSignificand = significandParts(); 804 partsCount = partCount(); 805 806 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 807 rhs.significandParts(), partsCount, partsCount); 808 809 lost_fraction = lfExactlyZero; 810 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 811 exponent += rhs.exponent; 812 813 if(addend) { 814 Significand savedSignificand = significand; 815 const fltSemantics *savedSemantics = semantics; 816 fltSemantics extendedSemantics; 817 opStatus status; 818 unsigned int extendedPrecision; 819 820 /* Normalize our MSB. */ 821 extendedPrecision = precision + precision - 1; 822 if(omsb != extendedPrecision) 823 { 824 APInt::tcShiftLeft(fullSignificand, newPartsCount, 825 extendedPrecision - omsb); 826 exponent -= extendedPrecision - omsb; 827 } 828 829 /* Create new semantics. */ 830 extendedSemantics = *semantics; 831 extendedSemantics.precision = extendedPrecision; 832 833 if(newPartsCount == 1) 834 significand.part = fullSignificand[0]; 835 else 836 significand.parts = fullSignificand; 837 semantics = &extendedSemantics; 838 839 APFloat extendedAddend(*addend); 840 status = extendedAddend.convert(extendedSemantics, rmTowardZero); 841 assert(status == opOK); 842 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 843 844 /* Restore our state. */ 845 if(newPartsCount == 1) 846 fullSignificand[0] = significand.part; 847 significand = savedSignificand; 848 semantics = savedSemantics; 849 850 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 851 } 852 853 exponent -= (precision - 1); 854 855 if(omsb > precision) { 856 unsigned int bits, significantParts; 857 lostFraction lf; 858 859 bits = omsb - precision; 860 significantParts = partCountForBits(omsb); 861 lf = shiftRight(fullSignificand, significantParts, bits); 862 lost_fraction = combineLostFractions(lf, lost_fraction); 863 exponent += bits; 864 } 865 866 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 867 868 if(newPartsCount > 4) 869 delete [] fullSignificand; 870 871 return lost_fraction; 872} 873 874/* Multiply the significands of LHS and RHS to DST. */ 875lostFraction 876APFloat::divideSignificand(const APFloat &rhs) 877{ 878 unsigned int bit, i, partsCount; 879 const integerPart *rhsSignificand; 880 integerPart *lhsSignificand, *dividend, *divisor; 881 integerPart scratch[4]; 882 lostFraction lost_fraction; 883 884 assert(semantics == rhs.semantics); 885 886 lhsSignificand = significandParts(); 887 rhsSignificand = rhs.significandParts(); 888 partsCount = partCount(); 889 890 if(partsCount > 2) 891 dividend = new integerPart[partsCount * 2]; 892 else 893 dividend = scratch; 894 895 divisor = dividend + partsCount; 896 897 /* Copy the dividend and divisor as they will be modified in-place. */ 898 for(i = 0; i < partsCount; i++) { 899 dividend[i] = lhsSignificand[i]; 900 divisor[i] = rhsSignificand[i]; 901 lhsSignificand[i] = 0; 902 } 903 904 exponent -= rhs.exponent; 905 906 unsigned int precision = semantics->precision; 907 908 /* Normalize the divisor. */ 909 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 910 if(bit) { 911 exponent += bit; 912 APInt::tcShiftLeft(divisor, partsCount, bit); 913 } 914 915 /* Normalize the dividend. */ 916 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 917 if(bit) { 918 exponent -= bit; 919 APInt::tcShiftLeft(dividend, partsCount, bit); 920 } 921 922 /* Ensure the dividend >= divisor initially for the loop below. 923 Incidentally, this means that the division loop below is 924 guaranteed to set the integer bit to one. */ 925 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) { 926 exponent--; 927 APInt::tcShiftLeft(dividend, partsCount, 1); 928 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 929 } 930 931 /* Long division. */ 932 for(bit = precision; bit; bit -= 1) { 933 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 934 APInt::tcSubtract(dividend, divisor, 0, partsCount); 935 APInt::tcSetBit(lhsSignificand, bit - 1); 936 } 937 938 APInt::tcShiftLeft(dividend, partsCount, 1); 939 } 940 941 /* Figure out the lost fraction. */ 942 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 943 944 if(cmp > 0) 945 lost_fraction = lfMoreThanHalf; 946 else if(cmp == 0) 947 lost_fraction = lfExactlyHalf; 948 else if(APInt::tcIsZero(dividend, partsCount)) 949 lost_fraction = lfExactlyZero; 950 else 951 lost_fraction = lfLessThanHalf; 952 953 if(partsCount > 2) 954 delete [] dividend; 955 956 return lost_fraction; 957} 958 959unsigned int 960APFloat::significandMSB() const 961{ 962 return APInt::tcMSB(significandParts(), partCount()); 963} 964 965unsigned int 966APFloat::significandLSB() const 967{ 968 return APInt::tcLSB(significandParts(), partCount()); 969} 970 971/* Note that a zero result is NOT normalized to fcZero. */ 972lostFraction 973APFloat::shiftSignificandRight(unsigned int bits) 974{ 975 /* Our exponent should not overflow. */ 976 assert((exponent_t) (exponent + bits) >= exponent); 977 978 exponent += bits; 979 980 return shiftRight(significandParts(), partCount(), bits); 981} 982 983/* Shift the significand left BITS bits, subtract BITS from its exponent. */ 984void 985APFloat::shiftSignificandLeft(unsigned int bits) 986{ 987 assert(bits < semantics->precision); 988 989 if(bits) { 990 unsigned int partsCount = partCount(); 991 992 APInt::tcShiftLeft(significandParts(), partsCount, bits); 993 exponent -= bits; 994 995 assert(!APInt::tcIsZero(significandParts(), partsCount)); 996 } 997} 998 999APFloat::cmpResult 1000APFloat::compareAbsoluteValue(const APFloat &rhs) const 1001{ 1002 int compare; 1003 1004 assert(semantics == rhs.semantics); 1005 assert(category == fcNormal); 1006 assert(rhs.category == fcNormal); 1007 1008 compare = exponent - rhs.exponent; 1009 1010 /* If exponents are equal, do an unsigned bignum comparison of the 1011 significands. */ 1012 if(compare == 0) 1013 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 1014 partCount()); 1015 1016 if(compare > 0) 1017 return cmpGreaterThan; 1018 else if(compare < 0) 1019 return cmpLessThan; 1020 else 1021 return cmpEqual; 1022} 1023 1024/* Handle overflow. Sign is preserved. We either become infinity or 1025 the largest finite number. */ 1026APFloat::opStatus 1027APFloat::handleOverflow(roundingMode rounding_mode) 1028{ 1029 /* Infinity? */ 1030 if(rounding_mode == rmNearestTiesToEven 1031 || rounding_mode == rmNearestTiesToAway 1032 || (rounding_mode == rmTowardPositive && !sign) 1033 || (rounding_mode == rmTowardNegative && sign)) 1034 { 1035 category = fcInfinity; 1036 return (opStatus) (opOverflow | opInexact); 1037 } 1038 1039 /* Otherwise we become the largest finite number. */ 1040 category = fcNormal; 1041 exponent = semantics->maxExponent; 1042 APInt::tcSetLeastSignificantBits(significandParts(), partCount(), 1043 semantics->precision); 1044 1045 return opInexact; 1046} 1047 1048/* Returns TRUE if, when truncating the current number, with BIT the 1049 new LSB, with the given lost fraction and rounding mode, the result 1050 would need to be rounded away from zero (i.e., by increasing the 1051 signficand). This routine must work for fcZero of both signs, and 1052 fcNormal numbers. */ 1053bool 1054APFloat::roundAwayFromZero(roundingMode rounding_mode, 1055 lostFraction lost_fraction, 1056 unsigned int bit) const 1057{ 1058 /* NaNs and infinities should not have lost fractions. */ 1059 assert(category == fcNormal || category == fcZero); 1060 1061 /* Current callers never pass this so we don't handle it. */ 1062 assert(lost_fraction != lfExactlyZero); 1063 1064 switch(rounding_mode) { 1065 default: 1066 assert(0); 1067 1068 case rmNearestTiesToAway: 1069 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1070 1071 case rmNearestTiesToEven: 1072 if(lost_fraction == lfMoreThanHalf) 1073 return true; 1074 1075 /* Our zeroes don't have a significand to test. */ 1076 if(lost_fraction == lfExactlyHalf && category != fcZero) 1077 return APInt::tcExtractBit(significandParts(), bit); 1078 1079 return false; 1080 1081 case rmTowardZero: 1082 return false; 1083 1084 case rmTowardPositive: 1085 return sign == false; 1086 1087 case rmTowardNegative: 1088 return sign == true; 1089 } 1090} 1091 1092APFloat::opStatus 1093APFloat::normalize(roundingMode rounding_mode, 1094 lostFraction lost_fraction) 1095{ 1096 unsigned int omsb; /* One, not zero, based MSB. */ 1097 int exponentChange; 1098 1099 if(category != fcNormal) 1100 return opOK; 1101 1102 /* Before rounding normalize the exponent of fcNormal numbers. */ 1103 omsb = significandMSB() + 1; 1104 1105 if(omsb) { 1106 /* OMSB is numbered from 1. We want to place it in the integer 1107 bit numbered PRECISON if possible, with a compensating change in 1108 the exponent. */ 1109 exponentChange = omsb - semantics->precision; 1110 1111 /* If the resulting exponent is too high, overflow according to 1112 the rounding mode. */ 1113 if(exponent + exponentChange > semantics->maxExponent) 1114 return handleOverflow(rounding_mode); 1115 1116 /* Subnormal numbers have exponent minExponent, and their MSB 1117 is forced based on that. */ 1118 if(exponent + exponentChange < semantics->minExponent) 1119 exponentChange = semantics->minExponent - exponent; 1120 1121 /* Shifting left is easy as we don't lose precision. */ 1122 if(exponentChange < 0) { 1123 assert(lost_fraction == lfExactlyZero); 1124 1125 shiftSignificandLeft(-exponentChange); 1126 1127 return opOK; 1128 } 1129 1130 if(exponentChange > 0) { 1131 lostFraction lf; 1132 1133 /* Shift right and capture any new lost fraction. */ 1134 lf = shiftSignificandRight(exponentChange); 1135 1136 lost_fraction = combineLostFractions(lf, lost_fraction); 1137 1138 /* Keep OMSB up-to-date. */ 1139 if(omsb > (unsigned) exponentChange) 1140 omsb -= exponentChange; 1141 else 1142 omsb = 0; 1143 } 1144 } 1145 1146 /* Now round the number according to rounding_mode given the lost 1147 fraction. */ 1148 1149 /* As specified in IEEE 754, since we do not trap we do not report 1150 underflow for exact results. */ 1151 if(lost_fraction == lfExactlyZero) { 1152 /* Canonicalize zeroes. */ 1153 if(omsb == 0) 1154 category = fcZero; 1155 1156 return opOK; 1157 } 1158 1159 /* Increment the significand if we're rounding away from zero. */ 1160 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1161 if(omsb == 0) 1162 exponent = semantics->minExponent; 1163 1164 incrementSignificand(); 1165 omsb = significandMSB() + 1; 1166 1167 /* Did the significand increment overflow? */ 1168 if(omsb == (unsigned) semantics->precision + 1) { 1169 /* Renormalize by incrementing the exponent and shifting our 1170 significand right one. However if we already have the 1171 maximum exponent we overflow to infinity. */ 1172 if(exponent == semantics->maxExponent) { 1173 category = fcInfinity; 1174 1175 return (opStatus) (opOverflow | opInexact); 1176 } 1177 1178 shiftSignificandRight(1); 1179 1180 return opInexact; 1181 } 1182 } 1183 1184 /* The normal case - we were and are not denormal, and any 1185 significand increment above didn't overflow. */ 1186 if(omsb == semantics->precision) 1187 return opInexact; 1188 1189 /* We have a non-zero denormal. */ 1190 assert(omsb < semantics->precision); 1191 1192 /* Canonicalize zeroes. */ 1193 if(omsb == 0) 1194 category = fcZero; 1195 1196 /* The fcZero case is a denormal that underflowed to zero. */ 1197 return (opStatus) (opUnderflow | opInexact); 1198} 1199 1200APFloat::opStatus 1201APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) 1202{ 1203 switch(convolve(category, rhs.category)) { 1204 default: 1205 assert(0); 1206 1207 case convolve(fcNaN, fcZero): 1208 case convolve(fcNaN, fcNormal): 1209 case convolve(fcNaN, fcInfinity): 1210 case convolve(fcNaN, fcNaN): 1211 case convolve(fcNormal, fcZero): 1212 case convolve(fcInfinity, fcNormal): 1213 case convolve(fcInfinity, fcZero): 1214 return opOK; 1215 1216 case convolve(fcZero, fcNaN): 1217 case convolve(fcNormal, fcNaN): 1218 case convolve(fcInfinity, fcNaN): 1219 category = fcNaN; 1220 copySignificand(rhs); 1221 return opOK; 1222 1223 case convolve(fcNormal, fcInfinity): 1224 case convolve(fcZero, fcInfinity): 1225 category = fcInfinity; 1226 sign = rhs.sign ^ subtract; 1227 return opOK; 1228 1229 case convolve(fcZero, fcNormal): 1230 assign(rhs); 1231 sign = rhs.sign ^ subtract; 1232 return opOK; 1233 1234 case convolve(fcZero, fcZero): 1235 /* Sign depends on rounding mode; handled by caller. */ 1236 return opOK; 1237 1238 case convolve(fcInfinity, fcInfinity): 1239 /* Differently signed infinities can only be validly 1240 subtracted. */ 1241 if((sign ^ rhs.sign) != subtract) { 1242 makeNaN(); 1243 return opInvalidOp; 1244 } 1245 1246 return opOK; 1247 1248 case convolve(fcNormal, fcNormal): 1249 return opDivByZero; 1250 } 1251} 1252 1253/* Add or subtract two normal numbers. */ 1254lostFraction 1255APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) 1256{ 1257 integerPart carry; 1258 lostFraction lost_fraction; 1259 int bits; 1260 1261 /* Determine if the operation on the absolute values is effectively 1262 an addition or subtraction. */ 1263 subtract ^= (sign ^ rhs.sign) ? true : false; 1264 1265 /* Are we bigger exponent-wise than the RHS? */ 1266 bits = exponent - rhs.exponent; 1267 1268 /* Subtraction is more subtle than one might naively expect. */ 1269 if(subtract) { 1270 APFloat temp_rhs(rhs); 1271 bool reverse; 1272 1273 if (bits == 0) { 1274 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan; 1275 lost_fraction = lfExactlyZero; 1276 } else if (bits > 0) { 1277 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1278 shiftSignificandLeft(1); 1279 reverse = false; 1280 } else { 1281 lost_fraction = shiftSignificandRight(-bits - 1); 1282 temp_rhs.shiftSignificandLeft(1); 1283 reverse = true; 1284 } 1285 1286 if (reverse) { 1287 carry = temp_rhs.subtractSignificand 1288 (*this, lost_fraction != lfExactlyZero); 1289 copySignificand(temp_rhs); 1290 sign = !sign; 1291 } else { 1292 carry = subtractSignificand 1293 (temp_rhs, lost_fraction != lfExactlyZero); 1294 } 1295 1296 /* Invert the lost fraction - it was on the RHS and 1297 subtracted. */ 1298 if(lost_fraction == lfLessThanHalf) 1299 lost_fraction = lfMoreThanHalf; 1300 else if(lost_fraction == lfMoreThanHalf) 1301 lost_fraction = lfLessThanHalf; 1302 1303 /* The code above is intended to ensure that no borrow is 1304 necessary. */ 1305 assert(!carry); 1306 } else { 1307 if(bits > 0) { 1308 APFloat temp_rhs(rhs); 1309 1310 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1311 carry = addSignificand(temp_rhs); 1312 } else { 1313 lost_fraction = shiftSignificandRight(-bits); 1314 carry = addSignificand(rhs); 1315 } 1316 1317 /* We have a guard bit; generating a carry cannot happen. */ 1318 assert(!carry); 1319 } 1320 1321 return lost_fraction; 1322} 1323 1324APFloat::opStatus 1325APFloat::multiplySpecials(const APFloat &rhs) 1326{ 1327 switch(convolve(category, rhs.category)) { 1328 default: 1329 assert(0); 1330 1331 case convolve(fcNaN, fcZero): 1332 case convolve(fcNaN, fcNormal): 1333 case convolve(fcNaN, fcInfinity): 1334 case convolve(fcNaN, fcNaN): 1335 return opOK; 1336 1337 case convolve(fcZero, fcNaN): 1338 case convolve(fcNormal, fcNaN): 1339 case convolve(fcInfinity, fcNaN): 1340 category = fcNaN; 1341 copySignificand(rhs); 1342 return opOK; 1343 1344 case convolve(fcNormal, fcInfinity): 1345 case convolve(fcInfinity, fcNormal): 1346 case convolve(fcInfinity, fcInfinity): 1347 category = fcInfinity; 1348 return opOK; 1349 1350 case convolve(fcZero, fcNormal): 1351 case convolve(fcNormal, fcZero): 1352 case convolve(fcZero, fcZero): 1353 category = fcZero; 1354 return opOK; 1355 1356 case convolve(fcZero, fcInfinity): 1357 case convolve(fcInfinity, fcZero): 1358 makeNaN(); 1359 return opInvalidOp; 1360 1361 case convolve(fcNormal, fcNormal): 1362 return opOK; 1363 } 1364} 1365 1366APFloat::opStatus 1367APFloat::divideSpecials(const APFloat &rhs) 1368{ 1369 switch(convolve(category, rhs.category)) { 1370 default: 1371 assert(0); 1372 1373 case convolve(fcNaN, fcZero): 1374 case convolve(fcNaN, fcNormal): 1375 case convolve(fcNaN, fcInfinity): 1376 case convolve(fcNaN, fcNaN): 1377 case convolve(fcInfinity, fcZero): 1378 case convolve(fcInfinity, fcNormal): 1379 case convolve(fcZero, fcInfinity): 1380 case convolve(fcZero, fcNormal): 1381 return opOK; 1382 1383 case convolve(fcZero, fcNaN): 1384 case convolve(fcNormal, fcNaN): 1385 case convolve(fcInfinity, fcNaN): 1386 category = fcNaN; 1387 copySignificand(rhs); 1388 return opOK; 1389 1390 case convolve(fcNormal, fcInfinity): 1391 category = fcZero; 1392 return opOK; 1393 1394 case convolve(fcNormal, fcZero): 1395 category = fcInfinity; 1396 return opDivByZero; 1397 1398 case convolve(fcInfinity, fcInfinity): 1399 case convolve(fcZero, fcZero): 1400 makeNaN(); 1401 return opInvalidOp; 1402 1403 case convolve(fcNormal, fcNormal): 1404 return opOK; 1405 } 1406} 1407 1408/* Change sign. */ 1409void 1410APFloat::changeSign() 1411{ 1412 /* Look mummy, this one's easy. */ 1413 sign = !sign; 1414} 1415 1416void 1417APFloat::clearSign() 1418{ 1419 /* So is this one. */ 1420 sign = 0; 1421} 1422 1423void 1424APFloat::copySign(const APFloat &rhs) 1425{ 1426 /* And this one. */ 1427 sign = rhs.sign; 1428} 1429 1430/* Normalized addition or subtraction. */ 1431APFloat::opStatus 1432APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode, 1433 bool subtract) 1434{ 1435 opStatus fs; 1436 1437 assertArithmeticOK(*semantics); 1438 1439 fs = addOrSubtractSpecials(rhs, subtract); 1440 1441 /* This return code means it was not a simple case. */ 1442 if(fs == opDivByZero) { 1443 lostFraction lost_fraction; 1444 1445 lost_fraction = addOrSubtractSignificand(rhs, subtract); 1446 fs = normalize(rounding_mode, lost_fraction); 1447 1448 /* Can only be zero if we lost no fraction. */ 1449 assert(category != fcZero || lost_fraction == lfExactlyZero); 1450 } 1451 1452 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1453 positive zero unless rounding to minus infinity, except that 1454 adding two like-signed zeroes gives that zero. */ 1455 if(category == fcZero) { 1456 if(rhs.category != fcZero || (sign == rhs.sign) == subtract) 1457 sign = (rounding_mode == rmTowardNegative); 1458 } 1459 1460 return fs; 1461} 1462 1463/* Normalized addition. */ 1464APFloat::opStatus 1465APFloat::add(const APFloat &rhs, roundingMode rounding_mode) 1466{ 1467 return addOrSubtract(rhs, rounding_mode, false); 1468} 1469 1470/* Normalized subtraction. */ 1471APFloat::opStatus 1472APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode) 1473{ 1474 return addOrSubtract(rhs, rounding_mode, true); 1475} 1476 1477/* Normalized multiply. */ 1478APFloat::opStatus 1479APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode) 1480{ 1481 opStatus fs; 1482 1483 assertArithmeticOK(*semantics); 1484 sign ^= rhs.sign; 1485 fs = multiplySpecials(rhs); 1486 1487 if(category == fcNormal) { 1488 lostFraction lost_fraction = multiplySignificand(rhs, 0); 1489 fs = normalize(rounding_mode, lost_fraction); 1490 if(lost_fraction != lfExactlyZero) 1491 fs = (opStatus) (fs | opInexact); 1492 } 1493 1494 return fs; 1495} 1496 1497/* Normalized divide. */ 1498APFloat::opStatus 1499APFloat::divide(const APFloat &rhs, roundingMode rounding_mode) 1500{ 1501 opStatus fs; 1502 1503 assertArithmeticOK(*semantics); 1504 sign ^= rhs.sign; 1505 fs = divideSpecials(rhs); 1506 1507 if(category == fcNormal) { 1508 lostFraction lost_fraction = divideSignificand(rhs); 1509 fs = normalize(rounding_mode, lost_fraction); 1510 if(lost_fraction != lfExactlyZero) 1511 fs = (opStatus) (fs | opInexact); 1512 } 1513 1514 return fs; 1515} 1516 1517/* Normalized remainder. This is not currently doing TRT. */ 1518APFloat::opStatus 1519APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) 1520{ 1521 opStatus fs; 1522 APFloat V = *this; 1523 unsigned int origSign = sign; 1524 1525 assertArithmeticOK(*semantics); 1526 fs = V.divide(rhs, rmNearestTiesToEven); 1527 if (fs == opDivByZero) 1528 return fs; 1529 1530 int parts = partCount(); 1531 integerPart *x = new integerPart[parts]; 1532 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1533 rmNearestTiesToEven); 1534 if (fs==opInvalidOp) 1535 return fs; 1536 1537 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1538 rmNearestTiesToEven); 1539 assert(fs==opOK); // should always work 1540 1541 fs = V.multiply(rhs, rounding_mode); 1542 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1543 1544 fs = subtract(V, rounding_mode); 1545 assert(fs==opOK || fs==opInexact); // likewise 1546 1547 if (isZero()) 1548 sign = origSign; // IEEE754 requires this 1549 delete[] x; 1550 return fs; 1551} 1552 1553/* Normalized fused-multiply-add. */ 1554APFloat::opStatus 1555APFloat::fusedMultiplyAdd(const APFloat &multiplicand, 1556 const APFloat &addend, 1557 roundingMode rounding_mode) 1558{ 1559 opStatus fs; 1560 1561 assertArithmeticOK(*semantics); 1562 1563 /* Post-multiplication sign, before addition. */ 1564 sign ^= multiplicand.sign; 1565 1566 /* If and only if all arguments are normal do we need to do an 1567 extended-precision calculation. */ 1568 if(category == fcNormal 1569 && multiplicand.category == fcNormal 1570 && addend.category == fcNormal) { 1571 lostFraction lost_fraction; 1572 1573 lost_fraction = multiplySignificand(multiplicand, &addend); 1574 fs = normalize(rounding_mode, lost_fraction); 1575 if(lost_fraction != lfExactlyZero) 1576 fs = (opStatus) (fs | opInexact); 1577 1578 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1579 positive zero unless rounding to minus infinity, except that 1580 adding two like-signed zeroes gives that zero. */ 1581 if(category == fcZero && sign != addend.sign) 1582 sign = (rounding_mode == rmTowardNegative); 1583 } else { 1584 fs = multiplySpecials(multiplicand); 1585 1586 /* FS can only be opOK or opInvalidOp. There is no more work 1587 to do in the latter case. The IEEE-754R standard says it is 1588 implementation-defined in this case whether, if ADDEND is a 1589 quiet NaN, we raise invalid op; this implementation does so. 1590 1591 If we need to do the addition we can do so with normal 1592 precision. */ 1593 if(fs == opOK) 1594 fs = addOrSubtract(addend, rounding_mode, false); 1595 } 1596 1597 return fs; 1598} 1599 1600/* Comparison requires normalized numbers. */ 1601APFloat::cmpResult 1602APFloat::compare(const APFloat &rhs) const 1603{ 1604 cmpResult result; 1605 1606 assertArithmeticOK(*semantics); 1607 assert(semantics == rhs.semantics); 1608 1609 switch(convolve(category, rhs.category)) { 1610 default: 1611 assert(0); 1612 1613 case convolve(fcNaN, fcZero): 1614 case convolve(fcNaN, fcNormal): 1615 case convolve(fcNaN, fcInfinity): 1616 case convolve(fcNaN, fcNaN): 1617 case convolve(fcZero, fcNaN): 1618 case convolve(fcNormal, fcNaN): 1619 case convolve(fcInfinity, fcNaN): 1620 return cmpUnordered; 1621 1622 case convolve(fcInfinity, fcNormal): 1623 case convolve(fcInfinity, fcZero): 1624 case convolve(fcNormal, fcZero): 1625 if(sign) 1626 return cmpLessThan; 1627 else 1628 return cmpGreaterThan; 1629 1630 case convolve(fcNormal, fcInfinity): 1631 case convolve(fcZero, fcInfinity): 1632 case convolve(fcZero, fcNormal): 1633 if(rhs.sign) 1634 return cmpGreaterThan; 1635 else 1636 return cmpLessThan; 1637 1638 case convolve(fcInfinity, fcInfinity): 1639 if(sign == rhs.sign) 1640 return cmpEqual; 1641 else if(sign) 1642 return cmpLessThan; 1643 else 1644 return cmpGreaterThan; 1645 1646 case convolve(fcZero, fcZero): 1647 return cmpEqual; 1648 1649 case convolve(fcNormal, fcNormal): 1650 break; 1651 } 1652 1653 /* Two normal numbers. Do they have the same sign? */ 1654 if(sign != rhs.sign) { 1655 if(sign) 1656 result = cmpLessThan; 1657 else 1658 result = cmpGreaterThan; 1659 } else { 1660 /* Compare absolute values; invert result if negative. */ 1661 result = compareAbsoluteValue(rhs); 1662 1663 if(sign) { 1664 if(result == cmpLessThan) 1665 result = cmpGreaterThan; 1666 else if(result == cmpGreaterThan) 1667 result = cmpLessThan; 1668 } 1669 } 1670 1671 return result; 1672} 1673 1674APFloat::opStatus 1675APFloat::convert(const fltSemantics &toSemantics, 1676 roundingMode rounding_mode) 1677{ 1678 lostFraction lostFraction; 1679 unsigned int newPartCount, oldPartCount; 1680 opStatus fs; 1681 1682 assertArithmeticOK(*semantics); 1683 assertArithmeticOK(toSemantics); 1684 lostFraction = lfExactlyZero; 1685 newPartCount = partCountForBits(toSemantics.precision + 1); 1686 oldPartCount = partCount(); 1687 1688 /* Handle storage complications. If our new form is wider, 1689 re-allocate our bit pattern into wider storage. If it is 1690 narrower, we ignore the excess parts, but if narrowing to a 1691 single part we need to free the old storage. 1692 Be careful not to reference significandParts for zeroes 1693 and infinities, since it aborts. */ 1694 if (newPartCount > oldPartCount) { 1695 integerPart *newParts; 1696 newParts = new integerPart[newPartCount]; 1697 APInt::tcSet(newParts, 0, newPartCount); 1698 if (category==fcNormal || category==fcNaN) 1699 APInt::tcAssign(newParts, significandParts(), oldPartCount); 1700 freeSignificand(); 1701 significand.parts = newParts; 1702 } else if (newPartCount < oldPartCount) { 1703 /* Capture any lost fraction through truncation of parts so we get 1704 correct rounding whilst normalizing. */ 1705 if (category==fcNormal) 1706 lostFraction = lostFractionThroughTruncation 1707 (significandParts(), oldPartCount, toSemantics.precision); 1708 if (newPartCount == 1) { 1709 integerPart newPart = 0; 1710 if (category==fcNormal || category==fcNaN) 1711 newPart = significandParts()[0]; 1712 freeSignificand(); 1713 significand.part = newPart; 1714 } 1715 } 1716 1717 if(category == fcNormal) { 1718 /* Re-interpret our bit-pattern. */ 1719 exponent += toSemantics.precision - semantics->precision; 1720 semantics = &toSemantics; 1721 fs = normalize(rounding_mode, lostFraction); 1722 } else if (category == fcNaN) { 1723 int shift = toSemantics.precision - semantics->precision; 1724 // Do this now so significandParts gets the right answer 1725 semantics = &toSemantics; 1726 // No normalization here, just truncate 1727 if (shift>0) 1728 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 1729 else if (shift < 0) 1730 APInt::tcShiftRight(significandParts(), newPartCount, -shift); 1731 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) 1732 // does not give you back the same bits. This is dubious, and we 1733 // don't currently do it. You're really supposed to get 1734 // an invalid operation signal at runtime, but nobody does that. 1735 fs = opOK; 1736 } else { 1737 semantics = &toSemantics; 1738 fs = opOK; 1739 } 1740 1741 return fs; 1742} 1743 1744/* Convert a floating point number to an integer according to the 1745 rounding mode. If the rounded integer value is out of range this 1746 returns an invalid operation exception and the contents of the 1747 destination parts are unspecified. If the rounded value is in 1748 range but the floating point number is not the exact integer, the C 1749 standard doesn't require an inexact exception to be raised. IEEE 1750 854 does require it so we do that. 1751 1752 Note that for conversions to integer type the C standard requires 1753 round-to-zero to always be used. */ 1754APFloat::opStatus 1755APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width, 1756 bool isSigned, 1757 roundingMode rounding_mode) const 1758{ 1759 lostFraction lost_fraction; 1760 const integerPart *src; 1761 unsigned int dstPartsCount, truncatedBits; 1762 1763 assertArithmeticOK(*semantics); 1764 1765 /* Handle the three special cases first. */ 1766 if(category == fcInfinity || category == fcNaN) 1767 return opInvalidOp; 1768 1769 dstPartsCount = partCountForBits(width); 1770 1771 if(category == fcZero) { 1772 APInt::tcSet(parts, 0, dstPartsCount); 1773 return opOK; 1774 } 1775 1776 src = significandParts(); 1777 1778 /* Step 1: place our absolute value, with any fraction truncated, in 1779 the destination. */ 1780 if (exponent < 0) { 1781 /* Our absolute value is less than one; truncate everything. */ 1782 APInt::tcSet(parts, 0, dstPartsCount); 1783 truncatedBits = semantics->precision; 1784 } else { 1785 /* We want the most significant (exponent + 1) bits; the rest are 1786 truncated. */ 1787 unsigned int bits = exponent + 1U; 1788 1789 /* Hopelessly large in magnitude? */ 1790 if (bits > width) 1791 return opInvalidOp; 1792 1793 if (bits < semantics->precision) { 1794 /* We truncate (semantics->precision - bits) bits. */ 1795 truncatedBits = semantics->precision - bits; 1796 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits); 1797 } else { 1798 /* We want at least as many bits as are available. */ 1799 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0); 1800 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision); 1801 truncatedBits = 0; 1802 } 1803 } 1804 1805 /* Step 2: work out any lost fraction, and increment the absolute 1806 value if we would round away from zero. */ 1807 if (truncatedBits) { 1808 lost_fraction = lostFractionThroughTruncation(src, partCount(), 1809 truncatedBits); 1810 if (lost_fraction != lfExactlyZero 1811 && roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 1812 if (APInt::tcIncrement(parts, dstPartsCount)) 1813 return opInvalidOp; /* Overflow. */ 1814 } 1815 } else { 1816 lost_fraction = lfExactlyZero; 1817 } 1818 1819 /* Step 3: check if we fit in the destination. */ 1820 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1; 1821 1822 if (sign) { 1823 if (!isSigned) { 1824 /* Negative numbers cannot be represented as unsigned. */ 1825 if (omsb != 0) 1826 return opInvalidOp; 1827 } else { 1828 /* It takes omsb bits to represent the unsigned integer value. 1829 We lose a bit for the sign, but care is needed as the 1830 maximally negative integer is a special case. */ 1831 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb) 1832 return opInvalidOp; 1833 1834 /* This case can happen because of rounding. */ 1835 if (omsb > width) 1836 return opInvalidOp; 1837 } 1838 1839 APInt::tcNegate (parts, dstPartsCount); 1840 } else { 1841 if (omsb >= width + !isSigned) 1842 return opInvalidOp; 1843 } 1844 1845 if (lost_fraction == lfExactlyZero) 1846 return opOK; 1847 else 1848 return opInexact; 1849} 1850 1851/* Same as convertToSignExtendedInteger, except we provide 1852 deterministic values in case of an invalid operation exception, 1853 namely zero for NaNs and the minimal or maximal value respectively 1854 for underflow or overflow. */ 1855APFloat::opStatus 1856APFloat::convertToInteger(integerPart *parts, unsigned int width, 1857 bool isSigned, 1858 roundingMode rounding_mode) const 1859{ 1860 opStatus fs; 1861 1862 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode); 1863 1864 if (fs == opInvalidOp) { 1865 unsigned int bits, dstPartsCount; 1866 1867 dstPartsCount = partCountForBits(width); 1868 1869 if (category == fcNaN) 1870 bits = 0; 1871 else if (sign) 1872 bits = isSigned; 1873 else 1874 bits = width - isSigned; 1875 1876 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits); 1877 if (sign && isSigned) 1878 APInt::tcShiftLeft(parts, dstPartsCount, width - 1); 1879 } 1880 1881 return fs; 1882} 1883 1884/* Convert an unsigned integer SRC to a floating point number, 1885 rounding according to ROUNDING_MODE. The sign of the floating 1886 point number is not modified. */ 1887APFloat::opStatus 1888APFloat::convertFromUnsignedParts(const integerPart *src, 1889 unsigned int srcCount, 1890 roundingMode rounding_mode) 1891{ 1892 unsigned int omsb, precision, dstCount; 1893 integerPart *dst; 1894 lostFraction lost_fraction; 1895 1896 assertArithmeticOK(*semantics); 1897 category = fcNormal; 1898 omsb = APInt::tcMSB(src, srcCount) + 1; 1899 dst = significandParts(); 1900 dstCount = partCount(); 1901 precision = semantics->precision; 1902 1903 /* We want the most significant PRECISON bits of SRC. There may not 1904 be that many; extract what we can. */ 1905 if (precision <= omsb) { 1906 exponent = omsb - 1; 1907 lost_fraction = lostFractionThroughTruncation(src, srcCount, 1908 omsb - precision); 1909 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 1910 } else { 1911 exponent = precision - 1; 1912 lost_fraction = lfExactlyZero; 1913 APInt::tcExtract(dst, dstCount, src, omsb, 0); 1914 } 1915 1916 return normalize(rounding_mode, lost_fraction); 1917} 1918 1919APFloat::opStatus 1920APFloat::convertFromAPInt(const APInt &Val, 1921 bool isSigned, 1922 roundingMode rounding_mode) 1923{ 1924 unsigned int partCount = Val.getNumWords(); 1925 APInt api = Val; 1926 1927 sign = false; 1928 if (isSigned && api.isNegative()) { 1929 sign = true; 1930 api = -api; 1931 } 1932 1933 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 1934} 1935 1936/* Convert a two's complement integer SRC to a floating point number, 1937 rounding according to ROUNDING_MODE. ISSIGNED is true if the 1938 integer is signed, in which case it must be sign-extended. */ 1939APFloat::opStatus 1940APFloat::convertFromSignExtendedInteger(const integerPart *src, 1941 unsigned int srcCount, 1942 bool isSigned, 1943 roundingMode rounding_mode) 1944{ 1945 opStatus status; 1946 1947 assertArithmeticOK(*semantics); 1948 if (isSigned 1949 && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 1950 integerPart *copy; 1951 1952 /* If we're signed and negative negate a copy. */ 1953 sign = true; 1954 copy = new integerPart[srcCount]; 1955 APInt::tcAssign(copy, src, srcCount); 1956 APInt::tcNegate(copy, srcCount); 1957 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 1958 delete [] copy; 1959 } else { 1960 sign = false; 1961 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 1962 } 1963 1964 return status; 1965} 1966 1967/* FIXME: should this just take a const APInt reference? */ 1968APFloat::opStatus 1969APFloat::convertFromZeroExtendedInteger(const integerPart *parts, 1970 unsigned int width, bool isSigned, 1971 roundingMode rounding_mode) 1972{ 1973 unsigned int partCount = partCountForBits(width); 1974 APInt api = APInt(width, partCount, parts); 1975 1976 sign = false; 1977 if(isSigned && APInt::tcExtractBit(parts, width - 1)) { 1978 sign = true; 1979 api = -api; 1980 } 1981 1982 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 1983} 1984 1985APFloat::opStatus 1986APFloat::convertFromHexadecimalString(const char *p, 1987 roundingMode rounding_mode) 1988{ 1989 lostFraction lost_fraction; 1990 integerPart *significand; 1991 unsigned int bitPos, partsCount; 1992 const char *dot, *firstSignificantDigit; 1993 1994 zeroSignificand(); 1995 exponent = 0; 1996 category = fcNormal; 1997 1998 significand = significandParts(); 1999 partsCount = partCount(); 2000 bitPos = partsCount * integerPartWidth; 2001 2002 /* Skip leading zeroes and any (hexa)decimal point. */ 2003 p = skipLeadingZeroesAndAnyDot(p, &dot); 2004 firstSignificantDigit = p; 2005 2006 for(;;) { 2007 integerPart hex_value; 2008 2009 if(*p == '.') { 2010 assert(dot == 0); 2011 dot = p++; 2012 } 2013 2014 hex_value = hexDigitValue(*p); 2015 if(hex_value == -1U) { 2016 lost_fraction = lfExactlyZero; 2017 break; 2018 } 2019 2020 p++; 2021 2022 /* Store the number whilst 4-bit nibbles remain. */ 2023 if(bitPos) { 2024 bitPos -= 4; 2025 hex_value <<= bitPos % integerPartWidth; 2026 significand[bitPos / integerPartWidth] |= hex_value; 2027 } else { 2028 lost_fraction = trailingHexadecimalFraction(p, hex_value); 2029 while(hexDigitValue(*p) != -1U) 2030 p++; 2031 break; 2032 } 2033 } 2034 2035 /* Hex floats require an exponent but not a hexadecimal point. */ 2036 assert(*p == 'p' || *p == 'P'); 2037 2038 /* Ignore the exponent if we are zero. */ 2039 if(p != firstSignificantDigit) { 2040 int expAdjustment; 2041 2042 /* Implicit hexadecimal point? */ 2043 if(!dot) 2044 dot = p; 2045 2046 /* Calculate the exponent adjustment implicit in the number of 2047 significant digits. */ 2048 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 2049 if(expAdjustment < 0) 2050 expAdjustment++; 2051 expAdjustment = expAdjustment * 4 - 1; 2052 2053 /* Adjust for writing the significand starting at the most 2054 significant nibble. */ 2055 expAdjustment += semantics->precision; 2056 expAdjustment -= partsCount * integerPartWidth; 2057 2058 /* Adjust for the given exponent. */ 2059 exponent = totalExponent(p, expAdjustment); 2060 } 2061 2062 return normalize(rounding_mode, lost_fraction); 2063} 2064 2065APFloat::opStatus 2066APFloat::roundSignificandWithExponent(const integerPart *decSigParts, 2067 unsigned sigPartCount, int exp, 2068 roundingMode rounding_mode) 2069{ 2070 unsigned int parts, pow5PartCount; 2071 fltSemantics calcSemantics = { 32767, -32767, 0, true }; 2072 integerPart pow5Parts[maxPowerOfFiveParts]; 2073 bool isNearest; 2074 2075 isNearest = (rounding_mode == rmNearestTiesToEven 2076 || rounding_mode == rmNearestTiesToAway); 2077 2078 parts = partCountForBits(semantics->precision + 11); 2079 2080 /* Calculate pow(5, abs(exp)). */ 2081 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 2082 2083 for (;; parts *= 2) { 2084 opStatus sigStatus, powStatus; 2085 unsigned int excessPrecision, truncatedBits; 2086 2087 calcSemantics.precision = parts * integerPartWidth - 1; 2088 excessPrecision = calcSemantics.precision - semantics->precision; 2089 truncatedBits = excessPrecision; 2090 2091 APFloat decSig(calcSemantics, fcZero, sign); 2092 APFloat pow5(calcSemantics, fcZero, false); 2093 2094 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 2095 rmNearestTiesToEven); 2096 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 2097 rmNearestTiesToEven); 2098 /* Add exp, as 10^n = 5^n * 2^n. */ 2099 decSig.exponent += exp; 2100 2101 lostFraction calcLostFraction; 2102 integerPart HUerr, HUdistance; 2103 unsigned int powHUerr; 2104 2105 if (exp >= 0) { 2106 /* multiplySignificand leaves the precision-th bit set to 1. */ 2107 calcLostFraction = decSig.multiplySignificand(pow5, NULL); 2108 powHUerr = powStatus != opOK; 2109 } else { 2110 calcLostFraction = decSig.divideSignificand(pow5); 2111 /* Denormal numbers have less precision. */ 2112 if (decSig.exponent < semantics->minExponent) { 2113 excessPrecision += (semantics->minExponent - decSig.exponent); 2114 truncatedBits = excessPrecision; 2115 if (excessPrecision > calcSemantics.precision) 2116 excessPrecision = calcSemantics.precision; 2117 } 2118 /* Extra half-ulp lost in reciprocal of exponent. */ 2119 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 2120 } 2121 2122 /* Both multiplySignificand and divideSignificand return the 2123 result with the integer bit set. */ 2124 assert (APInt::tcExtractBit 2125 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 2126 2127 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 2128 powHUerr); 2129 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 2130 excessPrecision, isNearest); 2131 2132 /* Are we guaranteed to round correctly if we truncate? */ 2133 if (HUdistance >= HUerr) { 2134 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 2135 calcSemantics.precision - excessPrecision, 2136 excessPrecision); 2137 /* Take the exponent of decSig. If we tcExtract-ed less bits 2138 above we must adjust our exponent to compensate for the 2139 implicit right shift. */ 2140 exponent = (decSig.exponent + semantics->precision 2141 - (calcSemantics.precision - excessPrecision)); 2142 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 2143 decSig.partCount(), 2144 truncatedBits); 2145 return normalize(rounding_mode, calcLostFraction); 2146 } 2147 } 2148} 2149 2150APFloat::opStatus 2151APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode) 2152{ 2153 decimalInfo D; 2154 opStatus fs; 2155 2156 /* Scan the text. */ 2157 interpretDecimal(p, &D); 2158 2159 /* Handle the quick cases. First the case of no significant digits, 2160 i.e. zero, and then exponents that are obviously too large or too 2161 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 2162 definitely overflows if 2163 2164 (exp - 1) * L >= maxExponent 2165 2166 and definitely underflows to zero where 2167 2168 (exp + 1) * L <= minExponent - precision 2169 2170 With integer arithmetic the tightest bounds for L are 2171 2172 93/28 < L < 196/59 [ numerator <= 256 ] 2173 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 2174 */ 2175 2176 if (decDigitValue(*D.firstSigDigit) >= 10U) { 2177 category = fcZero; 2178 fs = opOK; 2179 } else if ((D.normalizedExponent + 1) * 28738 2180 <= 8651 * (semantics->minExponent - (int) semantics->precision)) { 2181 /* Underflow to zero and round. */ 2182 zeroSignificand(); 2183 fs = normalize(rounding_mode, lfLessThanHalf); 2184 } else if ((D.normalizedExponent - 1) * 42039 2185 >= 12655 * semantics->maxExponent) { 2186 /* Overflow and round. */ 2187 fs = handleOverflow(rounding_mode); 2188 } else { 2189 integerPart *decSignificand; 2190 unsigned int partCount; 2191 2192 /* A tight upper bound on number of bits required to hold an 2193 N-digit decimal integer is N * 196 / 59. Allocate enough space 2194 to hold the full significand, and an extra part required by 2195 tcMultiplyPart. */ 2196 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 2197 partCount = partCountForBits(1 + 196 * partCount / 59); 2198 decSignificand = new integerPart[partCount + 1]; 2199 partCount = 0; 2200 2201 /* Convert to binary efficiently - we do almost all multiplication 2202 in an integerPart. When this would overflow do we do a single 2203 bignum multiplication, and then revert again to multiplication 2204 in an integerPart. */ 2205 do { 2206 integerPart decValue, val, multiplier; 2207 2208 val = 0; 2209 multiplier = 1; 2210 2211 do { 2212 if (*p == '.') 2213 p++; 2214 2215 decValue = decDigitValue(*p++); 2216 multiplier *= 10; 2217 val = val * 10 + decValue; 2218 /* The maximum number that can be multiplied by ten with any 2219 digit added without overflowing an integerPart. */ 2220 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 2221 2222 /* Multiply out the current part. */ 2223 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 2224 partCount, partCount + 1, false); 2225 2226 /* If we used another part (likely but not guaranteed), increase 2227 the count. */ 2228 if (decSignificand[partCount]) 2229 partCount++; 2230 } while (p <= D.lastSigDigit); 2231 2232 category = fcNormal; 2233 fs = roundSignificandWithExponent(decSignificand, partCount, 2234 D.exponent, rounding_mode); 2235 2236 delete [] decSignificand; 2237 } 2238 2239 return fs; 2240} 2241 2242APFloat::opStatus 2243APFloat::convertFromString(const char *p, roundingMode rounding_mode) 2244{ 2245 assertArithmeticOK(*semantics); 2246 2247 /* Handle a leading minus sign. */ 2248 if(*p == '-') 2249 sign = 1, p++; 2250 else 2251 sign = 0; 2252 2253 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) 2254 return convertFromHexadecimalString(p + 2, rounding_mode); 2255 else 2256 return convertFromDecimalString(p, rounding_mode); 2257} 2258 2259/* Write out a hexadecimal representation of the floating point value 2260 to DST, which must be of sufficient size, in the C99 form 2261 [-]0xh.hhhhp[+-]d. Return the number of characters written, 2262 excluding the terminating NUL. 2263 2264 If UPPERCASE, the output is in upper case, otherwise in lower case. 2265 2266 HEXDIGITS digits appear altogether, rounding the value if 2267 necessary. If HEXDIGITS is 0, the minimal precision to display the 2268 number precisely is used instead. If nothing would appear after 2269 the decimal point it is suppressed. 2270 2271 The decimal exponent is always printed and has at least one digit. 2272 Zero values display an exponent of zero. Infinities and NaNs 2273 appear as "infinity" or "nan" respectively. 2274 2275 The above rules are as specified by C99. There is ambiguity about 2276 what the leading hexadecimal digit should be. This implementation 2277 uses whatever is necessary so that the exponent is displayed as 2278 stored. This implies the exponent will fall within the IEEE format 2279 range, and the leading hexadecimal digit will be 0 (for denormals), 2280 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 2281 any other digits zero). 2282*/ 2283unsigned int 2284APFloat::convertToHexString(char *dst, unsigned int hexDigits, 2285 bool upperCase, roundingMode rounding_mode) const 2286{ 2287 char *p; 2288 2289 assertArithmeticOK(*semantics); 2290 2291 p = dst; 2292 if (sign) 2293 *dst++ = '-'; 2294 2295 switch (category) { 2296 case fcInfinity: 2297 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 2298 dst += sizeof infinityL - 1; 2299 break; 2300 2301 case fcNaN: 2302 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 2303 dst += sizeof NaNU - 1; 2304 break; 2305 2306 case fcZero: 2307 *dst++ = '0'; 2308 *dst++ = upperCase ? 'X': 'x'; 2309 *dst++ = '0'; 2310 if (hexDigits > 1) { 2311 *dst++ = '.'; 2312 memset (dst, '0', hexDigits - 1); 2313 dst += hexDigits - 1; 2314 } 2315 *dst++ = upperCase ? 'P': 'p'; 2316 *dst++ = '0'; 2317 break; 2318 2319 case fcNormal: 2320 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 2321 break; 2322 } 2323 2324 *dst = 0; 2325 2326 return static_cast<unsigned int>(dst - p); 2327} 2328 2329/* Does the hard work of outputting the correctly rounded hexadecimal 2330 form of a normal floating point number with the specified number of 2331 hexadecimal digits. If HEXDIGITS is zero the minimum number of 2332 digits necessary to print the value precisely is output. */ 2333char * 2334APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 2335 bool upperCase, 2336 roundingMode rounding_mode) const 2337{ 2338 unsigned int count, valueBits, shift, partsCount, outputDigits; 2339 const char *hexDigitChars; 2340 const integerPart *significand; 2341 char *p; 2342 bool roundUp; 2343 2344 *dst++ = '0'; 2345 *dst++ = upperCase ? 'X': 'x'; 2346 2347 roundUp = false; 2348 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 2349 2350 significand = significandParts(); 2351 partsCount = partCount(); 2352 2353 /* +3 because the first digit only uses the single integer bit, so 2354 we have 3 virtual zero most-significant-bits. */ 2355 valueBits = semantics->precision + 3; 2356 shift = integerPartWidth - valueBits % integerPartWidth; 2357 2358 /* The natural number of digits required ignoring trailing 2359 insignificant zeroes. */ 2360 outputDigits = (valueBits - significandLSB () + 3) / 4; 2361 2362 /* hexDigits of zero means use the required number for the 2363 precision. Otherwise, see if we are truncating. If we are, 2364 find out if we need to round away from zero. */ 2365 if (hexDigits) { 2366 if (hexDigits < outputDigits) { 2367 /* We are dropping non-zero bits, so need to check how to round. 2368 "bits" is the number of dropped bits. */ 2369 unsigned int bits; 2370 lostFraction fraction; 2371 2372 bits = valueBits - hexDigits * 4; 2373 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 2374 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 2375 } 2376 outputDigits = hexDigits; 2377 } 2378 2379 /* Write the digits consecutively, and start writing in the location 2380 of the hexadecimal point. We move the most significant digit 2381 left and add the hexadecimal point later. */ 2382 p = ++dst; 2383 2384 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 2385 2386 while (outputDigits && count) { 2387 integerPart part; 2388 2389 /* Put the most significant integerPartWidth bits in "part". */ 2390 if (--count == partsCount) 2391 part = 0; /* An imaginary higher zero part. */ 2392 else 2393 part = significand[count] << shift; 2394 2395 if (count && shift) 2396 part |= significand[count - 1] >> (integerPartWidth - shift); 2397 2398 /* Convert as much of "part" to hexdigits as we can. */ 2399 unsigned int curDigits = integerPartWidth / 4; 2400 2401 if (curDigits > outputDigits) 2402 curDigits = outputDigits; 2403 dst += partAsHex (dst, part, curDigits, hexDigitChars); 2404 outputDigits -= curDigits; 2405 } 2406 2407 if (roundUp) { 2408 char *q = dst; 2409 2410 /* Note that hexDigitChars has a trailing '0'. */ 2411 do { 2412 q--; 2413 *q = hexDigitChars[hexDigitValue (*q) + 1]; 2414 } while (*q == '0'); 2415 assert (q >= p); 2416 } else { 2417 /* Add trailing zeroes. */ 2418 memset (dst, '0', outputDigits); 2419 dst += outputDigits; 2420 } 2421 2422 /* Move the most significant digit to before the point, and if there 2423 is something after the decimal point add it. This must come 2424 after rounding above. */ 2425 p[-1] = p[0]; 2426 if (dst -1 == p) 2427 dst--; 2428 else 2429 p[0] = '.'; 2430 2431 /* Finally output the exponent. */ 2432 *dst++ = upperCase ? 'P': 'p'; 2433 2434 return writeSignedDecimal (dst, exponent); 2435} 2436 2437// For good performance it is desirable for different APFloats 2438// to produce different integers. 2439uint32_t 2440APFloat::getHashValue() const 2441{ 2442 if (category==fcZero) return sign<<8 | semantics->precision ; 2443 else if (category==fcInfinity) return sign<<9 | semantics->precision; 2444 else if (category==fcNaN) return 1<<10 | semantics->precision; 2445 else { 2446 uint32_t hash = sign<<11 | semantics->precision | exponent<<12; 2447 const integerPart* p = significandParts(); 2448 for (int i=partCount(); i>0; i--, p++) 2449 hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32); 2450 return hash; 2451 } 2452} 2453 2454// Conversion from APFloat to/from host float/double. It may eventually be 2455// possible to eliminate these and have everybody deal with APFloats, but that 2456// will take a while. This approach will not easily extend to long double. 2457// Current implementation requires integerPartWidth==64, which is correct at 2458// the moment but could be made more general. 2459 2460// Denormals have exponent minExponent in APFloat, but minExponent-1 in 2461// the actual IEEE respresentations. We compensate for that here. 2462 2463APInt 2464APFloat::convertF80LongDoubleAPFloatToAPInt() const 2465{ 2466 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended); 2467 assert (partCount()==2); 2468 2469 uint64_t myexponent, mysignificand; 2470 2471 if (category==fcNormal) { 2472 myexponent = exponent+16383; //bias 2473 mysignificand = significandParts()[0]; 2474 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 2475 myexponent = 0; // denormal 2476 } else if (category==fcZero) { 2477 myexponent = 0; 2478 mysignificand = 0; 2479 } else if (category==fcInfinity) { 2480 myexponent = 0x7fff; 2481 mysignificand = 0x8000000000000000ULL; 2482 } else { 2483 assert(category == fcNaN && "Unknown category"); 2484 myexponent = 0x7fff; 2485 mysignificand = significandParts()[0]; 2486 } 2487 2488 uint64_t words[2]; 2489 words[0] = ((uint64_t)(sign & 1) << 63) | 2490 ((myexponent & 0x7fffLL) << 48) | 2491 ((mysignificand >>16) & 0xffffffffffffLL); 2492 words[1] = mysignificand & 0xffff; 2493 return APInt(80, 2, words); 2494} 2495 2496APInt 2497APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const 2498{ 2499 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble); 2500 assert (partCount()==2); 2501 2502 uint64_t myexponent, mysignificand, myexponent2, mysignificand2; 2503 2504 if (category==fcNormal) { 2505 myexponent = exponent + 1023; //bias 2506 myexponent2 = exponent2 + 1023; 2507 mysignificand = significandParts()[0]; 2508 mysignificand2 = significandParts()[1]; 2509 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 2510 myexponent = 0; // denormal 2511 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL)) 2512 myexponent2 = 0; // denormal 2513 } else if (category==fcZero) { 2514 myexponent = 0; 2515 mysignificand = 0; 2516 myexponent2 = 0; 2517 mysignificand2 = 0; 2518 } else if (category==fcInfinity) { 2519 myexponent = 0x7ff; 2520 myexponent2 = 0; 2521 mysignificand = 0; 2522 mysignificand2 = 0; 2523 } else { 2524 assert(category == fcNaN && "Unknown category"); 2525 myexponent = 0x7ff; 2526 mysignificand = significandParts()[0]; 2527 myexponent2 = exponent2; 2528 mysignificand2 = significandParts()[1]; 2529 } 2530 2531 uint64_t words[2]; 2532 words[0] = ((uint64_t)(sign & 1) << 63) | 2533 ((myexponent & 0x7ff) << 52) | 2534 (mysignificand & 0xfffffffffffffLL); 2535 words[1] = ((uint64_t)(sign2 & 1) << 63) | 2536 ((myexponent2 & 0x7ff) << 52) | 2537 (mysignificand2 & 0xfffffffffffffLL); 2538 return APInt(128, 2, words); 2539} 2540 2541APInt 2542APFloat::convertDoubleAPFloatToAPInt() const 2543{ 2544 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); 2545 assert (partCount()==1); 2546 2547 uint64_t myexponent, mysignificand; 2548 2549 if (category==fcNormal) { 2550 myexponent = exponent+1023; //bias 2551 mysignificand = *significandParts(); 2552 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 2553 myexponent = 0; // denormal 2554 } else if (category==fcZero) { 2555 myexponent = 0; 2556 mysignificand = 0; 2557 } else if (category==fcInfinity) { 2558 myexponent = 0x7ff; 2559 mysignificand = 0; 2560 } else { 2561 assert(category == fcNaN && "Unknown category!"); 2562 myexponent = 0x7ff; 2563 mysignificand = *significandParts(); 2564 } 2565 2566 return APInt(64, ((((uint64_t)(sign & 1) << 63) | 2567 ((myexponent & 0x7ff) << 52) | 2568 (mysignificand & 0xfffffffffffffLL)))); 2569} 2570 2571APInt 2572APFloat::convertFloatAPFloatToAPInt() const 2573{ 2574 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); 2575 assert (partCount()==1); 2576 2577 uint32_t myexponent, mysignificand; 2578 2579 if (category==fcNormal) { 2580 myexponent = exponent+127; //bias 2581 mysignificand = (uint32_t)*significandParts(); 2582 if (myexponent == 1 && !(mysignificand & 0x800000)) 2583 myexponent = 0; // denormal 2584 } else if (category==fcZero) { 2585 myexponent = 0; 2586 mysignificand = 0; 2587 } else if (category==fcInfinity) { 2588 myexponent = 0xff; 2589 mysignificand = 0; 2590 } else { 2591 assert(category == fcNaN && "Unknown category!"); 2592 myexponent = 0xff; 2593 mysignificand = (uint32_t)*significandParts(); 2594 } 2595 2596 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | 2597 (mysignificand & 0x7fffff))); 2598} 2599 2600// This function creates an APInt that is just a bit map of the floating 2601// point constant as it would appear in memory. It is not a conversion, 2602// and treating the result as a normal integer is unlikely to be useful. 2603 2604APInt 2605APFloat::convertToAPInt() const 2606{ 2607 if (semantics == (const llvm::fltSemantics*)&IEEEsingle) 2608 return convertFloatAPFloatToAPInt(); 2609 2610 if (semantics == (const llvm::fltSemantics*)&IEEEdouble) 2611 return convertDoubleAPFloatToAPInt(); 2612 2613 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble) 2614 return convertPPCDoubleDoubleAPFloatToAPInt(); 2615 2616 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended && 2617 "unknown format!"); 2618 return convertF80LongDoubleAPFloatToAPInt(); 2619} 2620 2621float 2622APFloat::convertToFloat() const 2623{ 2624 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); 2625 APInt api = convertToAPInt(); 2626 return api.bitsToFloat(); 2627} 2628 2629double 2630APFloat::convertToDouble() const 2631{ 2632 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); 2633 APInt api = convertToAPInt(); 2634 return api.bitsToDouble(); 2635} 2636 2637/// Integer bit is explicit in this format. Current Intel book does not 2638/// define meaning of: 2639/// exponent = all 1's, integer bit not set. 2640/// exponent = 0, integer bit set. (formerly "psuedodenormals") 2641/// exponent!=0 nor all 1's, integer bit not set. (formerly "unnormals") 2642void 2643APFloat::initFromF80LongDoubleAPInt(const APInt &api) 2644{ 2645 assert(api.getBitWidth()==80); 2646 uint64_t i1 = api.getRawData()[0]; 2647 uint64_t i2 = api.getRawData()[1]; 2648 uint64_t myexponent = (i1 >> 48) & 0x7fff; 2649 uint64_t mysignificand = ((i1 << 16) & 0xffffffffffff0000ULL) | 2650 (i2 & 0xffff); 2651 2652 initialize(&APFloat::x87DoubleExtended); 2653 assert(partCount()==2); 2654 2655 sign = static_cast<unsigned int>(i1>>63); 2656 if (myexponent==0 && mysignificand==0) { 2657 // exponent, significand meaningless 2658 category = fcZero; 2659 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 2660 // exponent, significand meaningless 2661 category = fcInfinity; 2662 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) { 2663 // exponent meaningless 2664 category = fcNaN; 2665 significandParts()[0] = mysignificand; 2666 significandParts()[1] = 0; 2667 } else { 2668 category = fcNormal; 2669 exponent = myexponent - 16383; 2670 significandParts()[0] = mysignificand; 2671 significandParts()[1] = 0; 2672 if (myexponent==0) // denormal 2673 exponent = -16382; 2674 } 2675} 2676 2677void 2678APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) 2679{ 2680 assert(api.getBitWidth()==128); 2681 uint64_t i1 = api.getRawData()[0]; 2682 uint64_t i2 = api.getRawData()[1]; 2683 uint64_t myexponent = (i1 >> 52) & 0x7ff; 2684 uint64_t mysignificand = i1 & 0xfffffffffffffLL; 2685 uint64_t myexponent2 = (i2 >> 52) & 0x7ff; 2686 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL; 2687 2688 initialize(&APFloat::PPCDoubleDouble); 2689 assert(partCount()==2); 2690 2691 sign = static_cast<unsigned int>(i1>>63); 2692 sign2 = static_cast<unsigned int>(i2>>63); 2693 if (myexponent==0 && mysignificand==0) { 2694 // exponent, significand meaningless 2695 // exponent2 and significand2 are required to be 0; we don't check 2696 category = fcZero; 2697 } else if (myexponent==0x7ff && mysignificand==0) { 2698 // exponent, significand meaningless 2699 // exponent2 and significand2 are required to be 0; we don't check 2700 category = fcInfinity; 2701 } else if (myexponent==0x7ff && mysignificand!=0) { 2702 // exponent meaningless. So is the whole second word, but keep it 2703 // for determinism. 2704 category = fcNaN; 2705 exponent2 = myexponent2; 2706 significandParts()[0] = mysignificand; 2707 significandParts()[1] = mysignificand2; 2708 } else { 2709 category = fcNormal; 2710 // Note there is no category2; the second word is treated as if it is 2711 // fcNormal, although it might be something else considered by itself. 2712 exponent = myexponent - 1023; 2713 exponent2 = myexponent2 - 1023; 2714 significandParts()[0] = mysignificand; 2715 significandParts()[1] = mysignificand2; 2716 if (myexponent==0) // denormal 2717 exponent = -1022; 2718 else 2719 significandParts()[0] |= 0x10000000000000LL; // integer bit 2720 if (myexponent2==0) 2721 exponent2 = -1022; 2722 else 2723 significandParts()[1] |= 0x10000000000000LL; // integer bit 2724 } 2725} 2726 2727void 2728APFloat::initFromDoubleAPInt(const APInt &api) 2729{ 2730 assert(api.getBitWidth()==64); 2731 uint64_t i = *api.getRawData(); 2732 uint64_t myexponent = (i >> 52) & 0x7ff; 2733 uint64_t mysignificand = i & 0xfffffffffffffLL; 2734 2735 initialize(&APFloat::IEEEdouble); 2736 assert(partCount()==1); 2737 2738 sign = static_cast<unsigned int>(i>>63); 2739 if (myexponent==0 && mysignificand==0) { 2740 // exponent, significand meaningless 2741 category = fcZero; 2742 } else if (myexponent==0x7ff && mysignificand==0) { 2743 // exponent, significand meaningless 2744 category = fcInfinity; 2745 } else if (myexponent==0x7ff && mysignificand!=0) { 2746 // exponent meaningless 2747 category = fcNaN; 2748 *significandParts() = mysignificand; 2749 } else { 2750 category = fcNormal; 2751 exponent = myexponent - 1023; 2752 *significandParts() = mysignificand; 2753 if (myexponent==0) // denormal 2754 exponent = -1022; 2755 else 2756 *significandParts() |= 0x10000000000000LL; // integer bit 2757 } 2758} 2759 2760void 2761APFloat::initFromFloatAPInt(const APInt & api) 2762{ 2763 assert(api.getBitWidth()==32); 2764 uint32_t i = (uint32_t)*api.getRawData(); 2765 uint32_t myexponent = (i >> 23) & 0xff; 2766 uint32_t mysignificand = i & 0x7fffff; 2767 2768 initialize(&APFloat::IEEEsingle); 2769 assert(partCount()==1); 2770 2771 sign = i >> 31; 2772 if (myexponent==0 && mysignificand==0) { 2773 // exponent, significand meaningless 2774 category = fcZero; 2775 } else if (myexponent==0xff && mysignificand==0) { 2776 // exponent, significand meaningless 2777 category = fcInfinity; 2778 } else if (myexponent==0xff && mysignificand!=0) { 2779 // sign, exponent, significand meaningless 2780 category = fcNaN; 2781 *significandParts() = mysignificand; 2782 } else { 2783 category = fcNormal; 2784 exponent = myexponent - 127; //bias 2785 *significandParts() = mysignificand; 2786 if (myexponent==0) // denormal 2787 exponent = -126; 2788 else 2789 *significandParts() |= 0x800000; // integer bit 2790 } 2791} 2792 2793/// Treat api as containing the bits of a floating point number. Currently 2794/// we infer the floating point type from the size of the APInt. The 2795/// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful 2796/// when the size is anything else). 2797void 2798APFloat::initFromAPInt(const APInt& api, bool isIEEE) 2799{ 2800 if (api.getBitWidth() == 32) 2801 return initFromFloatAPInt(api); 2802 else if (api.getBitWidth()==64) 2803 return initFromDoubleAPInt(api); 2804 else if (api.getBitWidth()==80) 2805 return initFromF80LongDoubleAPInt(api); 2806 else if (api.getBitWidth()==128 && !isIEEE) 2807 return initFromPPCDoubleDoubleAPInt(api); 2808 else 2809 assert(0); 2810} 2811 2812APFloat::APFloat(const APInt& api, bool isIEEE) 2813{ 2814 initFromAPInt(api, isIEEE); 2815} 2816 2817APFloat::APFloat(float f) 2818{ 2819 APInt api = APInt(32, 0); 2820 initFromAPInt(api.floatToBits(f)); 2821} 2822 2823APFloat::APFloat(double d) 2824{ 2825 APInt api = APInt(64, 0); 2826 initFromAPInt(api.doubleToBits(d)); 2827} 2828