APFloat.cpp revision 7a7bc0f7248881cff466e964c4409c9c7da9da85
1//===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Neil Booth and is distributed under the 6// University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements a class to represent arbitrary precision floating 11// point values and provide a variety of arithmetic operations on them. 12// 13//===----------------------------------------------------------------------===// 14 15#include <cassert> 16#include <cstring> 17#include "llvm/ADT/APFloat.h" 18#include "llvm/Support/MathExtras.h" 19 20using namespace llvm; 21 22#define convolve(lhs, rhs) ((lhs) * 4 + (rhs)) 23 24/* Assumed in hexadecimal significand parsing, and conversion to 25 hexadecimal strings. */ 26COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0); 27 28namespace llvm { 29 30 /* Represents floating point arithmetic semantics. */ 31 struct fltSemantics { 32 /* The largest E such that 2^E is representable; this matches the 33 definition of IEEE 754. */ 34 exponent_t maxExponent; 35 36 /* The smallest E such that 2^E is a normalized number; this 37 matches the definition of IEEE 754. */ 38 exponent_t minExponent; 39 40 /* Number of bits in the significand. This includes the integer 41 bit. */ 42 unsigned char precision; 43 44 /* If the target format has an implicit integer bit. */ 45 bool implicitIntegerBit; 46 }; 47 48 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true }; 49 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true }; 50 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true }; 51 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, false }; 52 const fltSemantics APFloat::Bogus = { 0, 0, 0, false }; 53} 54 55/* Put a bunch of private, handy routines in an anonymous namespace. */ 56namespace { 57 58 inline unsigned int 59 partCountForBits(unsigned int bits) 60 { 61 return ((bits) + integerPartWidth - 1) / integerPartWidth; 62 } 63 64 unsigned int 65 digitValue(unsigned int c) 66 { 67 unsigned int r; 68 69 r = c - '0'; 70 if(r <= 9) 71 return r; 72 73 return -1U; 74 } 75 76 unsigned int 77 hexDigitValue (unsigned int c) 78 { 79 unsigned int r; 80 81 r = c - '0'; 82 if(r <= 9) 83 return r; 84 85 r = c - 'A'; 86 if(r <= 5) 87 return r + 10; 88 89 r = c - 'a'; 90 if(r <= 5) 91 return r + 10; 92 93 return -1U; 94 } 95 96 /* This is ugly and needs cleaning up, but I don't immediately see 97 how whilst remaining safe. */ 98 static int 99 totalExponent(const char *p, int exponentAdjustment) 100 { 101 integerPart unsignedExponent; 102 bool negative, overflow; 103 long exponent; 104 105 /* Move past the exponent letter and sign to the digits. */ 106 p++; 107 negative = *p == '-'; 108 if(*p == '-' || *p == '+') 109 p++; 110 111 unsignedExponent = 0; 112 overflow = false; 113 for(;;) { 114 unsigned int value; 115 116 value = digitValue(*p); 117 if(value == -1U) 118 break; 119 120 p++; 121 unsignedExponent = unsignedExponent * 10 + value; 122 if(unsignedExponent > 65535) 123 overflow = true; 124 } 125 126 if(exponentAdjustment > 65535 || exponentAdjustment < -65536) 127 overflow = true; 128 129 if(!overflow) { 130 exponent = unsignedExponent; 131 if(negative) 132 exponent = -exponent; 133 exponent += exponentAdjustment; 134 if(exponent > 65535 || exponent < -65536) 135 overflow = true; 136 } 137 138 if(overflow) 139 exponent = negative ? -65536: 65535; 140 141 return exponent; 142 } 143 144 const char * 145 skipLeadingZeroesAndAnyDot(const char *p, const char **dot) 146 { 147 *dot = 0; 148 while(*p == '0') 149 p++; 150 151 if(*p == '.') { 152 *dot = p++; 153 while(*p == '0') 154 p++; 155 } 156 157 return p; 158 } 159 160 /* Return the trailing fraction of a hexadecimal number. 161 DIGITVALUE is the first hex digit of the fraction, P points to 162 the next digit. */ 163 lostFraction 164 trailingHexadecimalFraction(const char *p, unsigned int digitValue) 165 { 166 unsigned int hexDigit; 167 168 /* If the first trailing digit isn't 0 or 8 we can work out the 169 fraction immediately. */ 170 if(digitValue > 8) 171 return lfMoreThanHalf; 172 else if(digitValue < 8 && digitValue > 0) 173 return lfLessThanHalf; 174 175 /* Otherwise we need to find the first non-zero digit. */ 176 while(*p == '0') 177 p++; 178 179 hexDigit = hexDigitValue(*p); 180 181 /* If we ran off the end it is exactly zero or one-half, otherwise 182 a little more. */ 183 if(hexDigit == -1U) 184 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 185 else 186 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 187 } 188 189 /* Return the fraction lost were a bignum truncated losing the least 190 significant BITS bits. */ 191 lostFraction 192 lostFractionThroughTruncation(const integerPart *parts, 193 unsigned int partCount, 194 unsigned int bits) 195 { 196 unsigned int lsb; 197 198 lsb = APInt::tcLSB(parts, partCount); 199 200 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ 201 if(bits <= lsb) 202 return lfExactlyZero; 203 if(bits == lsb + 1) 204 return lfExactlyHalf; 205 if(bits <= partCount * integerPartWidth 206 && APInt::tcExtractBit(parts, bits - 1)) 207 return lfMoreThanHalf; 208 209 return lfLessThanHalf; 210 } 211 212 /* Shift DST right BITS bits noting lost fraction. */ 213 lostFraction 214 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits) 215 { 216 lostFraction lost_fraction; 217 218 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 219 220 APInt::tcShiftRight(dst, parts, bits); 221 222 return lost_fraction; 223 } 224 225 /* Combine the effect of two lost fractions. */ 226 lostFraction 227 combineLostFractions(lostFraction moreSignificant, 228 lostFraction lessSignificant) 229 { 230 if(lessSignificant != lfExactlyZero) { 231 if(moreSignificant == lfExactlyZero) 232 moreSignificant = lfLessThanHalf; 233 else if(moreSignificant == lfExactlyHalf) 234 moreSignificant = lfMoreThanHalf; 235 } 236 237 return moreSignificant; 238 } 239 240 /* Zero at the end to avoid modular arithmetic when adding one; used 241 when rounding up during hexadecimal output. */ 242 static const char hexDigitsLower[] = "0123456789abcdef0"; 243 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 244 static const char infinityL[] = "infinity"; 245 static const char infinityU[] = "INFINITY"; 246 static const char NaNL[] = "nan"; 247 static const char NaNU[] = "NAN"; 248 249 /* Write out an integerPart in hexadecimal, starting with the most 250 significant nibble. Write out exactly COUNT hexdigits, return 251 COUNT. */ 252 static unsigned int 253 partAsHex (char *dst, integerPart part, unsigned int count, 254 const char *hexDigitChars) 255 { 256 unsigned int result = count; 257 258 assert (count != 0 && count <= integerPartWidth / 4); 259 260 part >>= (integerPartWidth - 4 * count); 261 while (count--) { 262 dst[count] = hexDigitChars[part & 0xf]; 263 part >>= 4; 264 } 265 266 return result; 267 } 268 269 /* Write out an unsigned decimal integer. */ 270 static char * 271 writeUnsignedDecimal (char *dst, unsigned int n) 272 { 273 char buff[40], *p; 274 275 p = buff; 276 do 277 *p++ = '0' + n % 10; 278 while (n /= 10); 279 280 do 281 *dst++ = *--p; 282 while (p != buff); 283 284 return dst; 285 } 286 287 /* Write out a signed decimal integer. */ 288 static char * 289 writeSignedDecimal (char *dst, int value) 290 { 291 if (value < 0) { 292 *dst++ = '-'; 293 dst = writeUnsignedDecimal(dst, -(unsigned) value); 294 } else 295 dst = writeUnsignedDecimal(dst, value); 296 297 return dst; 298 } 299} 300 301/* Constructors. */ 302void 303APFloat::initialize(const fltSemantics *ourSemantics) 304{ 305 unsigned int count; 306 307 semantics = ourSemantics; 308 count = partCount(); 309 if(count > 1) 310 significand.parts = new integerPart[count]; 311} 312 313void 314APFloat::freeSignificand() 315{ 316 if(partCount() > 1) 317 delete [] significand.parts; 318} 319 320void 321APFloat::assign(const APFloat &rhs) 322{ 323 assert(semantics == rhs.semantics); 324 325 sign = rhs.sign; 326 category = rhs.category; 327 exponent = rhs.exponent; 328 if(category == fcNormal || category == fcNaN) 329 copySignificand(rhs); 330} 331 332void 333APFloat::copySignificand(const APFloat &rhs) 334{ 335 assert(category == fcNormal || category == fcNaN); 336 assert(rhs.partCount() >= partCount()); 337 338 APInt::tcAssign(significandParts(), rhs.significandParts(), 339 partCount()); 340} 341 342APFloat & 343APFloat::operator=(const APFloat &rhs) 344{ 345 if(this != &rhs) { 346 if(semantics != rhs.semantics) { 347 freeSignificand(); 348 initialize(rhs.semantics); 349 } 350 assign(rhs); 351 } 352 353 return *this; 354} 355 356bool 357APFloat::bitwiseIsEqual(const APFloat &rhs) const { 358 if (this == &rhs) 359 return true; 360 if (semantics != rhs.semantics || 361 category != rhs.category || 362 sign != rhs.sign) 363 return false; 364 if (category==fcZero || category==fcInfinity) 365 return true; 366 else if (category==fcNormal && exponent!=rhs.exponent) 367 return false; 368 else { 369 int i= partCount(); 370 const integerPart* p=significandParts(); 371 const integerPart* q=rhs.significandParts(); 372 for (; i>0; i--, p++, q++) { 373 if (*p != *q) 374 return false; 375 } 376 return true; 377 } 378} 379 380APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) 381{ 382 initialize(&ourSemantics); 383 sign = 0; 384 zeroSignificand(); 385 exponent = ourSemantics.precision - 1; 386 significandParts()[0] = value; 387 normalize(rmNearestTiesToEven, lfExactlyZero); 388} 389 390APFloat::APFloat(const fltSemantics &ourSemantics, 391 fltCategory ourCategory, bool negative) 392{ 393 initialize(&ourSemantics); 394 category = ourCategory; 395 sign = negative; 396 if(category == fcNormal) 397 category = fcZero; 398} 399 400APFloat::APFloat(const fltSemantics &ourSemantics, const char *text) 401{ 402 initialize(&ourSemantics); 403 convertFromString(text, rmNearestTiesToEven); 404} 405 406APFloat::APFloat(const APFloat &rhs) 407{ 408 initialize(rhs.semantics); 409 assign(rhs); 410} 411 412APFloat::~APFloat() 413{ 414 freeSignificand(); 415} 416 417unsigned int 418APFloat::partCount() const 419{ 420 return partCountForBits(semantics->precision + 1); 421} 422 423unsigned int 424APFloat::semanticsPrecision(const fltSemantics &semantics) 425{ 426 return semantics.precision; 427} 428 429const integerPart * 430APFloat::significandParts() const 431{ 432 return const_cast<APFloat *>(this)->significandParts(); 433} 434 435integerPart * 436APFloat::significandParts() 437{ 438 assert(category == fcNormal || category == fcNaN); 439 440 if(partCount() > 1) 441 return significand.parts; 442 else 443 return &significand.part; 444} 445 446void 447APFloat::zeroSignificand() 448{ 449 category = fcNormal; 450 APInt::tcSet(significandParts(), 0, partCount()); 451} 452 453/* Increment an fcNormal floating point number's significand. */ 454void 455APFloat::incrementSignificand() 456{ 457 integerPart carry; 458 459 carry = APInt::tcIncrement(significandParts(), partCount()); 460 461 /* Our callers should never cause us to overflow. */ 462 assert(carry == 0); 463} 464 465/* Add the significand of the RHS. Returns the carry flag. */ 466integerPart 467APFloat::addSignificand(const APFloat &rhs) 468{ 469 integerPart *parts; 470 471 parts = significandParts(); 472 473 assert(semantics == rhs.semantics); 474 assert(exponent == rhs.exponent); 475 476 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 477} 478 479/* Subtract the significand of the RHS with a borrow flag. Returns 480 the borrow flag. */ 481integerPart 482APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow) 483{ 484 integerPart *parts; 485 486 parts = significandParts(); 487 488 assert(semantics == rhs.semantics); 489 assert(exponent == rhs.exponent); 490 491 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 492 partCount()); 493} 494 495/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 496 on to the full-precision result of the multiplication. Returns the 497 lost fraction. */ 498lostFraction 499APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) 500{ 501 unsigned int omsb; // One, not zero, based MSB. 502 unsigned int partsCount, newPartsCount, precision; 503 integerPart *lhsSignificand; 504 integerPart scratch[4]; 505 integerPart *fullSignificand; 506 lostFraction lost_fraction; 507 508 assert(semantics == rhs.semantics); 509 510 precision = semantics->precision; 511 newPartsCount = partCountForBits(precision * 2); 512 513 if(newPartsCount > 4) 514 fullSignificand = new integerPart[newPartsCount]; 515 else 516 fullSignificand = scratch; 517 518 lhsSignificand = significandParts(); 519 partsCount = partCount(); 520 521 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 522 rhs.significandParts(), partsCount, partsCount); 523 524 lost_fraction = lfExactlyZero; 525 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 526 exponent += rhs.exponent; 527 528 if(addend) { 529 Significand savedSignificand = significand; 530 const fltSemantics *savedSemantics = semantics; 531 fltSemantics extendedSemantics; 532 opStatus status; 533 unsigned int extendedPrecision; 534 535 /* Normalize our MSB. */ 536 extendedPrecision = precision + precision - 1; 537 if(omsb != extendedPrecision) 538 { 539 APInt::tcShiftLeft(fullSignificand, newPartsCount, 540 extendedPrecision - omsb); 541 exponent -= extendedPrecision - omsb; 542 } 543 544 /* Create new semantics. */ 545 extendedSemantics = *semantics; 546 extendedSemantics.precision = extendedPrecision; 547 548 if(newPartsCount == 1) 549 significand.part = fullSignificand[0]; 550 else 551 significand.parts = fullSignificand; 552 semantics = &extendedSemantics; 553 554 APFloat extendedAddend(*addend); 555 status = extendedAddend.convert(extendedSemantics, rmTowardZero); 556 assert(status == opOK); 557 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 558 559 /* Restore our state. */ 560 if(newPartsCount == 1) 561 fullSignificand[0] = significand.part; 562 significand = savedSignificand; 563 semantics = savedSemantics; 564 565 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 566 } 567 568 exponent -= (precision - 1); 569 570 if(omsb > precision) { 571 unsigned int bits, significantParts; 572 lostFraction lf; 573 574 bits = omsb - precision; 575 significantParts = partCountForBits(omsb); 576 lf = shiftRight(fullSignificand, significantParts, bits); 577 lost_fraction = combineLostFractions(lf, lost_fraction); 578 exponent += bits; 579 } 580 581 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 582 583 if(newPartsCount > 4) 584 delete [] fullSignificand; 585 586 return lost_fraction; 587} 588 589/* Multiply the significands of LHS and RHS to DST. */ 590lostFraction 591APFloat::divideSignificand(const APFloat &rhs) 592{ 593 unsigned int bit, i, partsCount; 594 const integerPart *rhsSignificand; 595 integerPart *lhsSignificand, *dividend, *divisor; 596 integerPart scratch[4]; 597 lostFraction lost_fraction; 598 599 assert(semantics == rhs.semantics); 600 601 lhsSignificand = significandParts(); 602 rhsSignificand = rhs.significandParts(); 603 partsCount = partCount(); 604 605 if(partsCount > 2) 606 dividend = new integerPart[partsCount * 2]; 607 else 608 dividend = scratch; 609 610 divisor = dividend + partsCount; 611 612 /* Copy the dividend and divisor as they will be modified in-place. */ 613 for(i = 0; i < partsCount; i++) { 614 dividend[i] = lhsSignificand[i]; 615 divisor[i] = rhsSignificand[i]; 616 lhsSignificand[i] = 0; 617 } 618 619 exponent -= rhs.exponent; 620 621 unsigned int precision = semantics->precision; 622 623 /* Normalize the divisor. */ 624 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 625 if(bit) { 626 exponent += bit; 627 APInt::tcShiftLeft(divisor, partsCount, bit); 628 } 629 630 /* Normalize the dividend. */ 631 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 632 if(bit) { 633 exponent -= bit; 634 APInt::tcShiftLeft(dividend, partsCount, bit); 635 } 636 637 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) { 638 exponent--; 639 APInt::tcShiftLeft(dividend, partsCount, 1); 640 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 641 } 642 643 /* Long division. */ 644 for(bit = precision; bit; bit -= 1) { 645 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 646 APInt::tcSubtract(dividend, divisor, 0, partsCount); 647 APInt::tcSetBit(lhsSignificand, bit - 1); 648 } 649 650 APInt::tcShiftLeft(dividend, partsCount, 1); 651 } 652 653 /* Figure out the lost fraction. */ 654 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 655 656 if(cmp > 0) 657 lost_fraction = lfMoreThanHalf; 658 else if(cmp == 0) 659 lost_fraction = lfExactlyHalf; 660 else if(APInt::tcIsZero(dividend, partsCount)) 661 lost_fraction = lfExactlyZero; 662 else 663 lost_fraction = lfLessThanHalf; 664 665 if(partsCount > 2) 666 delete [] dividend; 667 668 return lost_fraction; 669} 670 671unsigned int 672APFloat::significandMSB() const 673{ 674 return APInt::tcMSB(significandParts(), partCount()); 675} 676 677unsigned int 678APFloat::significandLSB() const 679{ 680 return APInt::tcLSB(significandParts(), partCount()); 681} 682 683/* Note that a zero result is NOT normalized to fcZero. */ 684lostFraction 685APFloat::shiftSignificandRight(unsigned int bits) 686{ 687 /* Our exponent should not overflow. */ 688 assert((exponent_t) (exponent + bits) >= exponent); 689 690 exponent += bits; 691 692 return shiftRight(significandParts(), partCount(), bits); 693} 694 695/* Shift the significand left BITS bits, subtract BITS from its exponent. */ 696void 697APFloat::shiftSignificandLeft(unsigned int bits) 698{ 699 assert(bits < semantics->precision); 700 701 if(bits) { 702 unsigned int partsCount = partCount(); 703 704 APInt::tcShiftLeft(significandParts(), partsCount, bits); 705 exponent -= bits; 706 707 assert(!APInt::tcIsZero(significandParts(), partsCount)); 708 } 709} 710 711APFloat::cmpResult 712APFloat::compareAbsoluteValue(const APFloat &rhs) const 713{ 714 int compare; 715 716 assert(semantics == rhs.semantics); 717 assert(category == fcNormal); 718 assert(rhs.category == fcNormal); 719 720 compare = exponent - rhs.exponent; 721 722 /* If exponents are equal, do an unsigned bignum comparison of the 723 significands. */ 724 if(compare == 0) 725 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 726 partCount()); 727 728 if(compare > 0) 729 return cmpGreaterThan; 730 else if(compare < 0) 731 return cmpLessThan; 732 else 733 return cmpEqual; 734} 735 736/* Handle overflow. Sign is preserved. We either become infinity or 737 the largest finite number. */ 738APFloat::opStatus 739APFloat::handleOverflow(roundingMode rounding_mode) 740{ 741 /* Infinity? */ 742 if(rounding_mode == rmNearestTiesToEven 743 || rounding_mode == rmNearestTiesToAway 744 || (rounding_mode == rmTowardPositive && !sign) 745 || (rounding_mode == rmTowardNegative && sign)) 746 { 747 category = fcInfinity; 748 return (opStatus) (opOverflow | opInexact); 749 } 750 751 /* Otherwise we become the largest finite number. */ 752 category = fcNormal; 753 exponent = semantics->maxExponent; 754 APInt::tcSetLeastSignificantBits(significandParts(), partCount(), 755 semantics->precision); 756 757 return opInexact; 758} 759 760/* Returns TRUE if, when truncating the current number, with BIT the 761 new LSB, with the given lost fraction and rounding mode, the result 762 would need to be rounded away from zero (i.e., by increasing the 763 signficand). This routine must work for fcZero of both signs, and 764 fcNormal numbers. */ 765bool 766APFloat::roundAwayFromZero(roundingMode rounding_mode, 767 lostFraction lost_fraction, 768 unsigned int bit) const 769{ 770 /* NaNs and infinities should not have lost fractions. */ 771 assert(category == fcNormal || category == fcZero); 772 773 /* Current callers never pass this so we don't handle it. */ 774 assert(lost_fraction != lfExactlyZero); 775 776 switch(rounding_mode) { 777 default: 778 assert(0); 779 780 case rmNearestTiesToAway: 781 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 782 783 case rmNearestTiesToEven: 784 if(lost_fraction == lfMoreThanHalf) 785 return true; 786 787 /* Our zeroes don't have a significand to test. */ 788 if(lost_fraction == lfExactlyHalf && category != fcZero) 789 return APInt::tcExtractBit(significandParts(), bit); 790 791 return false; 792 793 case rmTowardZero: 794 return false; 795 796 case rmTowardPositive: 797 return sign == false; 798 799 case rmTowardNegative: 800 return sign == true; 801 } 802} 803 804APFloat::opStatus 805APFloat::normalize(roundingMode rounding_mode, 806 lostFraction lost_fraction) 807{ 808 unsigned int omsb; /* One, not zero, based MSB. */ 809 int exponentChange; 810 811 if(category != fcNormal) 812 return opOK; 813 814 /* Before rounding normalize the exponent of fcNormal numbers. */ 815 omsb = significandMSB() + 1; 816 817 if(omsb) { 818 /* OMSB is numbered from 1. We want to place it in the integer 819 bit numbered PRECISON if possible, with a compensating change in 820 the exponent. */ 821 exponentChange = omsb - semantics->precision; 822 823 /* If the resulting exponent is too high, overflow according to 824 the rounding mode. */ 825 if(exponent + exponentChange > semantics->maxExponent) 826 return handleOverflow(rounding_mode); 827 828 /* Subnormal numbers have exponent minExponent, and their MSB 829 is forced based on that. */ 830 if(exponent + exponentChange < semantics->minExponent) 831 exponentChange = semantics->minExponent - exponent; 832 833 /* Shifting left is easy as we don't lose precision. */ 834 if(exponentChange < 0) { 835 assert(lost_fraction == lfExactlyZero); 836 837 shiftSignificandLeft(-exponentChange); 838 839 return opOK; 840 } 841 842 if(exponentChange > 0) { 843 lostFraction lf; 844 845 /* Shift right and capture any new lost fraction. */ 846 lf = shiftSignificandRight(exponentChange); 847 848 lost_fraction = combineLostFractions(lf, lost_fraction); 849 850 /* Keep OMSB up-to-date. */ 851 if(omsb > (unsigned) exponentChange) 852 omsb -= (unsigned) exponentChange; 853 else 854 omsb = 0; 855 } 856 } 857 858 /* Now round the number according to rounding_mode given the lost 859 fraction. */ 860 861 /* As specified in IEEE 754, since we do not trap we do not report 862 underflow for exact results. */ 863 if(lost_fraction == lfExactlyZero) { 864 /* Canonicalize zeroes. */ 865 if(omsb == 0) 866 category = fcZero; 867 868 return opOK; 869 } 870 871 /* Increment the significand if we're rounding away from zero. */ 872 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 873 if(omsb == 0) 874 exponent = semantics->minExponent; 875 876 incrementSignificand(); 877 omsb = significandMSB() + 1; 878 879 /* Did the significand increment overflow? */ 880 if(omsb == (unsigned) semantics->precision + 1) { 881 /* Renormalize by incrementing the exponent and shifting our 882 significand right one. However if we already have the 883 maximum exponent we overflow to infinity. */ 884 if(exponent == semantics->maxExponent) { 885 category = fcInfinity; 886 887 return (opStatus) (opOverflow | opInexact); 888 } 889 890 shiftSignificandRight(1); 891 892 return opInexact; 893 } 894 } 895 896 /* The normal case - we were and are not denormal, and any 897 significand increment above didn't overflow. */ 898 if(omsb == semantics->precision) 899 return opInexact; 900 901 /* We have a non-zero denormal. */ 902 assert(omsb < semantics->precision); 903 assert(exponent == semantics->minExponent); 904 905 /* Canonicalize zeroes. */ 906 if(omsb == 0) 907 category = fcZero; 908 909 /* The fcZero case is a denormal that underflowed to zero. */ 910 return (opStatus) (opUnderflow | opInexact); 911} 912 913APFloat::opStatus 914APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) 915{ 916 switch(convolve(category, rhs.category)) { 917 default: 918 assert(0); 919 920 case convolve(fcNaN, fcZero): 921 case convolve(fcNaN, fcNormal): 922 case convolve(fcNaN, fcInfinity): 923 case convolve(fcNaN, fcNaN): 924 case convolve(fcNormal, fcZero): 925 case convolve(fcInfinity, fcNormal): 926 case convolve(fcInfinity, fcZero): 927 return opOK; 928 929 case convolve(fcZero, fcNaN): 930 case convolve(fcNormal, fcNaN): 931 case convolve(fcInfinity, fcNaN): 932 category = fcNaN; 933 copySignificand(rhs); 934 return opOK; 935 936 case convolve(fcNormal, fcInfinity): 937 case convolve(fcZero, fcInfinity): 938 category = fcInfinity; 939 sign = rhs.sign ^ subtract; 940 return opOK; 941 942 case convolve(fcZero, fcNormal): 943 assign(rhs); 944 sign = rhs.sign ^ subtract; 945 return opOK; 946 947 case convolve(fcZero, fcZero): 948 /* Sign depends on rounding mode; handled by caller. */ 949 return opOK; 950 951 case convolve(fcInfinity, fcInfinity): 952 /* Differently signed infinities can only be validly 953 subtracted. */ 954 if(sign ^ rhs.sign != subtract) { 955 category = fcNaN; 956 // Arbitrary but deterministic value for significand 957 APInt::tcSet(significandParts(), ~0U, partCount()); 958 return opInvalidOp; 959 } 960 961 return opOK; 962 963 case convolve(fcNormal, fcNormal): 964 return opDivByZero; 965 } 966} 967 968/* Add or subtract two normal numbers. */ 969lostFraction 970APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) 971{ 972 integerPart carry; 973 lostFraction lost_fraction; 974 int bits; 975 976 /* Determine if the operation on the absolute values is effectively 977 an addition or subtraction. */ 978 subtract ^= (sign ^ rhs.sign); 979 980 /* Are we bigger exponent-wise than the RHS? */ 981 bits = exponent - rhs.exponent; 982 983 /* Subtraction is more subtle than one might naively expect. */ 984 if(subtract) { 985 APFloat temp_rhs(rhs); 986 bool reverse; 987 988 if (bits == 0) { 989 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan; 990 lost_fraction = lfExactlyZero; 991 } else if (bits > 0) { 992 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 993 shiftSignificandLeft(1); 994 reverse = false; 995 } else { 996 lost_fraction = shiftSignificandRight(-bits - 1); 997 temp_rhs.shiftSignificandLeft(1); 998 reverse = true; 999 } 1000 1001 if (reverse) { 1002 carry = temp_rhs.subtractSignificand 1003 (*this, lost_fraction != lfExactlyZero); 1004 copySignificand(temp_rhs); 1005 sign = !sign; 1006 } else { 1007 carry = subtractSignificand 1008 (temp_rhs, lost_fraction != lfExactlyZero); 1009 } 1010 1011 /* Invert the lost fraction - it was on the RHS and 1012 subtracted. */ 1013 if(lost_fraction == lfLessThanHalf) 1014 lost_fraction = lfMoreThanHalf; 1015 else if(lost_fraction == lfMoreThanHalf) 1016 lost_fraction = lfLessThanHalf; 1017 1018 /* The code above is intended to ensure that no borrow is 1019 necessary. */ 1020 assert(!carry); 1021 } else { 1022 if(bits > 0) { 1023 APFloat temp_rhs(rhs); 1024 1025 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1026 carry = addSignificand(temp_rhs); 1027 } else { 1028 lost_fraction = shiftSignificandRight(-bits); 1029 carry = addSignificand(rhs); 1030 } 1031 1032 /* We have a guard bit; generating a carry cannot happen. */ 1033 assert(!carry); 1034 } 1035 1036 return lost_fraction; 1037} 1038 1039APFloat::opStatus 1040APFloat::multiplySpecials(const APFloat &rhs) 1041{ 1042 switch(convolve(category, rhs.category)) { 1043 default: 1044 assert(0); 1045 1046 case convolve(fcNaN, fcZero): 1047 case convolve(fcNaN, fcNormal): 1048 case convolve(fcNaN, fcInfinity): 1049 case convolve(fcNaN, fcNaN): 1050 return opOK; 1051 1052 case convolve(fcZero, fcNaN): 1053 case convolve(fcNormal, fcNaN): 1054 case convolve(fcInfinity, fcNaN): 1055 category = fcNaN; 1056 copySignificand(rhs); 1057 return opOK; 1058 1059 case convolve(fcNormal, fcInfinity): 1060 case convolve(fcInfinity, fcNormal): 1061 case convolve(fcInfinity, fcInfinity): 1062 category = fcInfinity; 1063 return opOK; 1064 1065 case convolve(fcZero, fcNormal): 1066 case convolve(fcNormal, fcZero): 1067 case convolve(fcZero, fcZero): 1068 category = fcZero; 1069 return opOK; 1070 1071 case convolve(fcZero, fcInfinity): 1072 case convolve(fcInfinity, fcZero): 1073 category = fcNaN; 1074 // Arbitrary but deterministic value for significand 1075 APInt::tcSet(significandParts(), ~0U, partCount()); 1076 return opInvalidOp; 1077 1078 case convolve(fcNormal, fcNormal): 1079 return opOK; 1080 } 1081} 1082 1083APFloat::opStatus 1084APFloat::divideSpecials(const APFloat &rhs) 1085{ 1086 switch(convolve(category, rhs.category)) { 1087 default: 1088 assert(0); 1089 1090 case convolve(fcNaN, fcZero): 1091 case convolve(fcNaN, fcNormal): 1092 case convolve(fcNaN, fcInfinity): 1093 case convolve(fcNaN, fcNaN): 1094 case convolve(fcInfinity, fcZero): 1095 case convolve(fcInfinity, fcNormal): 1096 case convolve(fcZero, fcInfinity): 1097 case convolve(fcZero, fcNormal): 1098 return opOK; 1099 1100 case convolve(fcZero, fcNaN): 1101 case convolve(fcNormal, fcNaN): 1102 case convolve(fcInfinity, fcNaN): 1103 category = fcNaN; 1104 copySignificand(rhs); 1105 return opOK; 1106 1107 case convolve(fcNormal, fcInfinity): 1108 category = fcZero; 1109 return opOK; 1110 1111 case convolve(fcNormal, fcZero): 1112 category = fcInfinity; 1113 return opDivByZero; 1114 1115 case convolve(fcInfinity, fcInfinity): 1116 case convolve(fcZero, fcZero): 1117 category = fcNaN; 1118 // Arbitrary but deterministic value for significand 1119 APInt::tcSet(significandParts(), ~0U, partCount()); 1120 return opInvalidOp; 1121 1122 case convolve(fcNormal, fcNormal): 1123 return opOK; 1124 } 1125} 1126 1127/* Change sign. */ 1128void 1129APFloat::changeSign() 1130{ 1131 /* Look mummy, this one's easy. */ 1132 sign = !sign; 1133} 1134 1135void 1136APFloat::clearSign() 1137{ 1138 /* So is this one. */ 1139 sign = 0; 1140} 1141 1142void 1143APFloat::copySign(const APFloat &rhs) 1144{ 1145 /* And this one. */ 1146 sign = rhs.sign; 1147} 1148 1149/* Normalized addition or subtraction. */ 1150APFloat::opStatus 1151APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode, 1152 bool subtract) 1153{ 1154 opStatus fs; 1155 1156 fs = addOrSubtractSpecials(rhs, subtract); 1157 1158 /* This return code means it was not a simple case. */ 1159 if(fs == opDivByZero) { 1160 lostFraction lost_fraction; 1161 1162 lost_fraction = addOrSubtractSignificand(rhs, subtract); 1163 fs = normalize(rounding_mode, lost_fraction); 1164 1165 /* Can only be zero if we lost no fraction. */ 1166 assert(category != fcZero || lost_fraction == lfExactlyZero); 1167 } 1168 1169 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1170 positive zero unless rounding to minus infinity, except that 1171 adding two like-signed zeroes gives that zero. */ 1172 if(category == fcZero) { 1173 if(rhs.category != fcZero || (sign == rhs.sign) == subtract) 1174 sign = (rounding_mode == rmTowardNegative); 1175 } 1176 1177 return fs; 1178} 1179 1180/* Normalized addition. */ 1181APFloat::opStatus 1182APFloat::add(const APFloat &rhs, roundingMode rounding_mode) 1183{ 1184 return addOrSubtract(rhs, rounding_mode, false); 1185} 1186 1187/* Normalized subtraction. */ 1188APFloat::opStatus 1189APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode) 1190{ 1191 return addOrSubtract(rhs, rounding_mode, true); 1192} 1193 1194/* Normalized multiply. */ 1195APFloat::opStatus 1196APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode) 1197{ 1198 opStatus fs; 1199 1200 sign ^= rhs.sign; 1201 fs = multiplySpecials(rhs); 1202 1203 if(category == fcNormal) { 1204 lostFraction lost_fraction = multiplySignificand(rhs, 0); 1205 fs = normalize(rounding_mode, lost_fraction); 1206 if(lost_fraction != lfExactlyZero) 1207 fs = (opStatus) (fs | opInexact); 1208 } 1209 1210 return fs; 1211} 1212 1213/* Normalized divide. */ 1214APFloat::opStatus 1215APFloat::divide(const APFloat &rhs, roundingMode rounding_mode) 1216{ 1217 opStatus fs; 1218 1219 sign ^= rhs.sign; 1220 fs = divideSpecials(rhs); 1221 1222 if(category == fcNormal) { 1223 lostFraction lost_fraction = divideSignificand(rhs); 1224 fs = normalize(rounding_mode, lost_fraction); 1225 if(lost_fraction != lfExactlyZero) 1226 fs = (opStatus) (fs | opInexact); 1227 } 1228 1229 return fs; 1230} 1231 1232/* Normalized remainder. This is not currently doing TRT. */ 1233APFloat::opStatus 1234APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) 1235{ 1236 opStatus fs; 1237 APFloat V = *this; 1238 unsigned int origSign = sign; 1239 fs = V.divide(rhs, rmNearestTiesToEven); 1240 if (fs == opDivByZero) 1241 return fs; 1242 1243 int parts = partCount(); 1244 integerPart *x = new integerPart[parts]; 1245 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1246 rmNearestTiesToEven); 1247 if (fs==opInvalidOp) 1248 return fs; 1249 1250 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1251 rmNearestTiesToEven); 1252 assert(fs==opOK); // should always work 1253 1254 fs = V.multiply(rhs, rounding_mode); 1255 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1256 1257 fs = subtract(V, rounding_mode); 1258 assert(fs==opOK || fs==opInexact); // likewise 1259 1260 if (isZero()) 1261 sign = origSign; // IEEE754 requires this 1262 delete[] x; 1263 return fs; 1264} 1265 1266/* Normalized fused-multiply-add. */ 1267APFloat::opStatus 1268APFloat::fusedMultiplyAdd(const APFloat &multiplicand, 1269 const APFloat &addend, 1270 roundingMode rounding_mode) 1271{ 1272 opStatus fs; 1273 1274 /* Post-multiplication sign, before addition. */ 1275 sign ^= multiplicand.sign; 1276 1277 /* If and only if all arguments are normal do we need to do an 1278 extended-precision calculation. */ 1279 if(category == fcNormal 1280 && multiplicand.category == fcNormal 1281 && addend.category == fcNormal) { 1282 lostFraction lost_fraction; 1283 1284 lost_fraction = multiplySignificand(multiplicand, &addend); 1285 fs = normalize(rounding_mode, lost_fraction); 1286 if(lost_fraction != lfExactlyZero) 1287 fs = (opStatus) (fs | opInexact); 1288 1289 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1290 positive zero unless rounding to minus infinity, except that 1291 adding two like-signed zeroes gives that zero. */ 1292 if(category == fcZero && sign != addend.sign) 1293 sign = (rounding_mode == rmTowardNegative); 1294 } else { 1295 fs = multiplySpecials(multiplicand); 1296 1297 /* FS can only be opOK or opInvalidOp. There is no more work 1298 to do in the latter case. The IEEE-754R standard says it is 1299 implementation-defined in this case whether, if ADDEND is a 1300 quiet NaN, we raise invalid op; this implementation does so. 1301 1302 If we need to do the addition we can do so with normal 1303 precision. */ 1304 if(fs == opOK) 1305 fs = addOrSubtract(addend, rounding_mode, false); 1306 } 1307 1308 return fs; 1309} 1310 1311/* Comparison requires normalized numbers. */ 1312APFloat::cmpResult 1313APFloat::compare(const APFloat &rhs) const 1314{ 1315 cmpResult result; 1316 1317 assert(semantics == rhs.semantics); 1318 1319 switch(convolve(category, rhs.category)) { 1320 default: 1321 assert(0); 1322 1323 case convolve(fcNaN, fcZero): 1324 case convolve(fcNaN, fcNormal): 1325 case convolve(fcNaN, fcInfinity): 1326 case convolve(fcNaN, fcNaN): 1327 case convolve(fcZero, fcNaN): 1328 case convolve(fcNormal, fcNaN): 1329 case convolve(fcInfinity, fcNaN): 1330 return cmpUnordered; 1331 1332 case convolve(fcInfinity, fcNormal): 1333 case convolve(fcInfinity, fcZero): 1334 case convolve(fcNormal, fcZero): 1335 if(sign) 1336 return cmpLessThan; 1337 else 1338 return cmpGreaterThan; 1339 1340 case convolve(fcNormal, fcInfinity): 1341 case convolve(fcZero, fcInfinity): 1342 case convolve(fcZero, fcNormal): 1343 if(rhs.sign) 1344 return cmpGreaterThan; 1345 else 1346 return cmpLessThan; 1347 1348 case convolve(fcInfinity, fcInfinity): 1349 if(sign == rhs.sign) 1350 return cmpEqual; 1351 else if(sign) 1352 return cmpLessThan; 1353 else 1354 return cmpGreaterThan; 1355 1356 case convolve(fcZero, fcZero): 1357 return cmpEqual; 1358 1359 case convolve(fcNormal, fcNormal): 1360 break; 1361 } 1362 1363 /* Two normal numbers. Do they have the same sign? */ 1364 if(sign != rhs.sign) { 1365 if(sign) 1366 result = cmpLessThan; 1367 else 1368 result = cmpGreaterThan; 1369 } else { 1370 /* Compare absolute values; invert result if negative. */ 1371 result = compareAbsoluteValue(rhs); 1372 1373 if(sign) { 1374 if(result == cmpLessThan) 1375 result = cmpGreaterThan; 1376 else if(result == cmpGreaterThan) 1377 result = cmpLessThan; 1378 } 1379 } 1380 1381 return result; 1382} 1383 1384APFloat::opStatus 1385APFloat::convert(const fltSemantics &toSemantics, 1386 roundingMode rounding_mode) 1387{ 1388 lostFraction lostFraction; 1389 unsigned int newPartCount, oldPartCount; 1390 opStatus fs; 1391 1392 lostFraction = lfExactlyZero; 1393 newPartCount = partCountForBits(toSemantics.precision + 1); 1394 oldPartCount = partCount(); 1395 1396 /* Handle storage complications. If our new form is wider, 1397 re-allocate our bit pattern into wider storage. If it is 1398 narrower, we ignore the excess parts, but if narrowing to a 1399 single part we need to free the old storage. 1400 Be careful not to reference significandParts for zeroes 1401 and infinities, since it aborts. */ 1402 if (newPartCount > oldPartCount) { 1403 integerPart *newParts; 1404 newParts = new integerPart[newPartCount]; 1405 APInt::tcSet(newParts, 0, newPartCount); 1406 if (category==fcNormal || category==fcNaN) 1407 APInt::tcAssign(newParts, significandParts(), oldPartCount); 1408 freeSignificand(); 1409 significand.parts = newParts; 1410 } else if (newPartCount < oldPartCount) { 1411 /* Capture any lost fraction through truncation of parts so we get 1412 correct rounding whilst normalizing. */ 1413 if (category==fcNormal) 1414 lostFraction = lostFractionThroughTruncation 1415 (significandParts(), oldPartCount, toSemantics.precision); 1416 if (newPartCount == 1) { 1417 integerPart newPart = 0; 1418 if (category==fcNormal || category==fcNaN) 1419 newPart = significandParts()[0]; 1420 freeSignificand(); 1421 significand.part = newPart; 1422 } 1423 } 1424 1425 if(category == fcNormal) { 1426 /* Re-interpret our bit-pattern. */ 1427 exponent += toSemantics.precision - semantics->precision; 1428 semantics = &toSemantics; 1429 fs = normalize(rounding_mode, lostFraction); 1430 } else if (category == fcNaN) { 1431 int shift = toSemantics.precision - semantics->precision; 1432 // No normalization here, just truncate 1433 if (shift>0) 1434 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 1435 else if (shift < 0) 1436 APInt::tcShiftRight(significandParts(), newPartCount, -shift); 1437 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) 1438 // does not give you back the same bits. This is dubious, and we 1439 // don't currently do it. You're really supposed to get 1440 // an invalid operation signal at runtime, but nobody does that. 1441 semantics = &toSemantics; 1442 fs = opOK; 1443 } else { 1444 semantics = &toSemantics; 1445 fs = opOK; 1446 } 1447 1448 return fs; 1449} 1450 1451/* Convert a floating point number to an integer according to the 1452 rounding mode. If the rounded integer value is out of range this 1453 returns an invalid operation exception. If the rounded value is in 1454 range but the floating point number is not the exact integer, the C 1455 standard doesn't require an inexact exception to be raised. IEEE 1456 854 does require it so we do that. 1457 1458 Note that for conversions to integer type the C standard requires 1459 round-to-zero to always be used. */ 1460APFloat::opStatus 1461APFloat::convertToInteger(integerPart *parts, unsigned int width, 1462 bool isSigned, 1463 roundingMode rounding_mode) const 1464{ 1465 lostFraction lost_fraction; 1466 unsigned int msb, partsCount; 1467 int bits; 1468 1469 partsCount = partCountForBits(width); 1470 1471 /* Handle the three special cases first. We produce 1472 a deterministic result even for the Invalid cases. */ 1473 if (category == fcNaN) { 1474 // Neither sign nor isSigned affects this. 1475 APInt::tcSet(parts, 0, partsCount); 1476 return opInvalidOp; 1477 } 1478 if (category == fcInfinity) { 1479 if (!sign && isSigned) 1480 APInt::tcSetLeastSignificantBits(parts, partsCount, width-1); 1481 else if (!sign && !isSigned) 1482 APInt::tcSetLeastSignificantBits(parts, partsCount, width); 1483 else if (sign && isSigned) { 1484 APInt::tcSetLeastSignificantBits(parts, partsCount, 1); 1485 APInt::tcShiftLeft(parts, partsCount, width-1); 1486 } else // sign && !isSigned 1487 APInt::tcSet(parts, 0, partsCount); 1488 return opInvalidOp; 1489 } 1490 if (category == fcZero) { 1491 APInt::tcSet(parts, 0, partsCount); 1492 return opOK; 1493 } 1494 1495 /* Shift the bit pattern so the fraction is lost. */ 1496 APFloat tmp(*this); 1497 1498 bits = (int) semantics->precision - 1 - exponent; 1499 1500 if(bits > 0) { 1501 lost_fraction = tmp.shiftSignificandRight(bits); 1502 } else { 1503 if (-bits >= semantics->precision) { 1504 // Unrepresentably large. 1505 if (!sign && isSigned) 1506 APInt::tcSetLeastSignificantBits(parts, partsCount, width-1); 1507 else if (!sign && !isSigned) 1508 APInt::tcSetLeastSignificantBits(parts, partsCount, width); 1509 else if (sign && isSigned) { 1510 APInt::tcSetLeastSignificantBits(parts, partsCount, 1); 1511 APInt::tcShiftLeft(parts, partsCount, width-1); 1512 } else // sign && !isSigned 1513 APInt::tcSet(parts, 0, partsCount); 1514 return (opStatus)(opOverflow | opInexact); 1515 } 1516 tmp.shiftSignificandLeft(-bits); 1517 lost_fraction = lfExactlyZero; 1518 } 1519 1520 if(lost_fraction != lfExactlyZero 1521 && tmp.roundAwayFromZero(rounding_mode, lost_fraction, 0)) 1522 tmp.incrementSignificand(); 1523 1524 msb = tmp.significandMSB(); 1525 1526 /* Negative numbers cannot be represented as unsigned. */ 1527 if(!isSigned && tmp.sign && msb != -1U) 1528 return opInvalidOp; 1529 1530 /* It takes exponent + 1 bits to represent the truncated floating 1531 point number without its sign. We lose a bit for the sign, but 1532 the maximally negative integer is a special case. */ 1533 if(msb + 1 > width) /* !! Not same as msb >= width !! */ 1534 return opInvalidOp; 1535 1536 if(isSigned && msb + 1 == width 1537 && (!tmp.sign || tmp.significandLSB() != msb)) 1538 return opInvalidOp; 1539 1540 APInt::tcAssign(parts, tmp.significandParts(), partsCount); 1541 1542 if(tmp.sign) 1543 APInt::tcNegate(parts, partsCount); 1544 1545 if(lost_fraction == lfExactlyZero) 1546 return opOK; 1547 else 1548 return opInexact; 1549} 1550 1551/* Convert an unsigned integer SRC to a floating point number, 1552 rounding according to ROUNDING_MODE. The sign of the floating 1553 point number is not modified. */ 1554APFloat::opStatus 1555APFloat::convertFromUnsignedParts(const integerPart *src, 1556 unsigned int srcCount, 1557 roundingMode rounding_mode) 1558{ 1559 unsigned int dstCount; 1560 lostFraction lost_fraction; 1561 integerPart *dst; 1562 1563 category = fcNormal; 1564 exponent = semantics->precision - 1; 1565 1566 dst = significandParts(); 1567 dstCount = partCount(); 1568 1569 /* We need to capture the non-zero most significant parts. */ 1570 while (srcCount > dstCount && src[srcCount - 1] == 0) 1571 srcCount--; 1572 1573 /* Copy the bit image of as many parts as we can. If we are wider, 1574 zero-out remaining parts. */ 1575 if (dstCount >= srcCount) { 1576 APInt::tcAssign(dst, src, srcCount); 1577 while (srcCount < dstCount) 1578 dst[srcCount++] = 0; 1579 lost_fraction = lfExactlyZero; 1580 } else { 1581 exponent += (srcCount - dstCount) * integerPartWidth; 1582 APInt::tcAssign(dst, src + (srcCount - dstCount), dstCount); 1583 lost_fraction = lostFractionThroughTruncation(src, srcCount, 1584 dstCount * integerPartWidth); 1585 } 1586 1587 return normalize(rounding_mode, lost_fraction); 1588} 1589 1590/* FIXME: should this just take a const APInt reference? */ 1591APFloat::opStatus 1592APFloat::convertFromZeroExtendedInteger(const integerPart *parts, 1593 unsigned int width, bool isSigned, 1594 roundingMode rounding_mode) 1595{ 1596 unsigned int partCount = partCountForBits(width); 1597 APInt api = APInt(width, partCount, parts); 1598 1599 sign = false; 1600 if(isSigned && APInt::tcExtractBit(parts, width - 1)) { 1601 sign = true; 1602 api = -api; 1603 } 1604 1605 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 1606} 1607 1608APFloat::opStatus 1609APFloat::convertFromHexadecimalString(const char *p, 1610 roundingMode rounding_mode) 1611{ 1612 lostFraction lost_fraction; 1613 integerPart *significand; 1614 unsigned int bitPos, partsCount; 1615 const char *dot, *firstSignificantDigit; 1616 1617 zeroSignificand(); 1618 exponent = 0; 1619 category = fcNormal; 1620 1621 significand = significandParts(); 1622 partsCount = partCount(); 1623 bitPos = partsCount * integerPartWidth; 1624 1625 /* Skip leading zeroes and any (hexa)decimal point. */ 1626 p = skipLeadingZeroesAndAnyDot(p, &dot); 1627 firstSignificantDigit = p; 1628 1629 for(;;) { 1630 integerPart hex_value; 1631 1632 if(*p == '.') { 1633 assert(dot == 0); 1634 dot = p++; 1635 } 1636 1637 hex_value = hexDigitValue(*p); 1638 if(hex_value == -1U) { 1639 lost_fraction = lfExactlyZero; 1640 break; 1641 } 1642 1643 p++; 1644 1645 /* Store the number whilst 4-bit nibbles remain. */ 1646 if(bitPos) { 1647 bitPos -= 4; 1648 hex_value <<= bitPos % integerPartWidth; 1649 significand[bitPos / integerPartWidth] |= hex_value; 1650 } else { 1651 lost_fraction = trailingHexadecimalFraction(p, hex_value); 1652 while(hexDigitValue(*p) != -1U) 1653 p++; 1654 break; 1655 } 1656 } 1657 1658 /* Hex floats require an exponent but not a hexadecimal point. */ 1659 assert(*p == 'p' || *p == 'P'); 1660 1661 /* Ignore the exponent if we are zero. */ 1662 if(p != firstSignificantDigit) { 1663 int expAdjustment; 1664 1665 /* Implicit hexadecimal point? */ 1666 if(!dot) 1667 dot = p; 1668 1669 /* Calculate the exponent adjustment implicit in the number of 1670 significant digits. */ 1671 expAdjustment = dot - firstSignificantDigit; 1672 if(expAdjustment < 0) 1673 expAdjustment++; 1674 expAdjustment = expAdjustment * 4 - 1; 1675 1676 /* Adjust for writing the significand starting at the most 1677 significant nibble. */ 1678 expAdjustment += semantics->precision; 1679 expAdjustment -= partsCount * integerPartWidth; 1680 1681 /* Adjust for the given exponent. */ 1682 exponent = totalExponent(p, expAdjustment); 1683 } 1684 1685 return normalize(rounding_mode, lost_fraction); 1686} 1687 1688APFloat::opStatus 1689APFloat::convertFromString(const char *p, roundingMode rounding_mode) 1690{ 1691 /* Handle a leading minus sign. */ 1692 if(*p == '-') 1693 sign = 1, p++; 1694 else 1695 sign = 0; 1696 1697 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) 1698 return convertFromHexadecimalString(p + 2, rounding_mode); 1699 1700 assert(0 && "Decimal to binary conversions not yet implemented"); 1701 abort(); 1702} 1703 1704/* Write out a hexadecimal representation of the floating point value 1705 to DST, which must be of sufficient size, in the C99 form 1706 [-]0xh.hhhhp[+-]d. Return the number of characters written, 1707 excluding the terminating NUL. 1708 1709 If UPPERCASE, the output is in upper case, otherwise in lower case. 1710 1711 HEXDIGITS digits appear altogether, rounding the value if 1712 necessary. If HEXDIGITS is 0, the minimal precision to display the 1713 number precisely is used instead. If nothing would appear after 1714 the decimal point it is suppressed. 1715 1716 The decimal exponent is always printed and has at least one digit. 1717 Zero values display an exponent of zero. Infinities and NaNs 1718 appear as "infinity" or "nan" respectively. 1719 1720 The above rules are as specified by C99. There is ambiguity about 1721 what the leading hexadecimal digit should be. This implementation 1722 uses whatever is necessary so that the exponent is displayed as 1723 stored. This implies the exponent will fall within the IEEE format 1724 range, and the leading hexadecimal digit will be 0 (for denormals), 1725 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 1726 any other digits zero). 1727*/ 1728unsigned int 1729APFloat::convertToHexString(char *dst, unsigned int hexDigits, 1730 bool upperCase, roundingMode rounding_mode) const 1731{ 1732 char *p; 1733 1734 p = dst; 1735 if (sign) 1736 *dst++ = '-'; 1737 1738 switch (category) { 1739 case fcInfinity: 1740 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 1741 dst += sizeof infinityL - 1; 1742 break; 1743 1744 case fcNaN: 1745 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 1746 dst += sizeof NaNU - 1; 1747 break; 1748 1749 case fcZero: 1750 *dst++ = '0'; 1751 *dst++ = upperCase ? 'X': 'x'; 1752 *dst++ = '0'; 1753 if (hexDigits > 1) { 1754 *dst++ = '.'; 1755 memset (dst, '0', hexDigits - 1); 1756 dst += hexDigits - 1; 1757 } 1758 *dst++ = upperCase ? 'P': 'p'; 1759 *dst++ = '0'; 1760 break; 1761 1762 case fcNormal: 1763 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 1764 break; 1765 } 1766 1767 *dst = 0; 1768 1769 return dst - p; 1770} 1771 1772/* Does the hard work of outputting the correctly rounded hexadecimal 1773 form of a normal floating point number with the specified number of 1774 hexadecimal digits. If HEXDIGITS is zero the minimum number of 1775 digits necessary to print the value precisely is output. */ 1776char * 1777APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 1778 bool upperCase, 1779 roundingMode rounding_mode) const 1780{ 1781 unsigned int count, valueBits, shift, partsCount, outputDigits; 1782 const char *hexDigitChars; 1783 const integerPart *significand; 1784 char *p; 1785 bool roundUp; 1786 1787 *dst++ = '0'; 1788 *dst++ = upperCase ? 'X': 'x'; 1789 1790 roundUp = false; 1791 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 1792 1793 significand = significandParts(); 1794 partsCount = partCount(); 1795 1796 /* +3 because the first digit only uses the single integer bit, so 1797 we have 3 virtual zero most-significant-bits. */ 1798 valueBits = semantics->precision + 3; 1799 shift = integerPartWidth - valueBits % integerPartWidth; 1800 1801 /* The natural number of digits required ignoring trailing 1802 insignificant zeroes. */ 1803 outputDigits = (valueBits - significandLSB () + 3) / 4; 1804 1805 /* hexDigits of zero means use the required number for the 1806 precision. Otherwise, see if we are truncating. If we are, 1807 find out if we need to round away from zero. */ 1808 if (hexDigits) { 1809 if (hexDigits < outputDigits) { 1810 /* We are dropping non-zero bits, so need to check how to round. 1811 "bits" is the number of dropped bits. */ 1812 unsigned int bits; 1813 lostFraction fraction; 1814 1815 bits = valueBits - hexDigits * 4; 1816 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 1817 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 1818 } 1819 outputDigits = hexDigits; 1820 } 1821 1822 /* Write the digits consecutively, and start writing in the location 1823 of the hexadecimal point. We move the most significant digit 1824 left and add the hexadecimal point later. */ 1825 p = ++dst; 1826 1827 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 1828 1829 while (outputDigits && count) { 1830 integerPart part; 1831 1832 /* Put the most significant integerPartWidth bits in "part". */ 1833 if (--count == partsCount) 1834 part = 0; /* An imaginary higher zero part. */ 1835 else 1836 part = significand[count] << shift; 1837 1838 if (count && shift) 1839 part |= significand[count - 1] >> (integerPartWidth - shift); 1840 1841 /* Convert as much of "part" to hexdigits as we can. */ 1842 unsigned int curDigits = integerPartWidth / 4; 1843 1844 if (curDigits > outputDigits) 1845 curDigits = outputDigits; 1846 dst += partAsHex (dst, part, curDigits, hexDigitChars); 1847 outputDigits -= curDigits; 1848 } 1849 1850 if (roundUp) { 1851 char *q = dst; 1852 1853 /* Note that hexDigitChars has a trailing '0'. */ 1854 do { 1855 q--; 1856 *q = hexDigitChars[hexDigitValue (*q) + 1]; 1857 } while (*q == '0'); 1858 assert (q >= p); 1859 } else { 1860 /* Add trailing zeroes. */ 1861 memset (dst, '0', outputDigits); 1862 dst += outputDigits; 1863 } 1864 1865 /* Move the most significant digit to before the point, and if there 1866 is something after the decimal point add it. This must come 1867 after rounding above. */ 1868 p[-1] = p[0]; 1869 if (dst -1 == p) 1870 dst--; 1871 else 1872 p[0] = '.'; 1873 1874 /* Finally output the exponent. */ 1875 *dst++ = upperCase ? 'P': 'p'; 1876 1877 return writeSignedDecimal (dst, exponent); 1878} 1879 1880// For good performance it is desirable for different APFloats 1881// to produce different integers. 1882uint32_t 1883APFloat::getHashValue() const 1884{ 1885 if (category==fcZero) return sign<<8 | semantics->precision ; 1886 else if (category==fcInfinity) return sign<<9 | semantics->precision; 1887 else if (category==fcNaN) return 1<<10 | semantics->precision; 1888 else { 1889 uint32_t hash = sign<<11 | semantics->precision | exponent<<12; 1890 const integerPart* p = significandParts(); 1891 for (int i=partCount(); i>0; i--, p++) 1892 hash ^= ((uint32_t)*p) ^ (*p)>>32; 1893 return hash; 1894 } 1895} 1896 1897// Conversion from APFloat to/from host float/double. It may eventually be 1898// possible to eliminate these and have everybody deal with APFloats, but that 1899// will take a while. This approach will not easily extend to long double. 1900// Current implementation requires integerPartWidth==64, which is correct at 1901// the moment but could be made more general. 1902 1903// Denormals have exponent minExponent in APFloat, but minExponent-1 in 1904// the actual IEEE respresentations. We compensate for that here. 1905 1906APInt 1907APFloat::convertF80LongDoubleAPFloatToAPInt() const 1908{ 1909 assert(semantics == (const llvm::fltSemantics* const)&x87DoubleExtended); 1910 assert (partCount()==2); 1911 1912 uint64_t myexponent, mysignificand; 1913 1914 if (category==fcNormal) { 1915 myexponent = exponent+16383; //bias 1916 mysignificand = significandParts()[0]; 1917 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 1918 myexponent = 0; // denormal 1919 } else if (category==fcZero) { 1920 myexponent = 0; 1921 mysignificand = 0; 1922 } else if (category==fcInfinity) { 1923 myexponent = 0x7fff; 1924 mysignificand = 0x8000000000000000ULL; 1925 } else { 1926 assert(category == fcNaN && "Unknown category"); 1927 myexponent = 0x7fff; 1928 mysignificand = significandParts()[0]; 1929 } 1930 1931 uint64_t words[2]; 1932 words[0] = (((uint64_t)sign & 1) << 63) | 1933 ((myexponent & 0x7fff) << 48) | 1934 ((mysignificand >>16) & 0xffffffffffffLL); 1935 words[1] = mysignificand & 0xffff; 1936 return APInt(80, 2, words); 1937} 1938 1939APInt 1940APFloat::convertDoubleAPFloatToAPInt() const 1941{ 1942 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); 1943 assert (partCount()==1); 1944 1945 uint64_t myexponent, mysignificand; 1946 1947 if (category==fcNormal) { 1948 myexponent = exponent+1023; //bias 1949 mysignificand = *significandParts(); 1950 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 1951 myexponent = 0; // denormal 1952 } else if (category==fcZero) { 1953 myexponent = 0; 1954 mysignificand = 0; 1955 } else if (category==fcInfinity) { 1956 myexponent = 0x7ff; 1957 mysignificand = 0; 1958 } else { 1959 assert(category == fcNaN && "Unknown category!"); 1960 myexponent = 0x7ff; 1961 mysignificand = *significandParts(); 1962 } 1963 1964 return APInt(64, (((((uint64_t)sign & 1) << 63) | 1965 ((myexponent & 0x7ff) << 52) | 1966 (mysignificand & 0xfffffffffffffLL)))); 1967} 1968 1969APInt 1970APFloat::convertFloatAPFloatToAPInt() const 1971{ 1972 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); 1973 assert (partCount()==1); 1974 1975 uint32_t myexponent, mysignificand; 1976 1977 if (category==fcNormal) { 1978 myexponent = exponent+127; //bias 1979 mysignificand = *significandParts(); 1980 if (myexponent == 1 && !(mysignificand & 0x400000)) 1981 myexponent = 0; // denormal 1982 } else if (category==fcZero) { 1983 myexponent = 0; 1984 mysignificand = 0; 1985 } else if (category==fcInfinity) { 1986 myexponent = 0xff; 1987 mysignificand = 0; 1988 } else { 1989 assert(category == fcNaN && "Unknown category!"); 1990 myexponent = 0xff; 1991 mysignificand = *significandParts(); 1992 } 1993 1994 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | 1995 (mysignificand & 0x7fffff))); 1996} 1997 1998APInt 1999APFloat::convertToAPInt() const 2000{ 2001 if (semantics == (const llvm::fltSemantics* const)&IEEEsingle) 2002 return convertFloatAPFloatToAPInt(); 2003 2004 if (semantics == (const llvm::fltSemantics* const)&IEEEdouble) 2005 return convertDoubleAPFloatToAPInt(); 2006 2007 assert(semantics == (const llvm::fltSemantics* const)&x87DoubleExtended && 2008 "unknown format!"); 2009 return convertF80LongDoubleAPFloatToAPInt(); 2010} 2011 2012float 2013APFloat::convertToFloat() const 2014{ 2015 assert(semantics == (const llvm::fltSemantics* const)&IEEEsingle); 2016 APInt api = convertToAPInt(); 2017 return api.bitsToFloat(); 2018} 2019 2020double 2021APFloat::convertToDouble() const 2022{ 2023 assert(semantics == (const llvm::fltSemantics* const)&IEEEdouble); 2024 APInt api = convertToAPInt(); 2025 return api.bitsToDouble(); 2026} 2027 2028/// Integer bit is explicit in this format. Current Intel book does not 2029/// define meaning of: 2030/// exponent = all 1's, integer bit not set. 2031/// exponent = 0, integer bit set. (formerly "psuedodenormals") 2032/// exponent!=0 nor all 1's, integer bit not set. (formerly "unnormals") 2033void 2034APFloat::initFromF80LongDoubleAPInt(const APInt &api) 2035{ 2036 assert(api.getBitWidth()==80); 2037 uint64_t i1 = api.getRawData()[0]; 2038 uint64_t i2 = api.getRawData()[1]; 2039 uint64_t myexponent = (i1 >> 48) & 0x7fff; 2040 uint64_t mysignificand = ((i1 << 16) & 0xffffffffffff0000ULL) | 2041 (i2 & 0xffff); 2042 2043 initialize(&APFloat::x87DoubleExtended); 2044 assert(partCount()==2); 2045 2046 sign = i1>>63; 2047 if (myexponent==0 && mysignificand==0) { 2048 // exponent, significand meaningless 2049 category = fcZero; 2050 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 2051 // exponent, significand meaningless 2052 category = fcInfinity; 2053 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) { 2054 // exponent meaningless 2055 category = fcNaN; 2056 significandParts()[0] = mysignificand; 2057 significandParts()[1] = 0; 2058 } else { 2059 category = fcNormal; 2060 exponent = myexponent - 16383; 2061 significandParts()[0] = mysignificand; 2062 significandParts()[1] = 0; 2063 if (myexponent==0) // denormal 2064 exponent = -16382; 2065 } 2066} 2067 2068void 2069APFloat::initFromDoubleAPInt(const APInt &api) 2070{ 2071 assert(api.getBitWidth()==64); 2072 uint64_t i = *api.getRawData(); 2073 uint64_t myexponent = (i >> 52) & 0x7ff; 2074 uint64_t mysignificand = i & 0xfffffffffffffLL; 2075 2076 initialize(&APFloat::IEEEdouble); 2077 assert(partCount()==1); 2078 2079 sign = i>>63; 2080 if (myexponent==0 && mysignificand==0) { 2081 // exponent, significand meaningless 2082 category = fcZero; 2083 } else if (myexponent==0x7ff && mysignificand==0) { 2084 // exponent, significand meaningless 2085 category = fcInfinity; 2086 } else if (myexponent==0x7ff && mysignificand!=0) { 2087 // exponent meaningless 2088 category = fcNaN; 2089 *significandParts() = mysignificand; 2090 } else { 2091 category = fcNormal; 2092 exponent = myexponent - 1023; 2093 *significandParts() = mysignificand; 2094 if (myexponent==0) // denormal 2095 exponent = -1022; 2096 else 2097 *significandParts() |= 0x10000000000000LL; // integer bit 2098 } 2099} 2100 2101void 2102APFloat::initFromFloatAPInt(const APInt & api) 2103{ 2104 assert(api.getBitWidth()==32); 2105 uint32_t i = (uint32_t)*api.getRawData(); 2106 uint32_t myexponent = (i >> 23) & 0xff; 2107 uint32_t mysignificand = i & 0x7fffff; 2108 2109 initialize(&APFloat::IEEEsingle); 2110 assert(partCount()==1); 2111 2112 sign = i >> 31; 2113 if (myexponent==0 && mysignificand==0) { 2114 // exponent, significand meaningless 2115 category = fcZero; 2116 } else if (myexponent==0xff && mysignificand==0) { 2117 // exponent, significand meaningless 2118 category = fcInfinity; 2119 } else if (myexponent==0xff && mysignificand!=0) { 2120 // sign, exponent, significand meaningless 2121 category = fcNaN; 2122 *significandParts() = mysignificand; 2123 } else { 2124 category = fcNormal; 2125 exponent = myexponent - 127; //bias 2126 *significandParts() = mysignificand; 2127 if (myexponent==0) // denormal 2128 exponent = -126; 2129 else 2130 *significandParts() |= 0x800000; // integer bit 2131 } 2132} 2133 2134/// Treat api as containing the bits of a floating point number. Currently 2135/// we infer the floating point type from the size of the APInt. FIXME: This 2136/// breaks when we get to PPC128 and IEEE128 (but both cannot exist in the 2137/// same compile...) 2138void 2139APFloat::initFromAPInt(const APInt& api) 2140{ 2141 if (api.getBitWidth() == 32) 2142 return initFromFloatAPInt(api); 2143 else if (api.getBitWidth()==64) 2144 return initFromDoubleAPInt(api); 2145 else if (api.getBitWidth()==80) 2146 return initFromF80LongDoubleAPInt(api); 2147 else 2148 assert(0); 2149} 2150 2151APFloat::APFloat(const APInt& api) 2152{ 2153 initFromAPInt(api); 2154} 2155 2156APFloat::APFloat(float f) 2157{ 2158 APInt api = APInt(32, 0); 2159 initFromAPInt(api.floatToBits(f)); 2160} 2161 2162APFloat::APFloat(double d) 2163{ 2164 APInt api = APInt(64, 0); 2165 initFromAPInt(api.doubleToBits(d)); 2166} 2167