APFloat.cpp revision a11ef82207cd7ab8a2d39c946bf6dc34636f2904
1//===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Neil Booth and is distributed under the 6// University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements a class to represent arbitrary precision floating 11// point values and provide a variety of arithmetic operations on them. 12// 13//===----------------------------------------------------------------------===// 14 15#include <cassert> 16#include <cstring> 17#include "llvm/ADT/APFloat.h" 18#include "llvm/Support/MathExtras.h" 19 20using namespace llvm; 21 22#define convolve(lhs, rhs) ((lhs) * 4 + (rhs)) 23 24/* Assumed in hexadecimal significand parsing, and conversion to 25 hexadecimal strings. */ 26COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0); 27 28namespace llvm { 29 30 /* Represents floating point arithmetic semantics. */ 31 struct fltSemantics { 32 /* The largest E such that 2^E is representable; this matches the 33 definition of IEEE 754. */ 34 exponent_t maxExponent; 35 36 /* The smallest E such that 2^E is a normalized number; this 37 matches the definition of IEEE 754. */ 38 exponent_t minExponent; 39 40 /* Number of bits in the significand. This includes the integer 41 bit. */ 42 unsigned char precision; 43 44 /* If the target format has an implicit integer bit. */ 45 bool implicitIntegerBit; 46 }; 47 48 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true }; 49 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true }; 50 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true }; 51 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, false }; 52 const fltSemantics APFloat::Bogus = { 0, 0, 0, false }; 53} 54 55/* Put a bunch of private, handy routines in an anonymous namespace. */ 56namespace { 57 58 inline unsigned int 59 partCountForBits(unsigned int bits) 60 { 61 return ((bits) + integerPartWidth - 1) / integerPartWidth; 62 } 63 64 unsigned int 65 digitValue(unsigned int c) 66 { 67 unsigned int r; 68 69 r = c - '0'; 70 if(r <= 9) 71 return r; 72 73 return -1U; 74 } 75 76 unsigned int 77 hexDigitValue (unsigned int c) 78 { 79 unsigned int r; 80 81 r = c - '0'; 82 if(r <= 9) 83 return r; 84 85 r = c - 'A'; 86 if(r <= 5) 87 return r + 10; 88 89 r = c - 'a'; 90 if(r <= 5) 91 return r + 10; 92 93 return -1U; 94 } 95 96 /* This is ugly and needs cleaning up, but I don't immediately see 97 how whilst remaining safe. */ 98 static int 99 totalExponent(const char *p, int exponentAdjustment) 100 { 101 integerPart unsignedExponent; 102 bool negative, overflow; 103 long exponent; 104 105 /* Move past the exponent letter and sign to the digits. */ 106 p++; 107 negative = *p == '-'; 108 if(*p == '-' || *p == '+') 109 p++; 110 111 unsignedExponent = 0; 112 overflow = false; 113 for(;;) { 114 unsigned int value; 115 116 value = digitValue(*p); 117 if(value == -1U) 118 break; 119 120 p++; 121 unsignedExponent = unsignedExponent * 10 + value; 122 if(unsignedExponent > 65535) 123 overflow = true; 124 } 125 126 if(exponentAdjustment > 65535 || exponentAdjustment < -65536) 127 overflow = true; 128 129 if(!overflow) { 130 exponent = unsignedExponent; 131 if(negative) 132 exponent = -exponent; 133 exponent += exponentAdjustment; 134 if(exponent > 65535 || exponent < -65536) 135 overflow = true; 136 } 137 138 if(overflow) 139 exponent = negative ? -65536: 65535; 140 141 return exponent; 142 } 143 144 const char * 145 skipLeadingZeroesAndAnyDot(const char *p, const char **dot) 146 { 147 *dot = 0; 148 while(*p == '0') 149 p++; 150 151 if(*p == '.') { 152 *dot = p++; 153 while(*p == '0') 154 p++; 155 } 156 157 return p; 158 } 159 160 /* Return the trailing fraction of a hexadecimal number. 161 DIGITVALUE is the first hex digit of the fraction, P points to 162 the next digit. */ 163 lostFraction 164 trailingHexadecimalFraction(const char *p, unsigned int digitValue) 165 { 166 unsigned int hexDigit; 167 168 /* If the first trailing digit isn't 0 or 8 we can work out the 169 fraction immediately. */ 170 if(digitValue > 8) 171 return lfMoreThanHalf; 172 else if(digitValue < 8 && digitValue > 0) 173 return lfLessThanHalf; 174 175 /* Otherwise we need to find the first non-zero digit. */ 176 while(*p == '0') 177 p++; 178 179 hexDigit = hexDigitValue(*p); 180 181 /* If we ran off the end it is exactly zero or one-half, otherwise 182 a little more. */ 183 if(hexDigit == -1U) 184 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 185 else 186 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 187 } 188 189 /* Return the fraction lost were a bignum truncated losing the least 190 significant BITS bits. */ 191 lostFraction 192 lostFractionThroughTruncation(const integerPart *parts, 193 unsigned int partCount, 194 unsigned int bits) 195 { 196 unsigned int lsb; 197 198 lsb = APInt::tcLSB(parts, partCount); 199 200 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ 201 if(bits <= lsb) 202 return lfExactlyZero; 203 if(bits == lsb + 1) 204 return lfExactlyHalf; 205 if(bits <= partCount * integerPartWidth 206 && APInt::tcExtractBit(parts, bits - 1)) 207 return lfMoreThanHalf; 208 209 return lfLessThanHalf; 210 } 211 212 /* Shift DST right BITS bits noting lost fraction. */ 213 lostFraction 214 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits) 215 { 216 lostFraction lost_fraction; 217 218 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 219 220 APInt::tcShiftRight(dst, parts, bits); 221 222 return lost_fraction; 223 } 224 225 226 /* Zero at the end to avoid modular arithmetic when adding one; used 227 when rounding up during hexadecimal output. */ 228 static const char hexDigitsLower[] = "0123456789abcdef0"; 229 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 230 static const char infinityL[] = "infinity"; 231 static const char infinityU[] = "INFINITY"; 232 static const char NaNL[] = "nan"; 233 static const char NaNU[] = "NAN"; 234 235 /* Write out an integerPart in hexadecimal, starting with the most 236 significant nibble. Write out exactly COUNT hexdigits, return 237 COUNT. */ 238 static unsigned int 239 partAsHex (char *dst, integerPart part, unsigned int count, 240 const char *hexDigitChars) 241 { 242 unsigned int result = count; 243 244 assert (count != 0 && count <= integerPartWidth / 4); 245 246 part >>= (integerPartWidth - 4 * count); 247 while (count--) { 248 dst[count] = hexDigitChars[part & 0xf]; 249 part >>= 4; 250 } 251 252 return result; 253 } 254 255 /* Write out a decimal exponent. */ 256 static char * 257 writeDecimalExponent (char *dst, int exponent) 258 { 259 assert (exponent >= -65536 && exponent <= 65535); 260 261 if (exponent < 0) { 262 *dst++ = '-'; 263 exponent = -exponent; 264 } 265 266 if (exponent == 0) { 267 *dst++ = '0'; 268 } else { 269 char buff[12], *p; 270 271 p = buff; 272 while (exponent) { 273 *p++ = '0' + exponent % 10; 274 exponent /= 10; 275 } 276 277 do 278 *dst++ = *--p; 279 while (p != buff); 280 } 281 282 return dst; 283 } 284} 285 286/* Constructors. */ 287void 288APFloat::initialize(const fltSemantics *ourSemantics) 289{ 290 unsigned int count; 291 292 semantics = ourSemantics; 293 count = partCount(); 294 if(count > 1) 295 significand.parts = new integerPart[count]; 296} 297 298void 299APFloat::freeSignificand() 300{ 301 if(partCount() > 1) 302 delete [] significand.parts; 303} 304 305void 306APFloat::assign(const APFloat &rhs) 307{ 308 assert(semantics == rhs.semantics); 309 310 sign = rhs.sign; 311 category = rhs.category; 312 exponent = rhs.exponent; 313 if(category == fcNormal || category == fcNaN) 314 copySignificand(rhs); 315} 316 317void 318APFloat::copySignificand(const APFloat &rhs) 319{ 320 assert(category == fcNormal || category == fcNaN); 321 assert(rhs.partCount() >= partCount()); 322 323 APInt::tcAssign(significandParts(), rhs.significandParts(), 324 partCount()); 325} 326 327APFloat & 328APFloat::operator=(const APFloat &rhs) 329{ 330 if(this != &rhs) { 331 if(semantics != rhs.semantics) { 332 freeSignificand(); 333 initialize(rhs.semantics); 334 } 335 assign(rhs); 336 } 337 338 return *this; 339} 340 341bool 342APFloat::bitwiseIsEqual(const APFloat &rhs) const { 343 if (this == &rhs) 344 return true; 345 if (semantics != rhs.semantics || 346 category != rhs.category || 347 sign != rhs.sign) 348 return false; 349 if (category==fcZero || category==fcInfinity) 350 return true; 351 else if (category==fcNormal && exponent!=rhs.exponent) 352 return false; 353 else { 354 int i= partCount(); 355 const integerPart* p=significandParts(); 356 const integerPart* q=rhs.significandParts(); 357 for (; i>0; i--, p++, q++) { 358 if (*p != *q) 359 return false; 360 } 361 return true; 362 } 363} 364 365APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) 366{ 367 initialize(&ourSemantics); 368 sign = 0; 369 zeroSignificand(); 370 exponent = ourSemantics.precision - 1; 371 significandParts()[0] = value; 372 normalize(rmNearestTiesToEven, lfExactlyZero); 373} 374 375APFloat::APFloat(const fltSemantics &ourSemantics, 376 fltCategory ourCategory, bool negative) 377{ 378 initialize(&ourSemantics); 379 category = ourCategory; 380 sign = negative; 381 if(category == fcNormal) 382 category = fcZero; 383} 384 385APFloat::APFloat(const fltSemantics &ourSemantics, const char *text) 386{ 387 initialize(&ourSemantics); 388 convertFromString(text, rmNearestTiesToEven); 389} 390 391APFloat::APFloat(const APFloat &rhs) 392{ 393 initialize(rhs.semantics); 394 assign(rhs); 395} 396 397APFloat::~APFloat() 398{ 399 freeSignificand(); 400} 401 402unsigned int 403APFloat::partCount() const 404{ 405 return partCountForBits(semantics->precision + 1); 406} 407 408unsigned int 409APFloat::semanticsPrecision(const fltSemantics &semantics) 410{ 411 return semantics.precision; 412} 413 414const integerPart * 415APFloat::significandParts() const 416{ 417 return const_cast<APFloat *>(this)->significandParts(); 418} 419 420integerPart * 421APFloat::significandParts() 422{ 423 assert(category == fcNormal || category == fcNaN); 424 425 if(partCount() > 1) 426 return significand.parts; 427 else 428 return &significand.part; 429} 430 431/* Combine the effect of two lost fractions. */ 432lostFraction 433APFloat::combineLostFractions(lostFraction moreSignificant, 434 lostFraction lessSignificant) 435{ 436 if(lessSignificant != lfExactlyZero) { 437 if(moreSignificant == lfExactlyZero) 438 moreSignificant = lfLessThanHalf; 439 else if(moreSignificant == lfExactlyHalf) 440 moreSignificant = lfMoreThanHalf; 441 } 442 443 return moreSignificant; 444} 445 446void 447APFloat::zeroSignificand() 448{ 449 category = fcNormal; 450 APInt::tcSet(significandParts(), 0, partCount()); 451} 452 453/* Increment an fcNormal floating point number's significand. */ 454void 455APFloat::incrementSignificand() 456{ 457 integerPart carry; 458 459 carry = APInt::tcIncrement(significandParts(), partCount()); 460 461 /* Our callers should never cause us to overflow. */ 462 assert(carry == 0); 463} 464 465/* Add the significand of the RHS. Returns the carry flag. */ 466integerPart 467APFloat::addSignificand(const APFloat &rhs) 468{ 469 integerPart *parts; 470 471 parts = significandParts(); 472 473 assert(semantics == rhs.semantics); 474 assert(exponent == rhs.exponent); 475 476 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 477} 478 479/* Subtract the significand of the RHS with a borrow flag. Returns 480 the borrow flag. */ 481integerPart 482APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow) 483{ 484 integerPart *parts; 485 486 parts = significandParts(); 487 488 assert(semantics == rhs.semantics); 489 assert(exponent == rhs.exponent); 490 491 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 492 partCount()); 493} 494 495/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 496 on to the full-precision result of the multiplication. Returns the 497 lost fraction. */ 498lostFraction 499APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) 500{ 501 unsigned int omsb; // One, not zero, based MSB. 502 unsigned int partsCount, newPartsCount, precision; 503 integerPart *lhsSignificand; 504 integerPart scratch[4]; 505 integerPart *fullSignificand; 506 lostFraction lost_fraction; 507 508 assert(semantics == rhs.semantics); 509 510 precision = semantics->precision; 511 newPartsCount = partCountForBits(precision * 2); 512 513 if(newPartsCount > 4) 514 fullSignificand = new integerPart[newPartsCount]; 515 else 516 fullSignificand = scratch; 517 518 lhsSignificand = significandParts(); 519 partsCount = partCount(); 520 521 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 522 rhs.significandParts(), partsCount, partsCount); 523 524 lost_fraction = lfExactlyZero; 525 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 526 exponent += rhs.exponent; 527 528 if(addend) { 529 Significand savedSignificand = significand; 530 const fltSemantics *savedSemantics = semantics; 531 fltSemantics extendedSemantics; 532 opStatus status; 533 unsigned int extendedPrecision; 534 535 /* Normalize our MSB. */ 536 extendedPrecision = precision + precision - 1; 537 if(omsb != extendedPrecision) 538 { 539 APInt::tcShiftLeft(fullSignificand, newPartsCount, 540 extendedPrecision - omsb); 541 exponent -= extendedPrecision - omsb; 542 } 543 544 /* Create new semantics. */ 545 extendedSemantics = *semantics; 546 extendedSemantics.precision = extendedPrecision; 547 548 if(newPartsCount == 1) 549 significand.part = fullSignificand[0]; 550 else 551 significand.parts = fullSignificand; 552 semantics = &extendedSemantics; 553 554 APFloat extendedAddend(*addend); 555 status = extendedAddend.convert(extendedSemantics, rmTowardZero); 556 assert(status == opOK); 557 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 558 559 /* Restore our state. */ 560 if(newPartsCount == 1) 561 fullSignificand[0] = significand.part; 562 significand = savedSignificand; 563 semantics = savedSemantics; 564 565 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 566 } 567 568 exponent -= (precision - 1); 569 570 if(omsb > precision) { 571 unsigned int bits, significantParts; 572 lostFraction lf; 573 574 bits = omsb - precision; 575 significantParts = partCountForBits(omsb); 576 lf = shiftRight(fullSignificand, significantParts, bits); 577 lost_fraction = combineLostFractions(lf, lost_fraction); 578 exponent += bits; 579 } 580 581 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 582 583 if(newPartsCount > 4) 584 delete [] fullSignificand; 585 586 return lost_fraction; 587} 588 589/* Multiply the significands of LHS and RHS to DST. */ 590lostFraction 591APFloat::divideSignificand(const APFloat &rhs) 592{ 593 unsigned int bit, i, partsCount; 594 const integerPart *rhsSignificand; 595 integerPart *lhsSignificand, *dividend, *divisor; 596 integerPart scratch[4]; 597 lostFraction lost_fraction; 598 599 assert(semantics == rhs.semantics); 600 601 lhsSignificand = significandParts(); 602 rhsSignificand = rhs.significandParts(); 603 partsCount = partCount(); 604 605 if(partsCount > 2) 606 dividend = new integerPart[partsCount * 2]; 607 else 608 dividend = scratch; 609 610 divisor = dividend + partsCount; 611 612 /* Copy the dividend and divisor as they will be modified in-place. */ 613 for(i = 0; i < partsCount; i++) { 614 dividend[i] = lhsSignificand[i]; 615 divisor[i] = rhsSignificand[i]; 616 lhsSignificand[i] = 0; 617 } 618 619 exponent -= rhs.exponent; 620 621 unsigned int precision = semantics->precision; 622 623 /* Normalize the divisor. */ 624 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 625 if(bit) { 626 exponent += bit; 627 APInt::tcShiftLeft(divisor, partsCount, bit); 628 } 629 630 /* Normalize the dividend. */ 631 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 632 if(bit) { 633 exponent -= bit; 634 APInt::tcShiftLeft(dividend, partsCount, bit); 635 } 636 637 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) { 638 exponent--; 639 APInt::tcShiftLeft(dividend, partsCount, 1); 640 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 641 } 642 643 /* Long division. */ 644 for(bit = precision; bit; bit -= 1) { 645 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 646 APInt::tcSubtract(dividend, divisor, 0, partsCount); 647 APInt::tcSetBit(lhsSignificand, bit - 1); 648 } 649 650 APInt::tcShiftLeft(dividend, partsCount, 1); 651 } 652 653 /* Figure out the lost fraction. */ 654 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 655 656 if(cmp > 0) 657 lost_fraction = lfMoreThanHalf; 658 else if(cmp == 0) 659 lost_fraction = lfExactlyHalf; 660 else if(APInt::tcIsZero(dividend, partsCount)) 661 lost_fraction = lfExactlyZero; 662 else 663 lost_fraction = lfLessThanHalf; 664 665 if(partsCount > 2) 666 delete [] dividend; 667 668 return lost_fraction; 669} 670 671unsigned int 672APFloat::significandMSB() const 673{ 674 return APInt::tcMSB(significandParts(), partCount()); 675} 676 677unsigned int 678APFloat::significandLSB() const 679{ 680 return APInt::tcLSB(significandParts(), partCount()); 681} 682 683/* Note that a zero result is NOT normalized to fcZero. */ 684lostFraction 685APFloat::shiftSignificandRight(unsigned int bits) 686{ 687 /* Our exponent should not overflow. */ 688 assert((exponent_t) (exponent + bits) >= exponent); 689 690 exponent += bits; 691 692 return shiftRight(significandParts(), partCount(), bits); 693} 694 695/* Shift the significand left BITS bits, subtract BITS from its exponent. */ 696void 697APFloat::shiftSignificandLeft(unsigned int bits) 698{ 699 assert(bits < semantics->precision); 700 701 if(bits) { 702 unsigned int partsCount = partCount(); 703 704 APInt::tcShiftLeft(significandParts(), partsCount, bits); 705 exponent -= bits; 706 707 assert(!APInt::tcIsZero(significandParts(), partsCount)); 708 } 709} 710 711APFloat::cmpResult 712APFloat::compareAbsoluteValue(const APFloat &rhs) const 713{ 714 int compare; 715 716 assert(semantics == rhs.semantics); 717 assert(category == fcNormal); 718 assert(rhs.category == fcNormal); 719 720 compare = exponent - rhs.exponent; 721 722 /* If exponents are equal, do an unsigned bignum comparison of the 723 significands. */ 724 if(compare == 0) 725 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 726 partCount()); 727 728 if(compare > 0) 729 return cmpGreaterThan; 730 else if(compare < 0) 731 return cmpLessThan; 732 else 733 return cmpEqual; 734} 735 736/* Handle overflow. Sign is preserved. We either become infinity or 737 the largest finite number. */ 738APFloat::opStatus 739APFloat::handleOverflow(roundingMode rounding_mode) 740{ 741 /* Infinity? */ 742 if(rounding_mode == rmNearestTiesToEven 743 || rounding_mode == rmNearestTiesToAway 744 || (rounding_mode == rmTowardPositive && !sign) 745 || (rounding_mode == rmTowardNegative && sign)) 746 { 747 category = fcInfinity; 748 return (opStatus) (opOverflow | opInexact); 749 } 750 751 /* Otherwise we become the largest finite number. */ 752 category = fcNormal; 753 exponent = semantics->maxExponent; 754 APInt::tcSetLeastSignificantBits(significandParts(), partCount(), 755 semantics->precision); 756 757 return opInexact; 758} 759 760/* Returns TRUE if, when truncating the current number, with BIT the 761 new LSB, with the given lost fraction and rounding mode, the result 762 would need to be rounded away from zero (i.e., by increasing the 763 signficand). This routine must work for fcZero of both signs, and 764 fcNormal numbers. */ 765bool 766APFloat::roundAwayFromZero(roundingMode rounding_mode, 767 lostFraction lost_fraction, 768 unsigned int bit) const 769{ 770 /* NaNs and infinities should not have lost fractions. */ 771 assert(category == fcNormal || category == fcZero); 772 773 /* Current callers never pass this so we don't handle it. */ 774 assert(lost_fraction != lfExactlyZero); 775 776 switch(rounding_mode) { 777 default: 778 assert(0); 779 780 case rmNearestTiesToAway: 781 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 782 783 case rmNearestTiesToEven: 784 if(lost_fraction == lfMoreThanHalf) 785 return true; 786 787 /* Our zeroes don't have a significand to test. */ 788 if(lost_fraction == lfExactlyHalf && category != fcZero) 789 return APInt::tcExtractBit(significandParts(), bit); 790 791 return false; 792 793 case rmTowardZero: 794 return false; 795 796 case rmTowardPositive: 797 return sign == false; 798 799 case rmTowardNegative: 800 return sign == true; 801 } 802} 803 804APFloat::opStatus 805APFloat::normalize(roundingMode rounding_mode, 806 lostFraction lost_fraction) 807{ 808 unsigned int omsb; /* One, not zero, based MSB. */ 809 int exponentChange; 810 811 if(category != fcNormal) 812 return opOK; 813 814 /* Before rounding normalize the exponent of fcNormal numbers. */ 815 omsb = significandMSB() + 1; 816 817 if(omsb) { 818 /* OMSB is numbered from 1. We want to place it in the integer 819 bit numbered PRECISON if possible, with a compensating change in 820 the exponent. */ 821 exponentChange = omsb - semantics->precision; 822 823 /* If the resulting exponent is too high, overflow according to 824 the rounding mode. */ 825 if(exponent + exponentChange > semantics->maxExponent) 826 return handleOverflow(rounding_mode); 827 828 /* Subnormal numbers have exponent minExponent, and their MSB 829 is forced based on that. */ 830 if(exponent + exponentChange < semantics->minExponent) 831 exponentChange = semantics->minExponent - exponent; 832 833 /* Shifting left is easy as we don't lose precision. */ 834 if(exponentChange < 0) { 835 assert(lost_fraction == lfExactlyZero); 836 837 shiftSignificandLeft(-exponentChange); 838 839 return opOK; 840 } 841 842 if(exponentChange > 0) { 843 lostFraction lf; 844 845 /* Shift right and capture any new lost fraction. */ 846 lf = shiftSignificandRight(exponentChange); 847 848 lost_fraction = combineLostFractions(lf, lost_fraction); 849 850 /* Keep OMSB up-to-date. */ 851 if(omsb > (unsigned) exponentChange) 852 omsb -= (unsigned) exponentChange; 853 else 854 omsb = 0; 855 } 856 } 857 858 /* Now round the number according to rounding_mode given the lost 859 fraction. */ 860 861 /* As specified in IEEE 754, since we do not trap we do not report 862 underflow for exact results. */ 863 if(lost_fraction == lfExactlyZero) { 864 /* Canonicalize zeroes. */ 865 if(omsb == 0) 866 category = fcZero; 867 868 return opOK; 869 } 870 871 /* Increment the significand if we're rounding away from zero. */ 872 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 873 if(omsb == 0) 874 exponent = semantics->minExponent; 875 876 incrementSignificand(); 877 omsb = significandMSB() + 1; 878 879 /* Did the significand increment overflow? */ 880 if(omsb == (unsigned) semantics->precision + 1) { 881 /* Renormalize by incrementing the exponent and shifting our 882 significand right one. However if we already have the 883 maximum exponent we overflow to infinity. */ 884 if(exponent == semantics->maxExponent) { 885 category = fcInfinity; 886 887 return (opStatus) (opOverflow | opInexact); 888 } 889 890 shiftSignificandRight(1); 891 892 return opInexact; 893 } 894 } 895 896 /* The normal case - we were and are not denormal, and any 897 significand increment above didn't overflow. */ 898 if(omsb == semantics->precision) 899 return opInexact; 900 901 /* We have a non-zero denormal. */ 902 assert(omsb < semantics->precision); 903 assert(exponent == semantics->minExponent); 904 905 /* Canonicalize zeroes. */ 906 if(omsb == 0) 907 category = fcZero; 908 909 /* The fcZero case is a denormal that underflowed to zero. */ 910 return (opStatus) (opUnderflow | opInexact); 911} 912 913APFloat::opStatus 914APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) 915{ 916 switch(convolve(category, rhs.category)) { 917 default: 918 assert(0); 919 920 case convolve(fcNaN, fcZero): 921 case convolve(fcNaN, fcNormal): 922 case convolve(fcNaN, fcInfinity): 923 case convolve(fcNaN, fcNaN): 924 case convolve(fcNormal, fcZero): 925 case convolve(fcInfinity, fcNormal): 926 case convolve(fcInfinity, fcZero): 927 return opOK; 928 929 case convolve(fcZero, fcNaN): 930 case convolve(fcNormal, fcNaN): 931 case convolve(fcInfinity, fcNaN): 932 category = fcNaN; 933 copySignificand(rhs); 934 return opOK; 935 936 case convolve(fcNormal, fcInfinity): 937 case convolve(fcZero, fcInfinity): 938 category = fcInfinity; 939 sign = rhs.sign ^ subtract; 940 return opOK; 941 942 case convolve(fcZero, fcNormal): 943 assign(rhs); 944 sign = rhs.sign ^ subtract; 945 return opOK; 946 947 case convolve(fcZero, fcZero): 948 /* Sign depends on rounding mode; handled by caller. */ 949 return opOK; 950 951 case convolve(fcInfinity, fcInfinity): 952 /* Differently signed infinities can only be validly 953 subtracted. */ 954 if(sign ^ rhs.sign != subtract) { 955 category = fcNaN; 956 // Arbitrary but deterministic value for significand 957 APInt::tcSet(significandParts(), ~0U, partCount()); 958 return opInvalidOp; 959 } 960 961 return opOK; 962 963 case convolve(fcNormal, fcNormal): 964 return opDivByZero; 965 } 966} 967 968/* Add or subtract two normal numbers. */ 969lostFraction 970APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) 971{ 972 integerPart carry; 973 lostFraction lost_fraction; 974 int bits; 975 976 /* Determine if the operation on the absolute values is effectively 977 an addition or subtraction. */ 978 subtract ^= (sign ^ rhs.sign); 979 980 /* Are we bigger exponent-wise than the RHS? */ 981 bits = exponent - rhs.exponent; 982 983 /* Subtraction is more subtle than one might naively expect. */ 984 if(subtract) { 985 APFloat temp_rhs(rhs); 986 bool reverse; 987 988 if (bits == 0) { 989 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan; 990 lost_fraction = lfExactlyZero; 991 } else if (bits > 0) { 992 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 993 shiftSignificandLeft(1); 994 reverse = false; 995 } else { 996 lost_fraction = shiftSignificandRight(-bits - 1); 997 temp_rhs.shiftSignificandLeft(1); 998 reverse = true; 999 } 1000 1001 if (reverse) { 1002 carry = temp_rhs.subtractSignificand 1003 (*this, lost_fraction != lfExactlyZero); 1004 copySignificand(temp_rhs); 1005 sign = !sign; 1006 } else { 1007 carry = subtractSignificand 1008 (temp_rhs, lost_fraction != lfExactlyZero); 1009 } 1010 1011 /* Invert the lost fraction - it was on the RHS and 1012 subtracted. */ 1013 if(lost_fraction == lfLessThanHalf) 1014 lost_fraction = lfMoreThanHalf; 1015 else if(lost_fraction == lfMoreThanHalf) 1016 lost_fraction = lfLessThanHalf; 1017 1018 /* The code above is intended to ensure that no borrow is 1019 necessary. */ 1020 assert(!carry); 1021 } else { 1022 if(bits > 0) { 1023 APFloat temp_rhs(rhs); 1024 1025 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1026 carry = addSignificand(temp_rhs); 1027 } else { 1028 lost_fraction = shiftSignificandRight(-bits); 1029 carry = addSignificand(rhs); 1030 } 1031 1032 /* We have a guard bit; generating a carry cannot happen. */ 1033 assert(!carry); 1034 } 1035 1036 return lost_fraction; 1037} 1038 1039APFloat::opStatus 1040APFloat::multiplySpecials(const APFloat &rhs) 1041{ 1042 switch(convolve(category, rhs.category)) { 1043 default: 1044 assert(0); 1045 1046 case convolve(fcNaN, fcZero): 1047 case convolve(fcNaN, fcNormal): 1048 case convolve(fcNaN, fcInfinity): 1049 case convolve(fcNaN, fcNaN): 1050 return opOK; 1051 1052 case convolve(fcZero, fcNaN): 1053 case convolve(fcNormal, fcNaN): 1054 case convolve(fcInfinity, fcNaN): 1055 category = fcNaN; 1056 copySignificand(rhs); 1057 return opOK; 1058 1059 case convolve(fcNormal, fcInfinity): 1060 case convolve(fcInfinity, fcNormal): 1061 case convolve(fcInfinity, fcInfinity): 1062 category = fcInfinity; 1063 return opOK; 1064 1065 case convolve(fcZero, fcNormal): 1066 case convolve(fcNormal, fcZero): 1067 case convolve(fcZero, fcZero): 1068 category = fcZero; 1069 return opOK; 1070 1071 case convolve(fcZero, fcInfinity): 1072 case convolve(fcInfinity, fcZero): 1073 category = fcNaN; 1074 // Arbitrary but deterministic value for significand 1075 APInt::tcSet(significandParts(), ~0U, partCount()); 1076 return opInvalidOp; 1077 1078 case convolve(fcNormal, fcNormal): 1079 return opOK; 1080 } 1081} 1082 1083APFloat::opStatus 1084APFloat::divideSpecials(const APFloat &rhs) 1085{ 1086 switch(convolve(category, rhs.category)) { 1087 default: 1088 assert(0); 1089 1090 case convolve(fcNaN, fcZero): 1091 case convolve(fcNaN, fcNormal): 1092 case convolve(fcNaN, fcInfinity): 1093 case convolve(fcNaN, fcNaN): 1094 case convolve(fcInfinity, fcZero): 1095 case convolve(fcInfinity, fcNormal): 1096 case convolve(fcZero, fcInfinity): 1097 case convolve(fcZero, fcNormal): 1098 return opOK; 1099 1100 case convolve(fcZero, fcNaN): 1101 case convolve(fcNormal, fcNaN): 1102 case convolve(fcInfinity, fcNaN): 1103 category = fcNaN; 1104 copySignificand(rhs); 1105 return opOK; 1106 1107 case convolve(fcNormal, fcInfinity): 1108 category = fcZero; 1109 return opOK; 1110 1111 case convolve(fcNormal, fcZero): 1112 category = fcInfinity; 1113 return opDivByZero; 1114 1115 case convolve(fcInfinity, fcInfinity): 1116 case convolve(fcZero, fcZero): 1117 category = fcNaN; 1118 // Arbitrary but deterministic value for significand 1119 APInt::tcSet(significandParts(), ~0U, partCount()); 1120 return opInvalidOp; 1121 1122 case convolve(fcNormal, fcNormal): 1123 return opOK; 1124 } 1125} 1126 1127/* Change sign. */ 1128void 1129APFloat::changeSign() 1130{ 1131 /* Look mummy, this one's easy. */ 1132 sign = !sign; 1133} 1134 1135void 1136APFloat::clearSign() 1137{ 1138 /* So is this one. */ 1139 sign = 0; 1140} 1141 1142void 1143APFloat::copySign(const APFloat &rhs) 1144{ 1145 /* And this one. */ 1146 sign = rhs.sign; 1147} 1148 1149/* Normalized addition or subtraction. */ 1150APFloat::opStatus 1151APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode, 1152 bool subtract) 1153{ 1154 opStatus fs; 1155 1156 fs = addOrSubtractSpecials(rhs, subtract); 1157 1158 /* This return code means it was not a simple case. */ 1159 if(fs == opDivByZero) { 1160 lostFraction lost_fraction; 1161 1162 lost_fraction = addOrSubtractSignificand(rhs, subtract); 1163 fs = normalize(rounding_mode, lost_fraction); 1164 1165 /* Can only be zero if we lost no fraction. */ 1166 assert(category != fcZero || lost_fraction == lfExactlyZero); 1167 } 1168 1169 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1170 positive zero unless rounding to minus infinity, except that 1171 adding two like-signed zeroes gives that zero. */ 1172 if(category == fcZero) { 1173 if(rhs.category != fcZero || (sign == rhs.sign) == subtract) 1174 sign = (rounding_mode == rmTowardNegative); 1175 } 1176 1177 return fs; 1178} 1179 1180/* Normalized addition. */ 1181APFloat::opStatus 1182APFloat::add(const APFloat &rhs, roundingMode rounding_mode) 1183{ 1184 return addOrSubtract(rhs, rounding_mode, false); 1185} 1186 1187/* Normalized subtraction. */ 1188APFloat::opStatus 1189APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode) 1190{ 1191 return addOrSubtract(rhs, rounding_mode, true); 1192} 1193 1194/* Normalized multiply. */ 1195APFloat::opStatus 1196APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode) 1197{ 1198 opStatus fs; 1199 1200 sign ^= rhs.sign; 1201 fs = multiplySpecials(rhs); 1202 1203 if(category == fcNormal) { 1204 lostFraction lost_fraction = multiplySignificand(rhs, 0); 1205 fs = normalize(rounding_mode, lost_fraction); 1206 if(lost_fraction != lfExactlyZero) 1207 fs = (opStatus) (fs | opInexact); 1208 } 1209 1210 return fs; 1211} 1212 1213/* Normalized divide. */ 1214APFloat::opStatus 1215APFloat::divide(const APFloat &rhs, roundingMode rounding_mode) 1216{ 1217 opStatus fs; 1218 1219 sign ^= rhs.sign; 1220 fs = divideSpecials(rhs); 1221 1222 if(category == fcNormal) { 1223 lostFraction lost_fraction = divideSignificand(rhs); 1224 fs = normalize(rounding_mode, lost_fraction); 1225 if(lost_fraction != lfExactlyZero) 1226 fs = (opStatus) (fs | opInexact); 1227 } 1228 1229 return fs; 1230} 1231 1232/* Normalized remainder. This is not currently doing TRT. */ 1233APFloat::opStatus 1234APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) 1235{ 1236 opStatus fs; 1237 APFloat V = *this; 1238 unsigned int origSign = sign; 1239 fs = V.divide(rhs, rmNearestTiesToEven); 1240 if (fs == opDivByZero) 1241 return fs; 1242 1243 int parts = partCount(); 1244 integerPart *x = new integerPart[parts]; 1245 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1246 rmNearestTiesToEven); 1247 if (fs==opInvalidOp) 1248 return fs; 1249 1250 fs = V.convertFromInteger(x, parts * integerPartWidth, true, 1251 rmNearestTiesToEven); 1252 assert(fs==opOK); // should always work 1253 1254 fs = V.multiply(rhs, rounding_mode); 1255 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1256 1257 fs = subtract(V, rounding_mode); 1258 assert(fs==opOK || fs==opInexact); // likewise 1259 1260 if (isZero()) 1261 sign = origSign; // IEEE754 requires this 1262 delete[] x; 1263 return fs; 1264} 1265 1266/* Normalized fused-multiply-add. */ 1267APFloat::opStatus 1268APFloat::fusedMultiplyAdd(const APFloat &multiplicand, 1269 const APFloat &addend, 1270 roundingMode rounding_mode) 1271{ 1272 opStatus fs; 1273 1274 /* Post-multiplication sign, before addition. */ 1275 sign ^= multiplicand.sign; 1276 1277 /* If and only if all arguments are normal do we need to do an 1278 extended-precision calculation. */ 1279 if(category == fcNormal 1280 && multiplicand.category == fcNormal 1281 && addend.category == fcNormal) { 1282 lostFraction lost_fraction; 1283 1284 lost_fraction = multiplySignificand(multiplicand, &addend); 1285 fs = normalize(rounding_mode, lost_fraction); 1286 if(lost_fraction != lfExactlyZero) 1287 fs = (opStatus) (fs | opInexact); 1288 1289 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1290 positive zero unless rounding to minus infinity, except that 1291 adding two like-signed zeroes gives that zero. */ 1292 if(category == fcZero && sign != addend.sign) 1293 sign = (rounding_mode == rmTowardNegative); 1294 } else { 1295 fs = multiplySpecials(multiplicand); 1296 1297 /* FS can only be opOK or opInvalidOp. There is no more work 1298 to do in the latter case. The IEEE-754R standard says it is 1299 implementation-defined in this case whether, if ADDEND is a 1300 quiet NaN, we raise invalid op; this implementation does so. 1301 1302 If we need to do the addition we can do so with normal 1303 precision. */ 1304 if(fs == opOK) 1305 fs = addOrSubtract(addend, rounding_mode, false); 1306 } 1307 1308 return fs; 1309} 1310 1311/* Comparison requires normalized numbers. */ 1312APFloat::cmpResult 1313APFloat::compare(const APFloat &rhs) const 1314{ 1315 cmpResult result; 1316 1317 assert(semantics == rhs.semantics); 1318 1319 switch(convolve(category, rhs.category)) { 1320 default: 1321 assert(0); 1322 1323 case convolve(fcNaN, fcZero): 1324 case convolve(fcNaN, fcNormal): 1325 case convolve(fcNaN, fcInfinity): 1326 case convolve(fcNaN, fcNaN): 1327 case convolve(fcZero, fcNaN): 1328 case convolve(fcNormal, fcNaN): 1329 case convolve(fcInfinity, fcNaN): 1330 return cmpUnordered; 1331 1332 case convolve(fcInfinity, fcNormal): 1333 case convolve(fcInfinity, fcZero): 1334 case convolve(fcNormal, fcZero): 1335 if(sign) 1336 return cmpLessThan; 1337 else 1338 return cmpGreaterThan; 1339 1340 case convolve(fcNormal, fcInfinity): 1341 case convolve(fcZero, fcInfinity): 1342 case convolve(fcZero, fcNormal): 1343 if(rhs.sign) 1344 return cmpGreaterThan; 1345 else 1346 return cmpLessThan; 1347 1348 case convolve(fcInfinity, fcInfinity): 1349 if(sign == rhs.sign) 1350 return cmpEqual; 1351 else if(sign) 1352 return cmpLessThan; 1353 else 1354 return cmpGreaterThan; 1355 1356 case convolve(fcZero, fcZero): 1357 return cmpEqual; 1358 1359 case convolve(fcNormal, fcNormal): 1360 break; 1361 } 1362 1363 /* Two normal numbers. Do they have the same sign? */ 1364 if(sign != rhs.sign) { 1365 if(sign) 1366 result = cmpLessThan; 1367 else 1368 result = cmpGreaterThan; 1369 } else { 1370 /* Compare absolute values; invert result if negative. */ 1371 result = compareAbsoluteValue(rhs); 1372 1373 if(sign) { 1374 if(result == cmpLessThan) 1375 result = cmpGreaterThan; 1376 else if(result == cmpGreaterThan) 1377 result = cmpLessThan; 1378 } 1379 } 1380 1381 return result; 1382} 1383 1384APFloat::opStatus 1385APFloat::convert(const fltSemantics &toSemantics, 1386 roundingMode rounding_mode) 1387{ 1388 lostFraction lostFraction; 1389 unsigned int newPartCount, oldPartCount; 1390 opStatus fs; 1391 1392 lostFraction = lfExactlyZero; 1393 newPartCount = partCountForBits(toSemantics.precision + 1); 1394 oldPartCount = partCount(); 1395 1396 /* Handle storage complications. If our new form is wider, 1397 re-allocate our bit pattern into wider storage. If it is 1398 narrower, we ignore the excess parts, but if narrowing to a 1399 single part we need to free the old storage. 1400 Be careful not to reference significandParts for zeroes 1401 and infinities, since it aborts. */ 1402 if (newPartCount > oldPartCount) { 1403 integerPart *newParts; 1404 newParts = new integerPart[newPartCount]; 1405 APInt::tcSet(newParts, 0, newPartCount); 1406 if (category==fcNormal || category==fcNaN) 1407 APInt::tcAssign(newParts, significandParts(), oldPartCount); 1408 freeSignificand(); 1409 significand.parts = newParts; 1410 } else if (newPartCount < oldPartCount) { 1411 /* Capture any lost fraction through truncation of parts so we get 1412 correct rounding whilst normalizing. */ 1413 if (category==fcNormal) 1414 lostFraction = lostFractionThroughTruncation 1415 (significandParts(), oldPartCount, toSemantics.precision); 1416 if (newPartCount == 1) { 1417 integerPart newPart = 0; 1418 if (category==fcNormal || category==fcNaN) 1419 newPart = significandParts()[0]; 1420 freeSignificand(); 1421 significand.part = newPart; 1422 } 1423 } 1424 1425 if(category == fcNormal) { 1426 /* Re-interpret our bit-pattern. */ 1427 exponent += toSemantics.precision - semantics->precision; 1428 semantics = &toSemantics; 1429 fs = normalize(rounding_mode, lostFraction); 1430 } else if (category == fcNaN) { 1431 int shift = toSemantics.precision - semantics->precision; 1432 // No normalization here, just truncate 1433 if (shift>0) 1434 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 1435 else if (shift < 0) 1436 APInt::tcShiftRight(significandParts(), newPartCount, -shift); 1437 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) 1438 // does not give you back the same bits. This is dubious, and we 1439 // don't currently do it. You're really supposed to get 1440 // an invalid operation signal at runtime, but nobody does that. 1441 semantics = &toSemantics; 1442 fs = opOK; 1443 } else { 1444 semantics = &toSemantics; 1445 fs = opOK; 1446 } 1447 1448 return fs; 1449} 1450 1451/* Convert a floating point number to an integer according to the 1452 rounding mode. If the rounded integer value is out of range this 1453 returns an invalid operation exception. If the rounded value is in 1454 range but the floating point number is not the exact integer, the C 1455 standard doesn't require an inexact exception to be raised. IEEE 1456 854 does require it so we do that. 1457 1458 Note that for conversions to integer type the C standard requires 1459 round-to-zero to always be used. */ 1460APFloat::opStatus 1461APFloat::convertToInteger(integerPart *parts, unsigned int width, 1462 bool isSigned, 1463 roundingMode rounding_mode) const 1464{ 1465 lostFraction lost_fraction; 1466 unsigned int msb, partsCount; 1467 int bits; 1468 1469 partsCount = partCountForBits(width); 1470 1471 /* Handle the three special cases first. We produce 1472 a deterministic result even for the Invalid cases. */ 1473 if (category == fcNaN) { 1474 // Neither sign nor isSigned affects this. 1475 APInt::tcSet(parts, 0, partsCount); 1476 return opInvalidOp; 1477 } 1478 if (category == fcInfinity) { 1479 if (!sign && isSigned) 1480 APInt::tcSetLeastSignificantBits(parts, partsCount, width-1); 1481 else if (!sign && !isSigned) 1482 APInt::tcSetLeastSignificantBits(parts, partsCount, width); 1483 else if (sign && isSigned) { 1484 APInt::tcSetLeastSignificantBits(parts, partsCount, 1); 1485 APInt::tcShiftLeft(parts, partsCount, width-1); 1486 } else // sign && !isSigned 1487 APInt::tcSet(parts, 0, partsCount); 1488 return opInvalidOp; 1489 } 1490 if (category == fcZero) { 1491 APInt::tcSet(parts, 0, partsCount); 1492 return opOK; 1493 } 1494 1495 /* Shift the bit pattern so the fraction is lost. */ 1496 APFloat tmp(*this); 1497 1498 bits = (int) semantics->precision - 1 - exponent; 1499 1500 if(bits > 0) { 1501 lost_fraction = tmp.shiftSignificandRight(bits); 1502 } else { 1503 if (-bits >= semantics->precision) { 1504 // Unrepresentably large. 1505 if (!sign && isSigned) 1506 APInt::tcSetLeastSignificantBits(parts, partsCount, width-1); 1507 else if (!sign && !isSigned) 1508 APInt::tcSetLeastSignificantBits(parts, partsCount, width); 1509 else if (sign && isSigned) { 1510 APInt::tcSetLeastSignificantBits(parts, partsCount, 1); 1511 APInt::tcShiftLeft(parts, partsCount, width-1); 1512 } else // sign && !isSigned 1513 APInt::tcSet(parts, 0, partsCount); 1514 return (opStatus)(opOverflow | opInexact); 1515 } 1516 tmp.shiftSignificandLeft(-bits); 1517 lost_fraction = lfExactlyZero; 1518 } 1519 1520 if(lost_fraction != lfExactlyZero 1521 && tmp.roundAwayFromZero(rounding_mode, lost_fraction, 0)) 1522 tmp.incrementSignificand(); 1523 1524 msb = tmp.significandMSB(); 1525 1526 /* Negative numbers cannot be represented as unsigned. */ 1527 if(!isSigned && tmp.sign && msb != -1U) 1528 return opInvalidOp; 1529 1530 /* It takes exponent + 1 bits to represent the truncated floating 1531 point number without its sign. We lose a bit for the sign, but 1532 the maximally negative integer is a special case. */ 1533 if(msb + 1 > width) /* !! Not same as msb >= width !! */ 1534 return opInvalidOp; 1535 1536 if(isSigned && msb + 1 == width 1537 && (!tmp.sign || tmp.significandLSB() != msb)) 1538 return opInvalidOp; 1539 1540 APInt::tcAssign(parts, tmp.significandParts(), partsCount); 1541 1542 if(tmp.sign) 1543 APInt::tcNegate(parts, partsCount); 1544 1545 if(lost_fraction == lfExactlyZero) 1546 return opOK; 1547 else 1548 return opInexact; 1549} 1550 1551APFloat::opStatus 1552APFloat::convertFromUnsignedInteger(integerPart *parts, 1553 unsigned int partCount, 1554 roundingMode rounding_mode) 1555{ 1556 unsigned int msb, precision; 1557 lostFraction lost_fraction; 1558 1559 msb = APInt::tcMSB(parts, partCount) + 1; 1560 precision = semantics->precision; 1561 1562 category = fcNormal; 1563 exponent = precision - 1; 1564 1565 if(msb > precision) { 1566 exponent += (msb - precision); 1567 lost_fraction = shiftRight(parts, partCount, msb - precision); 1568 msb = precision; 1569 } else 1570 lost_fraction = lfExactlyZero; 1571 1572 /* Copy the bit image. */ 1573 zeroSignificand(); 1574 APInt::tcAssign(significandParts(), parts, partCountForBits(msb)); 1575 1576 return normalize(rounding_mode, lost_fraction); 1577} 1578 1579APFloat::opStatus 1580APFloat::convertFromInteger(const integerPart *parts, unsigned int width, 1581 bool isSigned, roundingMode rounding_mode) 1582{ 1583 unsigned int partCount = partCountForBits(width); 1584 opStatus status; 1585 APInt api = APInt(width, partCount, parts); 1586 integerPart *copy = new integerPart[partCount]; 1587 1588 sign = false; 1589 if(isSigned && APInt::tcExtractBit(parts, width - 1)) { 1590 sign = true; 1591 api = -api; 1592 } 1593 1594 APInt::tcAssign(copy, api.getRawData(), partCount); 1595 status = convertFromUnsignedInteger(copy, partCount, rounding_mode); 1596 return status; 1597} 1598 1599APFloat::opStatus 1600APFloat::convertFromHexadecimalString(const char *p, 1601 roundingMode rounding_mode) 1602{ 1603 lostFraction lost_fraction; 1604 integerPart *significand; 1605 unsigned int bitPos, partsCount; 1606 const char *dot, *firstSignificantDigit; 1607 1608 zeroSignificand(); 1609 exponent = 0; 1610 category = fcNormal; 1611 1612 significand = significandParts(); 1613 partsCount = partCount(); 1614 bitPos = partsCount * integerPartWidth; 1615 1616 /* Skip leading zeroes and any(hexa)decimal point. */ 1617 p = skipLeadingZeroesAndAnyDot(p, &dot); 1618 firstSignificantDigit = p; 1619 1620 for(;;) { 1621 integerPart hex_value; 1622 1623 if(*p == '.') { 1624 assert(dot == 0); 1625 dot = p++; 1626 } 1627 1628 hex_value = hexDigitValue(*p); 1629 if(hex_value == -1U) { 1630 lost_fraction = lfExactlyZero; 1631 break; 1632 } 1633 1634 p++; 1635 1636 /* Store the number whilst 4-bit nibbles remain. */ 1637 if(bitPos) { 1638 bitPos -= 4; 1639 hex_value <<= bitPos % integerPartWidth; 1640 significand[bitPos / integerPartWidth] |= hex_value; 1641 } else { 1642 lost_fraction = trailingHexadecimalFraction(p, hex_value); 1643 while(hexDigitValue(*p) != -1U) 1644 p++; 1645 break; 1646 } 1647 } 1648 1649 /* Hex floats require an exponent but not a hexadecimal point. */ 1650 assert(*p == 'p' || *p == 'P'); 1651 1652 /* Ignore the exponent if we are zero. */ 1653 if(p != firstSignificantDigit) { 1654 int expAdjustment; 1655 1656 /* Implicit hexadecimal point? */ 1657 if(!dot) 1658 dot = p; 1659 1660 /* Calculate the exponent adjustment implicit in the number of 1661 significant digits. */ 1662 expAdjustment = dot - firstSignificantDigit; 1663 if(expAdjustment < 0) 1664 expAdjustment++; 1665 expAdjustment = expAdjustment * 4 - 1; 1666 1667 /* Adjust for writing the significand starting at the most 1668 significant nibble. */ 1669 expAdjustment += semantics->precision; 1670 expAdjustment -= partsCount * integerPartWidth; 1671 1672 /* Adjust for the given exponent. */ 1673 exponent = totalExponent(p, expAdjustment); 1674 } 1675 1676 return normalize(rounding_mode, lost_fraction); 1677} 1678 1679APFloat::opStatus 1680APFloat::convertFromString(const char *p, roundingMode rounding_mode) 1681{ 1682 /* Handle a leading minus sign. */ 1683 if(*p == '-') 1684 sign = 1, p++; 1685 else 1686 sign = 0; 1687 1688 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) 1689 return convertFromHexadecimalString(p + 2, rounding_mode); 1690 1691 assert(0 && "Decimal to binary conversions not yet implemented"); 1692 abort(); 1693} 1694 1695/* Write out a hexadecimal representation of the floating point value 1696 to DST, which must be of sufficient size, in the C99 form 1697 [-]0xh.hhhhp[+-]d. Return the number of characters written, 1698 excluding the terminating NUL. 1699 1700 If UPPERCASE, the output is in upper case, otherwise in lower case. 1701 1702 HEXDIGITS digits appear altogether, rounding the value if 1703 necessary. If HEXDIGITS is 0, the minimal precision to display the 1704 number precisely is used instead. If nothing would appear after 1705 the decimal point it is suppressed. 1706 1707 The decimal exponent is always printed and has at least one digit. 1708 Zero values display an exponent of zero. Infinities and NaNs 1709 appear as "infinity" or "nan" respectively. 1710 1711 The above rules are as specified by C99. There is ambiguity about 1712 what the leading hexadecimal digit should be. This implementation 1713 uses whatever is necessary so that the exponent is displayed as 1714 stored. This implies the exponent will fall within the IEEE format 1715 range, and the leading hexadecimal digit will be 0 (for denormals), 1716 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 1717 any other digits zero). 1718*/ 1719unsigned int 1720APFloat::convertToHexString(char *dst, unsigned int hexDigits, 1721 bool upperCase, roundingMode rounding_mode) const 1722{ 1723 char *p; 1724 1725 p = dst; 1726 if (sign) 1727 *dst++ = '-'; 1728 1729 switch (category) { 1730 case fcInfinity: 1731 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 1732 dst += sizeof infinityL - 1; 1733 break; 1734 1735 case fcNaN: 1736 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 1737 dst += sizeof NaNU - 1; 1738 break; 1739 1740 case fcZero: 1741 *dst++ = '0'; 1742 *dst++ = upperCase ? 'X': 'x'; 1743 *dst++ = '0'; 1744 if (hexDigits > 1) { 1745 *dst++ = '.'; 1746 memset (dst, '0', hexDigits - 1); 1747 dst += hexDigits - 1; 1748 } 1749 *dst++ = upperCase ? 'P': 'p'; 1750 *dst++ = '0'; 1751 break; 1752 1753 case fcNormal: 1754 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 1755 break; 1756 } 1757 1758 *dst = 0; 1759 1760 return dst - p; 1761} 1762 1763/* Does the hard work of outputting the correctly rounded hexadecimal 1764 form of a normal floating point number with the specified number of 1765 hexadecimal digits. If HEXDIGITS is zero the minimum number of 1766 digits necessary to print the value precisely is output. */ 1767char * 1768APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 1769 bool upperCase, 1770 roundingMode rounding_mode) const 1771{ 1772 unsigned int count, valueBits, shift, partsCount, outputDigits; 1773 const char *hexDigitChars; 1774 const integerPart *significand; 1775 char *p; 1776 bool roundUp; 1777 1778 *dst++ = '0'; 1779 *dst++ = upperCase ? 'X': 'x'; 1780 1781 roundUp = false; 1782 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 1783 1784 significand = significandParts(); 1785 partsCount = partCount(); 1786 1787 /* +3 because the first digit only uses the single integer bit, so 1788 we have 3 virtual zero most-significant-bits. */ 1789 valueBits = semantics->precision + 3; 1790 shift = integerPartWidth - valueBits % integerPartWidth; 1791 1792 /* The natural number of digits required ignoring trailing 1793 insignificant zeroes. */ 1794 outputDigits = (valueBits - significandLSB () + 3) / 4; 1795 1796 /* hexDigits of zero means use the required number for the 1797 precision. Otherwise, see if we are truncating. If we are, 1798 find out if we need to round away from zero. */ 1799 if (hexDigits) { 1800 if (hexDigits < outputDigits) { 1801 /* We are dropping non-zero bits, so need to check how to round. 1802 "bits" is the number of dropped bits. */ 1803 unsigned int bits; 1804 lostFraction fraction; 1805 1806 bits = valueBits - hexDigits * 4; 1807 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 1808 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 1809 } 1810 outputDigits = hexDigits; 1811 } 1812 1813 /* Write the digits consecutively, and start writing in the location 1814 of the hexadecimal point. We move the most significant digit 1815 left and add the hexadecimal point later. */ 1816 p = ++dst; 1817 1818 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 1819 1820 while (outputDigits && count) { 1821 integerPart part; 1822 1823 /* Put the most significant integerPartWidth bits in "part". */ 1824 if (--count == partsCount) 1825 part = 0; /* An imaginary higher zero part. */ 1826 else 1827 part = significand[count] << shift; 1828 1829 if (count && shift) 1830 part |= significand[count - 1] >> (integerPartWidth - shift); 1831 1832 /* Convert as much of "part" to hexdigits as we can. */ 1833 unsigned int curDigits = integerPartWidth / 4; 1834 1835 if (curDigits > outputDigits) 1836 curDigits = outputDigits; 1837 dst += partAsHex (dst, part, curDigits, hexDigitChars); 1838 outputDigits -= curDigits; 1839 } 1840 1841 if (roundUp) { 1842 char *q = dst; 1843 1844 /* Note that hexDigitChars has a trailing '0'. */ 1845 do { 1846 q--; 1847 *q = hexDigitChars[hexDigitValue (*q) + 1]; 1848 } while (*q == '0'); 1849 assert (q >= p); 1850 } else { 1851 /* Add trailing zeroes. */ 1852 memset (dst, '0', outputDigits); 1853 dst += outputDigits; 1854 } 1855 1856 /* Move the most significant digit to before the point, and if there 1857 is something after the decimal point add it. This must come 1858 after rounding above. */ 1859 p[-1] = p[0]; 1860 if (dst -1 == p) 1861 dst--; 1862 else 1863 p[0] = '.'; 1864 1865 /* Finally output the exponent. */ 1866 *dst++ = upperCase ? 'P': 'p'; 1867 1868 return writeDecimalExponent (dst, exponent); 1869} 1870 1871// For good performance it is desirable for different APFloats 1872// to produce different integers. 1873uint32_t 1874APFloat::getHashValue() const 1875{ 1876 if (category==fcZero) return sign<<8 | semantics->precision ; 1877 else if (category==fcInfinity) return sign<<9 | semantics->precision; 1878 else if (category==fcNaN) return 1<<10 | semantics->precision; 1879 else { 1880 uint32_t hash = sign<<11 | semantics->precision | exponent<<12; 1881 const integerPart* p = significandParts(); 1882 for (int i=partCount(); i>0; i--, p++) 1883 hash ^= ((uint32_t)*p) ^ (*p)>>32; 1884 return hash; 1885 } 1886} 1887 1888// Conversion from APFloat to/from host float/double. It may eventually be 1889// possible to eliminate these and have everybody deal with APFloats, but that 1890// will take a while. This approach will not easily extend to long double. 1891// Current implementation requires integerPartWidth==64, which is correct at 1892// the moment but could be made more general. 1893 1894// Denormals have exponent minExponent in APFloat, but minExponent-1 in 1895// the actual IEEE respresentations. We compensate for that here. 1896 1897APInt 1898APFloat::convertF80LongDoubleAPFloatToAPInt() const 1899{ 1900 assert(semantics == (const llvm::fltSemantics* const)&x87DoubleExtended); 1901 assert (partCount()==2); 1902 1903 uint64_t myexponent, mysignificand; 1904 1905 if (category==fcNormal) { 1906 myexponent = exponent+16383; //bias 1907 mysignificand = significandParts()[0]; 1908 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 1909 myexponent = 0; // denormal 1910 } else if (category==fcZero) { 1911 myexponent = 0; 1912 mysignificand = 0; 1913 } else if (category==fcInfinity) { 1914 myexponent = 0x7fff; 1915 mysignificand = 0x8000000000000000ULL; 1916 } else { 1917 assert(category == fcNaN && "Unknown category"); 1918 myexponent = 0x7fff; 1919 mysignificand = significandParts()[0]; 1920 } 1921 1922 uint64_t words[2]; 1923 words[0] = (((uint64_t)sign & 1) << 63) | 1924 ((myexponent & 0x7fff) << 48) | 1925 ((mysignificand >>16) & 0xffffffffffffLL); 1926 words[1] = mysignificand & 0xffff; 1927 return APInt(80, 2, words); 1928} 1929 1930APInt 1931APFloat::convertDoubleAPFloatToAPInt() const 1932{ 1933 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); 1934 assert (partCount()==1); 1935 1936 uint64_t myexponent, mysignificand; 1937 1938 if (category==fcNormal) { 1939 myexponent = exponent+1023; //bias 1940 mysignificand = *significandParts(); 1941 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 1942 myexponent = 0; // denormal 1943 } else if (category==fcZero) { 1944 myexponent = 0; 1945 mysignificand = 0; 1946 } else if (category==fcInfinity) { 1947 myexponent = 0x7ff; 1948 mysignificand = 0; 1949 } else { 1950 assert(category == fcNaN && "Unknown category!"); 1951 myexponent = 0x7ff; 1952 mysignificand = *significandParts(); 1953 } 1954 1955 return APInt(64, (((((uint64_t)sign & 1) << 63) | 1956 ((myexponent & 0x7ff) << 52) | 1957 (mysignificand & 0xfffffffffffffLL)))); 1958} 1959 1960APInt 1961APFloat::convertFloatAPFloatToAPInt() const 1962{ 1963 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); 1964 assert (partCount()==1); 1965 1966 uint32_t myexponent, mysignificand; 1967 1968 if (category==fcNormal) { 1969 myexponent = exponent+127; //bias 1970 mysignificand = *significandParts(); 1971 if (myexponent == 1 && !(mysignificand & 0x400000)) 1972 myexponent = 0; // denormal 1973 } else if (category==fcZero) { 1974 myexponent = 0; 1975 mysignificand = 0; 1976 } else if (category==fcInfinity) { 1977 myexponent = 0xff; 1978 mysignificand = 0; 1979 } else { 1980 assert(category == fcNaN && "Unknown category!"); 1981 myexponent = 0xff; 1982 mysignificand = *significandParts(); 1983 } 1984 1985 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | 1986 (mysignificand & 0x7fffff))); 1987} 1988 1989APInt 1990APFloat::convertToAPInt() const 1991{ 1992 if (semantics == (const llvm::fltSemantics* const)&IEEEsingle) 1993 return convertFloatAPFloatToAPInt(); 1994 1995 if (semantics == (const llvm::fltSemantics* const)&IEEEdouble) 1996 return convertDoubleAPFloatToAPInt(); 1997 1998 assert(semantics == (const llvm::fltSemantics* const)&x87DoubleExtended && 1999 "unknown format!"); 2000 return convertF80LongDoubleAPFloatToAPInt(); 2001} 2002 2003float 2004APFloat::convertToFloat() const 2005{ 2006 assert(semantics == (const llvm::fltSemantics* const)&IEEEsingle); 2007 APInt api = convertToAPInt(); 2008 return api.bitsToFloat(); 2009} 2010 2011double 2012APFloat::convertToDouble() const 2013{ 2014 assert(semantics == (const llvm::fltSemantics* const)&IEEEdouble); 2015 APInt api = convertToAPInt(); 2016 return api.bitsToDouble(); 2017} 2018 2019/// Integer bit is explicit in this format. Current Intel book does not 2020/// define meaning of: 2021/// exponent = all 1's, integer bit not set. 2022/// exponent = 0, integer bit set. (formerly "psuedodenormals") 2023/// exponent!=0 nor all 1's, integer bit not set. (formerly "unnormals") 2024void 2025APFloat::initFromF80LongDoubleAPInt(const APInt &api) 2026{ 2027 assert(api.getBitWidth()==80); 2028 uint64_t i1 = api.getRawData()[0]; 2029 uint64_t i2 = api.getRawData()[1]; 2030 uint64_t myexponent = (i1 >> 48) & 0x7fff; 2031 uint64_t mysignificand = ((i1 << 16) & 0xffffffffffff0000ULL) | 2032 (i2 & 0xffff); 2033 2034 initialize(&APFloat::x87DoubleExtended); 2035 assert(partCount()==2); 2036 2037 sign = i1>>63; 2038 if (myexponent==0 && mysignificand==0) { 2039 // exponent, significand meaningless 2040 category = fcZero; 2041 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 2042 // exponent, significand meaningless 2043 category = fcInfinity; 2044 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) { 2045 // exponent meaningless 2046 category = fcNaN; 2047 significandParts()[0] = mysignificand; 2048 significandParts()[1] = 0; 2049 } else { 2050 category = fcNormal; 2051 exponent = myexponent - 16383; 2052 significandParts()[0] = mysignificand; 2053 significandParts()[1] = 0; 2054 if (myexponent==0) // denormal 2055 exponent = -16382; 2056 } 2057} 2058 2059void 2060APFloat::initFromDoubleAPInt(const APInt &api) 2061{ 2062 assert(api.getBitWidth()==64); 2063 uint64_t i = *api.getRawData(); 2064 uint64_t myexponent = (i >> 52) & 0x7ff; 2065 uint64_t mysignificand = i & 0xfffffffffffffLL; 2066 2067 initialize(&APFloat::IEEEdouble); 2068 assert(partCount()==1); 2069 2070 sign = i>>63; 2071 if (myexponent==0 && mysignificand==0) { 2072 // exponent, significand meaningless 2073 category = fcZero; 2074 } else if (myexponent==0x7ff && mysignificand==0) { 2075 // exponent, significand meaningless 2076 category = fcInfinity; 2077 } else if (myexponent==0x7ff && mysignificand!=0) { 2078 // exponent meaningless 2079 category = fcNaN; 2080 *significandParts() = mysignificand; 2081 } else { 2082 category = fcNormal; 2083 exponent = myexponent - 1023; 2084 *significandParts() = mysignificand; 2085 if (myexponent==0) // denormal 2086 exponent = -1022; 2087 else 2088 *significandParts() |= 0x10000000000000LL; // integer bit 2089 } 2090} 2091 2092void 2093APFloat::initFromFloatAPInt(const APInt & api) 2094{ 2095 assert(api.getBitWidth()==32); 2096 uint32_t i = (uint32_t)*api.getRawData(); 2097 uint32_t myexponent = (i >> 23) & 0xff; 2098 uint32_t mysignificand = i & 0x7fffff; 2099 2100 initialize(&APFloat::IEEEsingle); 2101 assert(partCount()==1); 2102 2103 sign = i >> 31; 2104 if (myexponent==0 && mysignificand==0) { 2105 // exponent, significand meaningless 2106 category = fcZero; 2107 } else if (myexponent==0xff && mysignificand==0) { 2108 // exponent, significand meaningless 2109 category = fcInfinity; 2110 } else if (myexponent==0xff && mysignificand!=0) { 2111 // sign, exponent, significand meaningless 2112 category = fcNaN; 2113 *significandParts() = mysignificand; 2114 } else { 2115 category = fcNormal; 2116 exponent = myexponent - 127; //bias 2117 *significandParts() = mysignificand; 2118 if (myexponent==0) // denormal 2119 exponent = -126; 2120 else 2121 *significandParts() |= 0x800000; // integer bit 2122 } 2123} 2124 2125/// Treat api as containing the bits of a floating point number. Currently 2126/// we infer the floating point type from the size of the APInt. FIXME: This 2127/// breaks when we get to PPC128 and IEEE128 (but both cannot exist in the 2128/// same compile...) 2129void 2130APFloat::initFromAPInt(const APInt& api) 2131{ 2132 if (api.getBitWidth() == 32) 2133 return initFromFloatAPInt(api); 2134 else if (api.getBitWidth()==64) 2135 return initFromDoubleAPInt(api); 2136 else if (api.getBitWidth()==80) 2137 return initFromF80LongDoubleAPInt(api); 2138 else 2139 assert(0); 2140} 2141 2142APFloat::APFloat(const APInt& api) 2143{ 2144 initFromAPInt(api); 2145} 2146 2147APFloat::APFloat(float f) 2148{ 2149 APInt api = APInt(32, 0); 2150 initFromAPInt(api.floatToBits(f)); 2151} 2152 2153APFloat::APFloat(double d) 2154{ 2155 APInt api = APInt(64, 0); 2156 initFromAPInt(api.doubleToBits(d)); 2157} 2158