APFloat.cpp revision 92f7e8d92581e031beeee3dc6c170f5c2cc7ea18
1//===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Neil Booth and is distributed under the 6// University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements a class to represent arbitrary precision floating 11// point values and provide a variety of arithmetic operations on them. 12// 13//===----------------------------------------------------------------------===// 14 15#include <cassert> 16#include <cstring> 17#include "llvm/ADT/APFloat.h" 18#include "llvm/Support/MathExtras.h" 19 20using namespace llvm; 21 22#define convolve(lhs, rhs) ((lhs) * 4 + (rhs)) 23 24/* Assumed in hexadecimal significand parsing, and conversion to 25 hexadecimal strings. */ 26COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0); 27 28namespace llvm { 29 30 /* Represents floating point arithmetic semantics. */ 31 struct fltSemantics { 32 /* The largest E such that 2^E is representable; this matches the 33 definition of IEEE 754. */ 34 exponent_t maxExponent; 35 36 /* The smallest E such that 2^E is a normalized number; this 37 matches the definition of IEEE 754. */ 38 exponent_t minExponent; 39 40 /* Number of bits in the significand. This includes the integer 41 bit. */ 42 unsigned char precision; 43 44 /* If the target format has an implicit integer bit. */ 45 bool implicitIntegerBit; 46 }; 47 48 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true }; 49 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true }; 50 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true }; 51 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, false }; 52 const fltSemantics APFloat::Bogus = { 0, 0, 0, false }; 53} 54 55/* Put a bunch of private, handy routines in an anonymous namespace. */ 56namespace { 57 58 inline unsigned int 59 partCountForBits(unsigned int bits) 60 { 61 return ((bits) + integerPartWidth - 1) / integerPartWidth; 62 } 63 64 unsigned int 65 digitValue(unsigned int c) 66 { 67 unsigned int r; 68 69 r = c - '0'; 70 if(r <= 9) 71 return r; 72 73 return -1U; 74 } 75 76 unsigned int 77 hexDigitValue (unsigned int c) 78 { 79 unsigned int r; 80 81 r = c - '0'; 82 if(r <= 9) 83 return r; 84 85 r = c - 'A'; 86 if(r <= 5) 87 return r + 10; 88 89 r = c - 'a'; 90 if(r <= 5) 91 return r + 10; 92 93 return -1U; 94 } 95 96 /* This is ugly and needs cleaning up, but I don't immediately see 97 how whilst remaining safe. */ 98 static int 99 totalExponent(const char *p, int exponentAdjustment) 100 { 101 integerPart unsignedExponent; 102 bool negative, overflow; 103 long exponent; 104 105 /* Move past the exponent letter and sign to the digits. */ 106 p++; 107 negative = *p == '-'; 108 if(*p == '-' || *p == '+') 109 p++; 110 111 unsignedExponent = 0; 112 overflow = false; 113 for(;;) { 114 unsigned int value; 115 116 value = digitValue(*p); 117 if(value == -1U) 118 break; 119 120 p++; 121 unsignedExponent = unsignedExponent * 10 + value; 122 if(unsignedExponent > 65535) 123 overflow = true; 124 } 125 126 if(exponentAdjustment > 65535 || exponentAdjustment < -65536) 127 overflow = true; 128 129 if(!overflow) { 130 exponent = unsignedExponent; 131 if(negative) 132 exponent = -exponent; 133 exponent += exponentAdjustment; 134 if(exponent > 65535 || exponent < -65536) 135 overflow = true; 136 } 137 138 if(overflow) 139 exponent = negative ? -65536: 65535; 140 141 return exponent; 142 } 143 144 const char * 145 skipLeadingZeroesAndAnyDot(const char *p, const char **dot) 146 { 147 *dot = 0; 148 while(*p == '0') 149 p++; 150 151 if(*p == '.') { 152 *dot = p++; 153 while(*p == '0') 154 p++; 155 } 156 157 return p; 158 } 159 160 /* Return the trailing fraction of a hexadecimal number. 161 DIGITVALUE is the first hex digit of the fraction, P points to 162 the next digit. */ 163 lostFraction 164 trailingHexadecimalFraction(const char *p, unsigned int digitValue) 165 { 166 unsigned int hexDigit; 167 168 /* If the first trailing digit isn't 0 or 8 we can work out the 169 fraction immediately. */ 170 if(digitValue > 8) 171 return lfMoreThanHalf; 172 else if(digitValue < 8 && digitValue > 0) 173 return lfLessThanHalf; 174 175 /* Otherwise we need to find the first non-zero digit. */ 176 while(*p == '0') 177 p++; 178 179 hexDigit = hexDigitValue(*p); 180 181 /* If we ran off the end it is exactly zero or one-half, otherwise 182 a little more. */ 183 if(hexDigit == -1U) 184 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 185 else 186 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 187 } 188 189 /* Return the fraction lost were a bignum truncated losing the least 190 significant BITS bits. */ 191 lostFraction 192 lostFractionThroughTruncation(const integerPart *parts, 193 unsigned int partCount, 194 unsigned int bits) 195 { 196 unsigned int lsb; 197 198 lsb = APInt::tcLSB(parts, partCount); 199 200 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ 201 if(bits <= lsb) 202 return lfExactlyZero; 203 if(bits == lsb + 1) 204 return lfExactlyHalf; 205 if(bits <= partCount * integerPartWidth 206 && APInt::tcExtractBit(parts, bits - 1)) 207 return lfMoreThanHalf; 208 209 return lfLessThanHalf; 210 } 211 212 /* Shift DST right BITS bits noting lost fraction. */ 213 lostFraction 214 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits) 215 { 216 lostFraction lost_fraction; 217 218 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 219 220 APInt::tcShiftRight(dst, parts, bits); 221 222 return lost_fraction; 223 } 224 225 226 /* Zero at the end to avoid modular arithmetic when adding one; used 227 when rounding up during hexadecimal output. */ 228 static const char hexDigitsLower[] = "0123456789abcdef0"; 229 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 230 static const char infinityL[] = "infinity"; 231 static const char infinityU[] = "INFINITY"; 232 static const char NaNL[] = "nan"; 233 static const char NaNU[] = "NAN"; 234 235 /* Write out an integerPart in hexadecimal, starting with the most 236 significant nibble. Write out exactly COUNT hexdigits, return 237 COUNT. */ 238 static unsigned int 239 partAsHex (char *dst, integerPart part, unsigned int count, 240 const char *hexDigitChars) 241 { 242 unsigned int result = count; 243 244 assert (count != 0 && count <= integerPartWidth / 4); 245 246 part >>= (integerPartWidth - 4 * count); 247 while (count--) { 248 dst[count] = hexDigitChars[part & 0xf]; 249 part >>= 4; 250 } 251 252 return result; 253 } 254 255 /* Write out an unsigned decimal integer. */ 256 static char * 257 writeUnsignedDecimal (char *dst, unsigned int n) 258 { 259 char buff[40], *p; 260 261 p = buff; 262 do 263 *p++ = '0' + n % 10; 264 while (n /= 10); 265 266 do 267 *dst++ = *--p; 268 while (p != buff); 269 270 return dst; 271 } 272 273 /* Write out a signed decimal integer. */ 274 static char * 275 writeSignedDecimal (char *dst, int value) 276 { 277 if (value < 0) { 278 *dst++ = '-'; 279 dst = writeUnsignedDecimal(dst, -(unsigned) value); 280 } else 281 dst = writeUnsignedDecimal(dst, value); 282 283 return dst; 284 } 285} 286 287/* Constructors. */ 288void 289APFloat::initialize(const fltSemantics *ourSemantics) 290{ 291 unsigned int count; 292 293 semantics = ourSemantics; 294 count = partCount(); 295 if(count > 1) 296 significand.parts = new integerPart[count]; 297} 298 299void 300APFloat::freeSignificand() 301{ 302 if(partCount() > 1) 303 delete [] significand.parts; 304} 305 306void 307APFloat::assign(const APFloat &rhs) 308{ 309 assert(semantics == rhs.semantics); 310 311 sign = rhs.sign; 312 category = rhs.category; 313 exponent = rhs.exponent; 314 if(category == fcNormal || category == fcNaN) 315 copySignificand(rhs); 316} 317 318void 319APFloat::copySignificand(const APFloat &rhs) 320{ 321 assert(category == fcNormal || category == fcNaN); 322 assert(rhs.partCount() >= partCount()); 323 324 APInt::tcAssign(significandParts(), rhs.significandParts(), 325 partCount()); 326} 327 328APFloat & 329APFloat::operator=(const APFloat &rhs) 330{ 331 if(this != &rhs) { 332 if(semantics != rhs.semantics) { 333 freeSignificand(); 334 initialize(rhs.semantics); 335 } 336 assign(rhs); 337 } 338 339 return *this; 340} 341 342bool 343APFloat::bitwiseIsEqual(const APFloat &rhs) const { 344 if (this == &rhs) 345 return true; 346 if (semantics != rhs.semantics || 347 category != rhs.category || 348 sign != rhs.sign) 349 return false; 350 if (category==fcZero || category==fcInfinity) 351 return true; 352 else if (category==fcNormal && exponent!=rhs.exponent) 353 return false; 354 else { 355 int i= partCount(); 356 const integerPart* p=significandParts(); 357 const integerPart* q=rhs.significandParts(); 358 for (; i>0; i--, p++, q++) { 359 if (*p != *q) 360 return false; 361 } 362 return true; 363 } 364} 365 366APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) 367{ 368 initialize(&ourSemantics); 369 sign = 0; 370 zeroSignificand(); 371 exponent = ourSemantics.precision - 1; 372 significandParts()[0] = value; 373 normalize(rmNearestTiesToEven, lfExactlyZero); 374} 375 376APFloat::APFloat(const fltSemantics &ourSemantics, 377 fltCategory ourCategory, bool negative) 378{ 379 initialize(&ourSemantics); 380 category = ourCategory; 381 sign = negative; 382 if(category == fcNormal) 383 category = fcZero; 384} 385 386APFloat::APFloat(const fltSemantics &ourSemantics, const char *text) 387{ 388 initialize(&ourSemantics); 389 convertFromString(text, rmNearestTiesToEven); 390} 391 392APFloat::APFloat(const APFloat &rhs) 393{ 394 initialize(rhs.semantics); 395 assign(rhs); 396} 397 398APFloat::~APFloat() 399{ 400 freeSignificand(); 401} 402 403unsigned int 404APFloat::partCount() const 405{ 406 return partCountForBits(semantics->precision + 1); 407} 408 409unsigned int 410APFloat::semanticsPrecision(const fltSemantics &semantics) 411{ 412 return semantics.precision; 413} 414 415const integerPart * 416APFloat::significandParts() const 417{ 418 return const_cast<APFloat *>(this)->significandParts(); 419} 420 421integerPart * 422APFloat::significandParts() 423{ 424 assert(category == fcNormal || category == fcNaN); 425 426 if(partCount() > 1) 427 return significand.parts; 428 else 429 return &significand.part; 430} 431 432/* Combine the effect of two lost fractions. */ 433lostFraction 434APFloat::combineLostFractions(lostFraction moreSignificant, 435 lostFraction lessSignificant) 436{ 437 if(lessSignificant != lfExactlyZero) { 438 if(moreSignificant == lfExactlyZero) 439 moreSignificant = lfLessThanHalf; 440 else if(moreSignificant == lfExactlyHalf) 441 moreSignificant = lfMoreThanHalf; 442 } 443 444 return moreSignificant; 445} 446 447void 448APFloat::zeroSignificand() 449{ 450 category = fcNormal; 451 APInt::tcSet(significandParts(), 0, partCount()); 452} 453 454/* Increment an fcNormal floating point number's significand. */ 455void 456APFloat::incrementSignificand() 457{ 458 integerPart carry; 459 460 carry = APInt::tcIncrement(significandParts(), partCount()); 461 462 /* Our callers should never cause us to overflow. */ 463 assert(carry == 0); 464} 465 466/* Add the significand of the RHS. Returns the carry flag. */ 467integerPart 468APFloat::addSignificand(const APFloat &rhs) 469{ 470 integerPart *parts; 471 472 parts = significandParts(); 473 474 assert(semantics == rhs.semantics); 475 assert(exponent == rhs.exponent); 476 477 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 478} 479 480/* Subtract the significand of the RHS with a borrow flag. Returns 481 the borrow flag. */ 482integerPart 483APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow) 484{ 485 integerPart *parts; 486 487 parts = significandParts(); 488 489 assert(semantics == rhs.semantics); 490 assert(exponent == rhs.exponent); 491 492 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 493 partCount()); 494} 495 496/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 497 on to the full-precision result of the multiplication. Returns the 498 lost fraction. */ 499lostFraction 500APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) 501{ 502 unsigned int omsb; // One, not zero, based MSB. 503 unsigned int partsCount, newPartsCount, precision; 504 integerPart *lhsSignificand; 505 integerPart scratch[4]; 506 integerPart *fullSignificand; 507 lostFraction lost_fraction; 508 509 assert(semantics == rhs.semantics); 510 511 precision = semantics->precision; 512 newPartsCount = partCountForBits(precision * 2); 513 514 if(newPartsCount > 4) 515 fullSignificand = new integerPart[newPartsCount]; 516 else 517 fullSignificand = scratch; 518 519 lhsSignificand = significandParts(); 520 partsCount = partCount(); 521 522 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 523 rhs.significandParts(), partsCount, partsCount); 524 525 lost_fraction = lfExactlyZero; 526 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 527 exponent += rhs.exponent; 528 529 if(addend) { 530 Significand savedSignificand = significand; 531 const fltSemantics *savedSemantics = semantics; 532 fltSemantics extendedSemantics; 533 opStatus status; 534 unsigned int extendedPrecision; 535 536 /* Normalize our MSB. */ 537 extendedPrecision = precision + precision - 1; 538 if(omsb != extendedPrecision) 539 { 540 APInt::tcShiftLeft(fullSignificand, newPartsCount, 541 extendedPrecision - omsb); 542 exponent -= extendedPrecision - omsb; 543 } 544 545 /* Create new semantics. */ 546 extendedSemantics = *semantics; 547 extendedSemantics.precision = extendedPrecision; 548 549 if(newPartsCount == 1) 550 significand.part = fullSignificand[0]; 551 else 552 significand.parts = fullSignificand; 553 semantics = &extendedSemantics; 554 555 APFloat extendedAddend(*addend); 556 status = extendedAddend.convert(extendedSemantics, rmTowardZero); 557 assert(status == opOK); 558 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 559 560 /* Restore our state. */ 561 if(newPartsCount == 1) 562 fullSignificand[0] = significand.part; 563 significand = savedSignificand; 564 semantics = savedSemantics; 565 566 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 567 } 568 569 exponent -= (precision - 1); 570 571 if(omsb > precision) { 572 unsigned int bits, significantParts; 573 lostFraction lf; 574 575 bits = omsb - precision; 576 significantParts = partCountForBits(omsb); 577 lf = shiftRight(fullSignificand, significantParts, bits); 578 lost_fraction = combineLostFractions(lf, lost_fraction); 579 exponent += bits; 580 } 581 582 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 583 584 if(newPartsCount > 4) 585 delete [] fullSignificand; 586 587 return lost_fraction; 588} 589 590/* Multiply the significands of LHS and RHS to DST. */ 591lostFraction 592APFloat::divideSignificand(const APFloat &rhs) 593{ 594 unsigned int bit, i, partsCount; 595 const integerPart *rhsSignificand; 596 integerPart *lhsSignificand, *dividend, *divisor; 597 integerPart scratch[4]; 598 lostFraction lost_fraction; 599 600 assert(semantics == rhs.semantics); 601 602 lhsSignificand = significandParts(); 603 rhsSignificand = rhs.significandParts(); 604 partsCount = partCount(); 605 606 if(partsCount > 2) 607 dividend = new integerPart[partsCount * 2]; 608 else 609 dividend = scratch; 610 611 divisor = dividend + partsCount; 612 613 /* Copy the dividend and divisor as they will be modified in-place. */ 614 for(i = 0; i < partsCount; i++) { 615 dividend[i] = lhsSignificand[i]; 616 divisor[i] = rhsSignificand[i]; 617 lhsSignificand[i] = 0; 618 } 619 620 exponent -= rhs.exponent; 621 622 unsigned int precision = semantics->precision; 623 624 /* Normalize the divisor. */ 625 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 626 if(bit) { 627 exponent += bit; 628 APInt::tcShiftLeft(divisor, partsCount, bit); 629 } 630 631 /* Normalize the dividend. */ 632 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 633 if(bit) { 634 exponent -= bit; 635 APInt::tcShiftLeft(dividend, partsCount, bit); 636 } 637 638 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) { 639 exponent--; 640 APInt::tcShiftLeft(dividend, partsCount, 1); 641 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 642 } 643 644 /* Long division. */ 645 for(bit = precision; bit; bit -= 1) { 646 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 647 APInt::tcSubtract(dividend, divisor, 0, partsCount); 648 APInt::tcSetBit(lhsSignificand, bit - 1); 649 } 650 651 APInt::tcShiftLeft(dividend, partsCount, 1); 652 } 653 654 /* Figure out the lost fraction. */ 655 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 656 657 if(cmp > 0) 658 lost_fraction = lfMoreThanHalf; 659 else if(cmp == 0) 660 lost_fraction = lfExactlyHalf; 661 else if(APInt::tcIsZero(dividend, partsCount)) 662 lost_fraction = lfExactlyZero; 663 else 664 lost_fraction = lfLessThanHalf; 665 666 if(partsCount > 2) 667 delete [] dividend; 668 669 return lost_fraction; 670} 671 672unsigned int 673APFloat::significandMSB() const 674{ 675 return APInt::tcMSB(significandParts(), partCount()); 676} 677 678unsigned int 679APFloat::significandLSB() const 680{ 681 return APInt::tcLSB(significandParts(), partCount()); 682} 683 684/* Note that a zero result is NOT normalized to fcZero. */ 685lostFraction 686APFloat::shiftSignificandRight(unsigned int bits) 687{ 688 /* Our exponent should not overflow. */ 689 assert((exponent_t) (exponent + bits) >= exponent); 690 691 exponent += bits; 692 693 return shiftRight(significandParts(), partCount(), bits); 694} 695 696/* Shift the significand left BITS bits, subtract BITS from its exponent. */ 697void 698APFloat::shiftSignificandLeft(unsigned int bits) 699{ 700 assert(bits < semantics->precision); 701 702 if(bits) { 703 unsigned int partsCount = partCount(); 704 705 APInt::tcShiftLeft(significandParts(), partsCount, bits); 706 exponent -= bits; 707 708 assert(!APInt::tcIsZero(significandParts(), partsCount)); 709 } 710} 711 712APFloat::cmpResult 713APFloat::compareAbsoluteValue(const APFloat &rhs) const 714{ 715 int compare; 716 717 assert(semantics == rhs.semantics); 718 assert(category == fcNormal); 719 assert(rhs.category == fcNormal); 720 721 compare = exponent - rhs.exponent; 722 723 /* If exponents are equal, do an unsigned bignum comparison of the 724 significands. */ 725 if(compare == 0) 726 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 727 partCount()); 728 729 if(compare > 0) 730 return cmpGreaterThan; 731 else if(compare < 0) 732 return cmpLessThan; 733 else 734 return cmpEqual; 735} 736 737/* Handle overflow. Sign is preserved. We either become infinity or 738 the largest finite number. */ 739APFloat::opStatus 740APFloat::handleOverflow(roundingMode rounding_mode) 741{ 742 /* Infinity? */ 743 if(rounding_mode == rmNearestTiesToEven 744 || rounding_mode == rmNearestTiesToAway 745 || (rounding_mode == rmTowardPositive && !sign) 746 || (rounding_mode == rmTowardNegative && sign)) 747 { 748 category = fcInfinity; 749 return (opStatus) (opOverflow | opInexact); 750 } 751 752 /* Otherwise we become the largest finite number. */ 753 category = fcNormal; 754 exponent = semantics->maxExponent; 755 APInt::tcSetLeastSignificantBits(significandParts(), partCount(), 756 semantics->precision); 757 758 return opInexact; 759} 760 761/* Returns TRUE if, when truncating the current number, with BIT the 762 new LSB, with the given lost fraction and rounding mode, the result 763 would need to be rounded away from zero (i.e., by increasing the 764 signficand). This routine must work for fcZero of both signs, and 765 fcNormal numbers. */ 766bool 767APFloat::roundAwayFromZero(roundingMode rounding_mode, 768 lostFraction lost_fraction, 769 unsigned int bit) const 770{ 771 /* NaNs and infinities should not have lost fractions. */ 772 assert(category == fcNormal || category == fcZero); 773 774 /* Current callers never pass this so we don't handle it. */ 775 assert(lost_fraction != lfExactlyZero); 776 777 switch(rounding_mode) { 778 default: 779 assert(0); 780 781 case rmNearestTiesToAway: 782 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 783 784 case rmNearestTiesToEven: 785 if(lost_fraction == lfMoreThanHalf) 786 return true; 787 788 /* Our zeroes don't have a significand to test. */ 789 if(lost_fraction == lfExactlyHalf && category != fcZero) 790 return APInt::tcExtractBit(significandParts(), bit); 791 792 return false; 793 794 case rmTowardZero: 795 return false; 796 797 case rmTowardPositive: 798 return sign == false; 799 800 case rmTowardNegative: 801 return sign == true; 802 } 803} 804 805APFloat::opStatus 806APFloat::normalize(roundingMode rounding_mode, 807 lostFraction lost_fraction) 808{ 809 unsigned int omsb; /* One, not zero, based MSB. */ 810 int exponentChange; 811 812 if(category != fcNormal) 813 return opOK; 814 815 /* Before rounding normalize the exponent of fcNormal numbers. */ 816 omsb = significandMSB() + 1; 817 818 if(omsb) { 819 /* OMSB is numbered from 1. We want to place it in the integer 820 bit numbered PRECISON if possible, with a compensating change in 821 the exponent. */ 822 exponentChange = omsb - semantics->precision; 823 824 /* If the resulting exponent is too high, overflow according to 825 the rounding mode. */ 826 if(exponent + exponentChange > semantics->maxExponent) 827 return handleOverflow(rounding_mode); 828 829 /* Subnormal numbers have exponent minExponent, and their MSB 830 is forced based on that. */ 831 if(exponent + exponentChange < semantics->minExponent) 832 exponentChange = semantics->minExponent - exponent; 833 834 /* Shifting left is easy as we don't lose precision. */ 835 if(exponentChange < 0) { 836 assert(lost_fraction == lfExactlyZero); 837 838 shiftSignificandLeft(-exponentChange); 839 840 return opOK; 841 } 842 843 if(exponentChange > 0) { 844 lostFraction lf; 845 846 /* Shift right and capture any new lost fraction. */ 847 lf = shiftSignificandRight(exponentChange); 848 849 lost_fraction = combineLostFractions(lf, lost_fraction); 850 851 /* Keep OMSB up-to-date. */ 852 if(omsb > (unsigned) exponentChange) 853 omsb -= (unsigned) exponentChange; 854 else 855 omsb = 0; 856 } 857 } 858 859 /* Now round the number according to rounding_mode given the lost 860 fraction. */ 861 862 /* As specified in IEEE 754, since we do not trap we do not report 863 underflow for exact results. */ 864 if(lost_fraction == lfExactlyZero) { 865 /* Canonicalize zeroes. */ 866 if(omsb == 0) 867 category = fcZero; 868 869 return opOK; 870 } 871 872 /* Increment the significand if we're rounding away from zero. */ 873 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 874 if(omsb == 0) 875 exponent = semantics->minExponent; 876 877 incrementSignificand(); 878 omsb = significandMSB() + 1; 879 880 /* Did the significand increment overflow? */ 881 if(omsb == (unsigned) semantics->precision + 1) { 882 /* Renormalize by incrementing the exponent and shifting our 883 significand right one. However if we already have the 884 maximum exponent we overflow to infinity. */ 885 if(exponent == semantics->maxExponent) { 886 category = fcInfinity; 887 888 return (opStatus) (opOverflow | opInexact); 889 } 890 891 shiftSignificandRight(1); 892 893 return opInexact; 894 } 895 } 896 897 /* The normal case - we were and are not denormal, and any 898 significand increment above didn't overflow. */ 899 if(omsb == semantics->precision) 900 return opInexact; 901 902 /* We have a non-zero denormal. */ 903 assert(omsb < semantics->precision); 904 assert(exponent == semantics->minExponent); 905 906 /* Canonicalize zeroes. */ 907 if(omsb == 0) 908 category = fcZero; 909 910 /* The fcZero case is a denormal that underflowed to zero. */ 911 return (opStatus) (opUnderflow | opInexact); 912} 913 914APFloat::opStatus 915APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) 916{ 917 switch(convolve(category, rhs.category)) { 918 default: 919 assert(0); 920 921 case convolve(fcNaN, fcZero): 922 case convolve(fcNaN, fcNormal): 923 case convolve(fcNaN, fcInfinity): 924 case convolve(fcNaN, fcNaN): 925 case convolve(fcNormal, fcZero): 926 case convolve(fcInfinity, fcNormal): 927 case convolve(fcInfinity, fcZero): 928 return opOK; 929 930 case convolve(fcZero, fcNaN): 931 case convolve(fcNormal, fcNaN): 932 case convolve(fcInfinity, fcNaN): 933 category = fcNaN; 934 copySignificand(rhs); 935 return opOK; 936 937 case convolve(fcNormal, fcInfinity): 938 case convolve(fcZero, fcInfinity): 939 category = fcInfinity; 940 sign = rhs.sign ^ subtract; 941 return opOK; 942 943 case convolve(fcZero, fcNormal): 944 assign(rhs); 945 sign = rhs.sign ^ subtract; 946 return opOK; 947 948 case convolve(fcZero, fcZero): 949 /* Sign depends on rounding mode; handled by caller. */ 950 return opOK; 951 952 case convolve(fcInfinity, fcInfinity): 953 /* Differently signed infinities can only be validly 954 subtracted. */ 955 if(sign ^ rhs.sign != subtract) { 956 category = fcNaN; 957 // Arbitrary but deterministic value for significand 958 APInt::tcSet(significandParts(), ~0U, partCount()); 959 return opInvalidOp; 960 } 961 962 return opOK; 963 964 case convolve(fcNormal, fcNormal): 965 return opDivByZero; 966 } 967} 968 969/* Add or subtract two normal numbers. */ 970lostFraction 971APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) 972{ 973 integerPart carry; 974 lostFraction lost_fraction; 975 int bits; 976 977 /* Determine if the operation on the absolute values is effectively 978 an addition or subtraction. */ 979 subtract ^= (sign ^ rhs.sign); 980 981 /* Are we bigger exponent-wise than the RHS? */ 982 bits = exponent - rhs.exponent; 983 984 /* Subtraction is more subtle than one might naively expect. */ 985 if(subtract) { 986 APFloat temp_rhs(rhs); 987 bool reverse; 988 989 if (bits == 0) { 990 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan; 991 lost_fraction = lfExactlyZero; 992 } else if (bits > 0) { 993 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 994 shiftSignificandLeft(1); 995 reverse = false; 996 } else { 997 lost_fraction = shiftSignificandRight(-bits - 1); 998 temp_rhs.shiftSignificandLeft(1); 999 reverse = true; 1000 } 1001 1002 if (reverse) { 1003 carry = temp_rhs.subtractSignificand 1004 (*this, lost_fraction != lfExactlyZero); 1005 copySignificand(temp_rhs); 1006 sign = !sign; 1007 } else { 1008 carry = subtractSignificand 1009 (temp_rhs, lost_fraction != lfExactlyZero); 1010 } 1011 1012 /* Invert the lost fraction - it was on the RHS and 1013 subtracted. */ 1014 if(lost_fraction == lfLessThanHalf) 1015 lost_fraction = lfMoreThanHalf; 1016 else if(lost_fraction == lfMoreThanHalf) 1017 lost_fraction = lfLessThanHalf; 1018 1019 /* The code above is intended to ensure that no borrow is 1020 necessary. */ 1021 assert(!carry); 1022 } else { 1023 if(bits > 0) { 1024 APFloat temp_rhs(rhs); 1025 1026 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1027 carry = addSignificand(temp_rhs); 1028 } else { 1029 lost_fraction = shiftSignificandRight(-bits); 1030 carry = addSignificand(rhs); 1031 } 1032 1033 /* We have a guard bit; generating a carry cannot happen. */ 1034 assert(!carry); 1035 } 1036 1037 return lost_fraction; 1038} 1039 1040APFloat::opStatus 1041APFloat::multiplySpecials(const APFloat &rhs) 1042{ 1043 switch(convolve(category, rhs.category)) { 1044 default: 1045 assert(0); 1046 1047 case convolve(fcNaN, fcZero): 1048 case convolve(fcNaN, fcNormal): 1049 case convolve(fcNaN, fcInfinity): 1050 case convolve(fcNaN, fcNaN): 1051 return opOK; 1052 1053 case convolve(fcZero, fcNaN): 1054 case convolve(fcNormal, fcNaN): 1055 case convolve(fcInfinity, fcNaN): 1056 category = fcNaN; 1057 copySignificand(rhs); 1058 return opOK; 1059 1060 case convolve(fcNormal, fcInfinity): 1061 case convolve(fcInfinity, fcNormal): 1062 case convolve(fcInfinity, fcInfinity): 1063 category = fcInfinity; 1064 return opOK; 1065 1066 case convolve(fcZero, fcNormal): 1067 case convolve(fcNormal, fcZero): 1068 case convolve(fcZero, fcZero): 1069 category = fcZero; 1070 return opOK; 1071 1072 case convolve(fcZero, fcInfinity): 1073 case convolve(fcInfinity, fcZero): 1074 category = fcNaN; 1075 // Arbitrary but deterministic value for significand 1076 APInt::tcSet(significandParts(), ~0U, partCount()); 1077 return opInvalidOp; 1078 1079 case convolve(fcNormal, fcNormal): 1080 return opOK; 1081 } 1082} 1083 1084APFloat::opStatus 1085APFloat::divideSpecials(const APFloat &rhs) 1086{ 1087 switch(convolve(category, rhs.category)) { 1088 default: 1089 assert(0); 1090 1091 case convolve(fcNaN, fcZero): 1092 case convolve(fcNaN, fcNormal): 1093 case convolve(fcNaN, fcInfinity): 1094 case convolve(fcNaN, fcNaN): 1095 case convolve(fcInfinity, fcZero): 1096 case convolve(fcInfinity, fcNormal): 1097 case convolve(fcZero, fcInfinity): 1098 case convolve(fcZero, fcNormal): 1099 return opOK; 1100 1101 case convolve(fcZero, fcNaN): 1102 case convolve(fcNormal, fcNaN): 1103 case convolve(fcInfinity, fcNaN): 1104 category = fcNaN; 1105 copySignificand(rhs); 1106 return opOK; 1107 1108 case convolve(fcNormal, fcInfinity): 1109 category = fcZero; 1110 return opOK; 1111 1112 case convolve(fcNormal, fcZero): 1113 category = fcInfinity; 1114 return opDivByZero; 1115 1116 case convolve(fcInfinity, fcInfinity): 1117 case convolve(fcZero, fcZero): 1118 category = fcNaN; 1119 // Arbitrary but deterministic value for significand 1120 APInt::tcSet(significandParts(), ~0U, partCount()); 1121 return opInvalidOp; 1122 1123 case convolve(fcNormal, fcNormal): 1124 return opOK; 1125 } 1126} 1127 1128/* Change sign. */ 1129void 1130APFloat::changeSign() 1131{ 1132 /* Look mummy, this one's easy. */ 1133 sign = !sign; 1134} 1135 1136void 1137APFloat::clearSign() 1138{ 1139 /* So is this one. */ 1140 sign = 0; 1141} 1142 1143void 1144APFloat::copySign(const APFloat &rhs) 1145{ 1146 /* And this one. */ 1147 sign = rhs.sign; 1148} 1149 1150/* Normalized addition or subtraction. */ 1151APFloat::opStatus 1152APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode, 1153 bool subtract) 1154{ 1155 opStatus fs; 1156 1157 fs = addOrSubtractSpecials(rhs, subtract); 1158 1159 /* This return code means it was not a simple case. */ 1160 if(fs == opDivByZero) { 1161 lostFraction lost_fraction; 1162 1163 lost_fraction = addOrSubtractSignificand(rhs, subtract); 1164 fs = normalize(rounding_mode, lost_fraction); 1165 1166 /* Can only be zero if we lost no fraction. */ 1167 assert(category != fcZero || lost_fraction == lfExactlyZero); 1168 } 1169 1170 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1171 positive zero unless rounding to minus infinity, except that 1172 adding two like-signed zeroes gives that zero. */ 1173 if(category == fcZero) { 1174 if(rhs.category != fcZero || (sign == rhs.sign) == subtract) 1175 sign = (rounding_mode == rmTowardNegative); 1176 } 1177 1178 return fs; 1179} 1180 1181/* Normalized addition. */ 1182APFloat::opStatus 1183APFloat::add(const APFloat &rhs, roundingMode rounding_mode) 1184{ 1185 return addOrSubtract(rhs, rounding_mode, false); 1186} 1187 1188/* Normalized subtraction. */ 1189APFloat::opStatus 1190APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode) 1191{ 1192 return addOrSubtract(rhs, rounding_mode, true); 1193} 1194 1195/* Normalized multiply. */ 1196APFloat::opStatus 1197APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode) 1198{ 1199 opStatus fs; 1200 1201 sign ^= rhs.sign; 1202 fs = multiplySpecials(rhs); 1203 1204 if(category == fcNormal) { 1205 lostFraction lost_fraction = multiplySignificand(rhs, 0); 1206 fs = normalize(rounding_mode, lost_fraction); 1207 if(lost_fraction != lfExactlyZero) 1208 fs = (opStatus) (fs | opInexact); 1209 } 1210 1211 return fs; 1212} 1213 1214/* Normalized divide. */ 1215APFloat::opStatus 1216APFloat::divide(const APFloat &rhs, roundingMode rounding_mode) 1217{ 1218 opStatus fs; 1219 1220 sign ^= rhs.sign; 1221 fs = divideSpecials(rhs); 1222 1223 if(category == fcNormal) { 1224 lostFraction lost_fraction = divideSignificand(rhs); 1225 fs = normalize(rounding_mode, lost_fraction); 1226 if(lost_fraction != lfExactlyZero) 1227 fs = (opStatus) (fs | opInexact); 1228 } 1229 1230 return fs; 1231} 1232 1233/* Normalized remainder. This is not currently doing TRT. */ 1234APFloat::opStatus 1235APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) 1236{ 1237 opStatus fs; 1238 APFloat V = *this; 1239 unsigned int origSign = sign; 1240 fs = V.divide(rhs, rmNearestTiesToEven); 1241 if (fs == opDivByZero) 1242 return fs; 1243 1244 int parts = partCount(); 1245 integerPart *x = new integerPart[parts]; 1246 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1247 rmNearestTiesToEven); 1248 if (fs==opInvalidOp) 1249 return fs; 1250 1251 fs = V.convertFromInteger(x, parts * integerPartWidth, true, 1252 rmNearestTiesToEven); 1253 assert(fs==opOK); // should always work 1254 1255 fs = V.multiply(rhs, rounding_mode); 1256 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1257 1258 fs = subtract(V, rounding_mode); 1259 assert(fs==opOK || fs==opInexact); // likewise 1260 1261 if (isZero()) 1262 sign = origSign; // IEEE754 requires this 1263 delete[] x; 1264 return fs; 1265} 1266 1267/* Normalized fused-multiply-add. */ 1268APFloat::opStatus 1269APFloat::fusedMultiplyAdd(const APFloat &multiplicand, 1270 const APFloat &addend, 1271 roundingMode rounding_mode) 1272{ 1273 opStatus fs; 1274 1275 /* Post-multiplication sign, before addition. */ 1276 sign ^= multiplicand.sign; 1277 1278 /* If and only if all arguments are normal do we need to do an 1279 extended-precision calculation. */ 1280 if(category == fcNormal 1281 && multiplicand.category == fcNormal 1282 && addend.category == fcNormal) { 1283 lostFraction lost_fraction; 1284 1285 lost_fraction = multiplySignificand(multiplicand, &addend); 1286 fs = normalize(rounding_mode, lost_fraction); 1287 if(lost_fraction != lfExactlyZero) 1288 fs = (opStatus) (fs | opInexact); 1289 1290 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1291 positive zero unless rounding to minus infinity, except that 1292 adding two like-signed zeroes gives that zero. */ 1293 if(category == fcZero && sign != addend.sign) 1294 sign = (rounding_mode == rmTowardNegative); 1295 } else { 1296 fs = multiplySpecials(multiplicand); 1297 1298 /* FS can only be opOK or opInvalidOp. There is no more work 1299 to do in the latter case. The IEEE-754R standard says it is 1300 implementation-defined in this case whether, if ADDEND is a 1301 quiet NaN, we raise invalid op; this implementation does so. 1302 1303 If we need to do the addition we can do so with normal 1304 precision. */ 1305 if(fs == opOK) 1306 fs = addOrSubtract(addend, rounding_mode, false); 1307 } 1308 1309 return fs; 1310} 1311 1312/* Comparison requires normalized numbers. */ 1313APFloat::cmpResult 1314APFloat::compare(const APFloat &rhs) const 1315{ 1316 cmpResult result; 1317 1318 assert(semantics == rhs.semantics); 1319 1320 switch(convolve(category, rhs.category)) { 1321 default: 1322 assert(0); 1323 1324 case convolve(fcNaN, fcZero): 1325 case convolve(fcNaN, fcNormal): 1326 case convolve(fcNaN, fcInfinity): 1327 case convolve(fcNaN, fcNaN): 1328 case convolve(fcZero, fcNaN): 1329 case convolve(fcNormal, fcNaN): 1330 case convolve(fcInfinity, fcNaN): 1331 return cmpUnordered; 1332 1333 case convolve(fcInfinity, fcNormal): 1334 case convolve(fcInfinity, fcZero): 1335 case convolve(fcNormal, fcZero): 1336 if(sign) 1337 return cmpLessThan; 1338 else 1339 return cmpGreaterThan; 1340 1341 case convolve(fcNormal, fcInfinity): 1342 case convolve(fcZero, fcInfinity): 1343 case convolve(fcZero, fcNormal): 1344 if(rhs.sign) 1345 return cmpGreaterThan; 1346 else 1347 return cmpLessThan; 1348 1349 case convolve(fcInfinity, fcInfinity): 1350 if(sign == rhs.sign) 1351 return cmpEqual; 1352 else if(sign) 1353 return cmpLessThan; 1354 else 1355 return cmpGreaterThan; 1356 1357 case convolve(fcZero, fcZero): 1358 return cmpEqual; 1359 1360 case convolve(fcNormal, fcNormal): 1361 break; 1362 } 1363 1364 /* Two normal numbers. Do they have the same sign? */ 1365 if(sign != rhs.sign) { 1366 if(sign) 1367 result = cmpLessThan; 1368 else 1369 result = cmpGreaterThan; 1370 } else { 1371 /* Compare absolute values; invert result if negative. */ 1372 result = compareAbsoluteValue(rhs); 1373 1374 if(sign) { 1375 if(result == cmpLessThan) 1376 result = cmpGreaterThan; 1377 else if(result == cmpGreaterThan) 1378 result = cmpLessThan; 1379 } 1380 } 1381 1382 return result; 1383} 1384 1385APFloat::opStatus 1386APFloat::convert(const fltSemantics &toSemantics, 1387 roundingMode rounding_mode) 1388{ 1389 lostFraction lostFraction; 1390 unsigned int newPartCount, oldPartCount; 1391 opStatus fs; 1392 1393 lostFraction = lfExactlyZero; 1394 newPartCount = partCountForBits(toSemantics.precision + 1); 1395 oldPartCount = partCount(); 1396 1397 /* Handle storage complications. If our new form is wider, 1398 re-allocate our bit pattern into wider storage. If it is 1399 narrower, we ignore the excess parts, but if narrowing to a 1400 single part we need to free the old storage. 1401 Be careful not to reference significandParts for zeroes 1402 and infinities, since it aborts. */ 1403 if (newPartCount > oldPartCount) { 1404 integerPart *newParts; 1405 newParts = new integerPart[newPartCount]; 1406 APInt::tcSet(newParts, 0, newPartCount); 1407 if (category==fcNormal || category==fcNaN) 1408 APInt::tcAssign(newParts, significandParts(), oldPartCount); 1409 freeSignificand(); 1410 significand.parts = newParts; 1411 } else if (newPartCount < oldPartCount) { 1412 /* Capture any lost fraction through truncation of parts so we get 1413 correct rounding whilst normalizing. */ 1414 if (category==fcNormal) 1415 lostFraction = lostFractionThroughTruncation 1416 (significandParts(), oldPartCount, toSemantics.precision); 1417 if (newPartCount == 1) { 1418 integerPart newPart = 0; 1419 if (category==fcNormal || category==fcNaN) 1420 newPart = significandParts()[0]; 1421 freeSignificand(); 1422 significand.part = newPart; 1423 } 1424 } 1425 1426 if(category == fcNormal) { 1427 /* Re-interpret our bit-pattern. */ 1428 exponent += toSemantics.precision - semantics->precision; 1429 semantics = &toSemantics; 1430 fs = normalize(rounding_mode, lostFraction); 1431 } else if (category == fcNaN) { 1432 int shift = toSemantics.precision - semantics->precision; 1433 // No normalization here, just truncate 1434 if (shift>0) 1435 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 1436 else if (shift < 0) 1437 APInt::tcShiftRight(significandParts(), newPartCount, -shift); 1438 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) 1439 // does not give you back the same bits. This is dubious, and we 1440 // don't currently do it. You're really supposed to get 1441 // an invalid operation signal at runtime, but nobody does that. 1442 semantics = &toSemantics; 1443 fs = opOK; 1444 } else { 1445 semantics = &toSemantics; 1446 fs = opOK; 1447 } 1448 1449 return fs; 1450} 1451 1452/* Convert a floating point number to an integer according to the 1453 rounding mode. If the rounded integer value is out of range this 1454 returns an invalid operation exception. If the rounded value is in 1455 range but the floating point number is not the exact integer, the C 1456 standard doesn't require an inexact exception to be raised. IEEE 1457 854 does require it so we do that. 1458 1459 Note that for conversions to integer type the C standard requires 1460 round-to-zero to always be used. */ 1461APFloat::opStatus 1462APFloat::convertToInteger(integerPart *parts, unsigned int width, 1463 bool isSigned, 1464 roundingMode rounding_mode) const 1465{ 1466 lostFraction lost_fraction; 1467 unsigned int msb, partsCount; 1468 int bits; 1469 1470 partsCount = partCountForBits(width); 1471 1472 /* Handle the three special cases first. We produce 1473 a deterministic result even for the Invalid cases. */ 1474 if (category == fcNaN) { 1475 // Neither sign nor isSigned affects this. 1476 APInt::tcSet(parts, 0, partsCount); 1477 return opInvalidOp; 1478 } 1479 if (category == fcInfinity) { 1480 if (!sign && isSigned) 1481 APInt::tcSetLeastSignificantBits(parts, partsCount, width-1); 1482 else if (!sign && !isSigned) 1483 APInt::tcSetLeastSignificantBits(parts, partsCount, width); 1484 else if (sign && isSigned) { 1485 APInt::tcSetLeastSignificantBits(parts, partsCount, 1); 1486 APInt::tcShiftLeft(parts, partsCount, width-1); 1487 } else // sign && !isSigned 1488 APInt::tcSet(parts, 0, partsCount); 1489 return opInvalidOp; 1490 } 1491 if (category == fcZero) { 1492 APInt::tcSet(parts, 0, partsCount); 1493 return opOK; 1494 } 1495 1496 /* Shift the bit pattern so the fraction is lost. */ 1497 APFloat tmp(*this); 1498 1499 bits = (int) semantics->precision - 1 - exponent; 1500 1501 if(bits > 0) { 1502 lost_fraction = tmp.shiftSignificandRight(bits); 1503 } else { 1504 if (-bits >= semantics->precision) { 1505 // Unrepresentably large. 1506 if (!sign && isSigned) 1507 APInt::tcSetLeastSignificantBits(parts, partsCount, width-1); 1508 else if (!sign && !isSigned) 1509 APInt::tcSetLeastSignificantBits(parts, partsCount, width); 1510 else if (sign && isSigned) { 1511 APInt::tcSetLeastSignificantBits(parts, partsCount, 1); 1512 APInt::tcShiftLeft(parts, partsCount, width-1); 1513 } else // sign && !isSigned 1514 APInt::tcSet(parts, 0, partsCount); 1515 return (opStatus)(opOverflow | opInexact); 1516 } 1517 tmp.shiftSignificandLeft(-bits); 1518 lost_fraction = lfExactlyZero; 1519 } 1520 1521 if(lost_fraction != lfExactlyZero 1522 && tmp.roundAwayFromZero(rounding_mode, lost_fraction, 0)) 1523 tmp.incrementSignificand(); 1524 1525 msb = tmp.significandMSB(); 1526 1527 /* Negative numbers cannot be represented as unsigned. */ 1528 if(!isSigned && tmp.sign && msb != -1U) 1529 return opInvalidOp; 1530 1531 /* It takes exponent + 1 bits to represent the truncated floating 1532 point number without its sign. We lose a bit for the sign, but 1533 the maximally negative integer is a special case. */ 1534 if(msb + 1 > width) /* !! Not same as msb >= width !! */ 1535 return opInvalidOp; 1536 1537 if(isSigned && msb + 1 == width 1538 && (!tmp.sign || tmp.significandLSB() != msb)) 1539 return opInvalidOp; 1540 1541 APInt::tcAssign(parts, tmp.significandParts(), partsCount); 1542 1543 if(tmp.sign) 1544 APInt::tcNegate(parts, partsCount); 1545 1546 if(lost_fraction == lfExactlyZero) 1547 return opOK; 1548 else 1549 return opInexact; 1550} 1551 1552APFloat::opStatus 1553APFloat::convertFromUnsignedInteger(integerPart *parts, 1554 unsigned int partCount, 1555 roundingMode rounding_mode) 1556{ 1557 unsigned int msb, precision; 1558 lostFraction lost_fraction; 1559 1560 msb = APInt::tcMSB(parts, partCount) + 1; 1561 precision = semantics->precision; 1562 1563 category = fcNormal; 1564 exponent = precision - 1; 1565 1566 if(msb > precision) { 1567 exponent += (msb - precision); 1568 lost_fraction = shiftRight(parts, partCount, msb - precision); 1569 msb = precision; 1570 } else 1571 lost_fraction = lfExactlyZero; 1572 1573 /* Copy the bit image. */ 1574 zeroSignificand(); 1575 APInt::tcAssign(significandParts(), parts, partCountForBits(msb)); 1576 1577 return normalize(rounding_mode, lost_fraction); 1578} 1579 1580APFloat::opStatus 1581APFloat::convertFromInteger(const integerPart *parts, unsigned int width, 1582 bool isSigned, roundingMode rounding_mode) 1583{ 1584 unsigned int partCount = partCountForBits(width); 1585 opStatus status; 1586 APInt api = APInt(width, partCount, parts); 1587 integerPart *copy = new integerPart[partCount]; 1588 1589 sign = false; 1590 if(isSigned && APInt::tcExtractBit(parts, width - 1)) { 1591 sign = true; 1592 api = -api; 1593 } 1594 1595 APInt::tcAssign(copy, api.getRawData(), partCount); 1596 status = convertFromUnsignedInteger(copy, partCount, rounding_mode); 1597 return status; 1598} 1599 1600APFloat::opStatus 1601APFloat::convertFromHexadecimalString(const char *p, 1602 roundingMode rounding_mode) 1603{ 1604 lostFraction lost_fraction; 1605 integerPart *significand; 1606 unsigned int bitPos, partsCount; 1607 const char *dot, *firstSignificantDigit; 1608 1609 zeroSignificand(); 1610 exponent = 0; 1611 category = fcNormal; 1612 1613 significand = significandParts(); 1614 partsCount = partCount(); 1615 bitPos = partsCount * integerPartWidth; 1616 1617 /* Skip leading zeroes and any(hexa)decimal point. */ 1618 p = skipLeadingZeroesAndAnyDot(p, &dot); 1619 firstSignificantDigit = p; 1620 1621 for(;;) { 1622 integerPart hex_value; 1623 1624 if(*p == '.') { 1625 assert(dot == 0); 1626 dot = p++; 1627 } 1628 1629 hex_value = hexDigitValue(*p); 1630 if(hex_value == -1U) { 1631 lost_fraction = lfExactlyZero; 1632 break; 1633 } 1634 1635 p++; 1636 1637 /* Store the number whilst 4-bit nibbles remain. */ 1638 if(bitPos) { 1639 bitPos -= 4; 1640 hex_value <<= bitPos % integerPartWidth; 1641 significand[bitPos / integerPartWidth] |= hex_value; 1642 } else { 1643 lost_fraction = trailingHexadecimalFraction(p, hex_value); 1644 while(hexDigitValue(*p) != -1U) 1645 p++; 1646 break; 1647 } 1648 } 1649 1650 /* Hex floats require an exponent but not a hexadecimal point. */ 1651 assert(*p == 'p' || *p == 'P'); 1652 1653 /* Ignore the exponent if we are zero. */ 1654 if(p != firstSignificantDigit) { 1655 int expAdjustment; 1656 1657 /* Implicit hexadecimal point? */ 1658 if(!dot) 1659 dot = p; 1660 1661 /* Calculate the exponent adjustment implicit in the number of 1662 significant digits. */ 1663 expAdjustment = dot - firstSignificantDigit; 1664 if(expAdjustment < 0) 1665 expAdjustment++; 1666 expAdjustment = expAdjustment * 4 - 1; 1667 1668 /* Adjust for writing the significand starting at the most 1669 significant nibble. */ 1670 expAdjustment += semantics->precision; 1671 expAdjustment -= partsCount * integerPartWidth; 1672 1673 /* Adjust for the given exponent. */ 1674 exponent = totalExponent(p, expAdjustment); 1675 } 1676 1677 return normalize(rounding_mode, lost_fraction); 1678} 1679 1680APFloat::opStatus 1681APFloat::convertFromString(const char *p, roundingMode rounding_mode) 1682{ 1683 /* Handle a leading minus sign. */ 1684 if(*p == '-') 1685 sign = 1, p++; 1686 else 1687 sign = 0; 1688 1689 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) 1690 return convertFromHexadecimalString(p + 2, rounding_mode); 1691 1692 assert(0 && "Decimal to binary conversions not yet implemented"); 1693 abort(); 1694} 1695 1696/* Write out a hexadecimal representation of the floating point value 1697 to DST, which must be of sufficient size, in the C99 form 1698 [-]0xh.hhhhp[+-]d. Return the number of characters written, 1699 excluding the terminating NUL. 1700 1701 If UPPERCASE, the output is in upper case, otherwise in lower case. 1702 1703 HEXDIGITS digits appear altogether, rounding the value if 1704 necessary. If HEXDIGITS is 0, the minimal precision to display the 1705 number precisely is used instead. If nothing would appear after 1706 the decimal point it is suppressed. 1707 1708 The decimal exponent is always printed and has at least one digit. 1709 Zero values display an exponent of zero. Infinities and NaNs 1710 appear as "infinity" or "nan" respectively. 1711 1712 The above rules are as specified by C99. There is ambiguity about 1713 what the leading hexadecimal digit should be. This implementation 1714 uses whatever is necessary so that the exponent is displayed as 1715 stored. This implies the exponent will fall within the IEEE format 1716 range, and the leading hexadecimal digit will be 0 (for denormals), 1717 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 1718 any other digits zero). 1719*/ 1720unsigned int 1721APFloat::convertToHexString(char *dst, unsigned int hexDigits, 1722 bool upperCase, roundingMode rounding_mode) const 1723{ 1724 char *p; 1725 1726 p = dst; 1727 if (sign) 1728 *dst++ = '-'; 1729 1730 switch (category) { 1731 case fcInfinity: 1732 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 1733 dst += sizeof infinityL - 1; 1734 break; 1735 1736 case fcNaN: 1737 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 1738 dst += sizeof NaNU - 1; 1739 break; 1740 1741 case fcZero: 1742 *dst++ = '0'; 1743 *dst++ = upperCase ? 'X': 'x'; 1744 *dst++ = '0'; 1745 if (hexDigits > 1) { 1746 *dst++ = '.'; 1747 memset (dst, '0', hexDigits - 1); 1748 dst += hexDigits - 1; 1749 } 1750 *dst++ = upperCase ? 'P': 'p'; 1751 *dst++ = '0'; 1752 break; 1753 1754 case fcNormal: 1755 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 1756 break; 1757 } 1758 1759 *dst = 0; 1760 1761 return dst - p; 1762} 1763 1764/* Does the hard work of outputting the correctly rounded hexadecimal 1765 form of a normal floating point number with the specified number of 1766 hexadecimal digits. If HEXDIGITS is zero the minimum number of 1767 digits necessary to print the value precisely is output. */ 1768char * 1769APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 1770 bool upperCase, 1771 roundingMode rounding_mode) const 1772{ 1773 unsigned int count, valueBits, shift, partsCount, outputDigits; 1774 const char *hexDigitChars; 1775 const integerPart *significand; 1776 char *p; 1777 bool roundUp; 1778 1779 *dst++ = '0'; 1780 *dst++ = upperCase ? 'X': 'x'; 1781 1782 roundUp = false; 1783 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 1784 1785 significand = significandParts(); 1786 partsCount = partCount(); 1787 1788 /* +3 because the first digit only uses the single integer bit, so 1789 we have 3 virtual zero most-significant-bits. */ 1790 valueBits = semantics->precision + 3; 1791 shift = integerPartWidth - valueBits % integerPartWidth; 1792 1793 /* The natural number of digits required ignoring trailing 1794 insignificant zeroes. */ 1795 outputDigits = (valueBits - significandLSB () + 3) / 4; 1796 1797 /* hexDigits of zero means use the required number for the 1798 precision. Otherwise, see if we are truncating. If we are, 1799 find out if we need to round away from zero. */ 1800 if (hexDigits) { 1801 if (hexDigits < outputDigits) { 1802 /* We are dropping non-zero bits, so need to check how to round. 1803 "bits" is the number of dropped bits. */ 1804 unsigned int bits; 1805 lostFraction fraction; 1806 1807 bits = valueBits - hexDigits * 4; 1808 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 1809 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 1810 } 1811 outputDigits = hexDigits; 1812 } 1813 1814 /* Write the digits consecutively, and start writing in the location 1815 of the hexadecimal point. We move the most significant digit 1816 left and add the hexadecimal point later. */ 1817 p = ++dst; 1818 1819 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 1820 1821 while (outputDigits && count) { 1822 integerPart part; 1823 1824 /* Put the most significant integerPartWidth bits in "part". */ 1825 if (--count == partsCount) 1826 part = 0; /* An imaginary higher zero part. */ 1827 else 1828 part = significand[count] << shift; 1829 1830 if (count && shift) 1831 part |= significand[count - 1] >> (integerPartWidth - shift); 1832 1833 /* Convert as much of "part" to hexdigits as we can. */ 1834 unsigned int curDigits = integerPartWidth / 4; 1835 1836 if (curDigits > outputDigits) 1837 curDigits = outputDigits; 1838 dst += partAsHex (dst, part, curDigits, hexDigitChars); 1839 outputDigits -= curDigits; 1840 } 1841 1842 if (roundUp) { 1843 char *q = dst; 1844 1845 /* Note that hexDigitChars has a trailing '0'. */ 1846 do { 1847 q--; 1848 *q = hexDigitChars[hexDigitValue (*q) + 1]; 1849 } while (*q == '0'); 1850 assert (q >= p); 1851 } else { 1852 /* Add trailing zeroes. */ 1853 memset (dst, '0', outputDigits); 1854 dst += outputDigits; 1855 } 1856 1857 /* Move the most significant digit to before the point, and if there 1858 is something after the decimal point add it. This must come 1859 after rounding above. */ 1860 p[-1] = p[0]; 1861 if (dst -1 == p) 1862 dst--; 1863 else 1864 p[0] = '.'; 1865 1866 /* Finally output the exponent. */ 1867 *dst++ = upperCase ? 'P': 'p'; 1868 1869 return writeSignedDecimal (dst, exponent); 1870} 1871 1872// For good performance it is desirable for different APFloats 1873// to produce different integers. 1874uint32_t 1875APFloat::getHashValue() const 1876{ 1877 if (category==fcZero) return sign<<8 | semantics->precision ; 1878 else if (category==fcInfinity) return sign<<9 | semantics->precision; 1879 else if (category==fcNaN) return 1<<10 | semantics->precision; 1880 else { 1881 uint32_t hash = sign<<11 | semantics->precision | exponent<<12; 1882 const integerPart* p = significandParts(); 1883 for (int i=partCount(); i>0; i--, p++) 1884 hash ^= ((uint32_t)*p) ^ (*p)>>32; 1885 return hash; 1886 } 1887} 1888 1889// Conversion from APFloat to/from host float/double. It may eventually be 1890// possible to eliminate these and have everybody deal with APFloats, but that 1891// will take a while. This approach will not easily extend to long double. 1892// Current implementation requires integerPartWidth==64, which is correct at 1893// the moment but could be made more general. 1894 1895// Denormals have exponent minExponent in APFloat, but minExponent-1 in 1896// the actual IEEE respresentations. We compensate for that here. 1897 1898APInt 1899APFloat::convertF80LongDoubleAPFloatToAPInt() const 1900{ 1901 assert(semantics == (const llvm::fltSemantics* const)&x87DoubleExtended); 1902 assert (partCount()==2); 1903 1904 uint64_t myexponent, mysignificand; 1905 1906 if (category==fcNormal) { 1907 myexponent = exponent+16383; //bias 1908 mysignificand = significandParts()[0]; 1909 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 1910 myexponent = 0; // denormal 1911 } else if (category==fcZero) { 1912 myexponent = 0; 1913 mysignificand = 0; 1914 } else if (category==fcInfinity) { 1915 myexponent = 0x7fff; 1916 mysignificand = 0x8000000000000000ULL; 1917 } else { 1918 assert(category == fcNaN && "Unknown category"); 1919 myexponent = 0x7fff; 1920 mysignificand = significandParts()[0]; 1921 } 1922 1923 uint64_t words[2]; 1924 words[0] = (((uint64_t)sign & 1) << 63) | 1925 ((myexponent & 0x7fff) << 48) | 1926 ((mysignificand >>16) & 0xffffffffffffLL); 1927 words[1] = mysignificand & 0xffff; 1928 return APInt(80, 2, words); 1929} 1930 1931APInt 1932APFloat::convertDoubleAPFloatToAPInt() const 1933{ 1934 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); 1935 assert (partCount()==1); 1936 1937 uint64_t myexponent, mysignificand; 1938 1939 if (category==fcNormal) { 1940 myexponent = exponent+1023; //bias 1941 mysignificand = *significandParts(); 1942 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 1943 myexponent = 0; // denormal 1944 } else if (category==fcZero) { 1945 myexponent = 0; 1946 mysignificand = 0; 1947 } else if (category==fcInfinity) { 1948 myexponent = 0x7ff; 1949 mysignificand = 0; 1950 } else { 1951 assert(category == fcNaN && "Unknown category!"); 1952 myexponent = 0x7ff; 1953 mysignificand = *significandParts(); 1954 } 1955 1956 return APInt(64, (((((uint64_t)sign & 1) << 63) | 1957 ((myexponent & 0x7ff) << 52) | 1958 (mysignificand & 0xfffffffffffffLL)))); 1959} 1960 1961APInt 1962APFloat::convertFloatAPFloatToAPInt() const 1963{ 1964 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); 1965 assert (partCount()==1); 1966 1967 uint32_t myexponent, mysignificand; 1968 1969 if (category==fcNormal) { 1970 myexponent = exponent+127; //bias 1971 mysignificand = *significandParts(); 1972 if (myexponent == 1 && !(mysignificand & 0x400000)) 1973 myexponent = 0; // denormal 1974 } else if (category==fcZero) { 1975 myexponent = 0; 1976 mysignificand = 0; 1977 } else if (category==fcInfinity) { 1978 myexponent = 0xff; 1979 mysignificand = 0; 1980 } else { 1981 assert(category == fcNaN && "Unknown category!"); 1982 myexponent = 0xff; 1983 mysignificand = *significandParts(); 1984 } 1985 1986 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | 1987 (mysignificand & 0x7fffff))); 1988} 1989 1990APInt 1991APFloat::convertToAPInt() const 1992{ 1993 if (semantics == (const llvm::fltSemantics* const)&IEEEsingle) 1994 return convertFloatAPFloatToAPInt(); 1995 1996 if (semantics == (const llvm::fltSemantics* const)&IEEEdouble) 1997 return convertDoubleAPFloatToAPInt(); 1998 1999 assert(semantics == (const llvm::fltSemantics* const)&x87DoubleExtended && 2000 "unknown format!"); 2001 return convertF80LongDoubleAPFloatToAPInt(); 2002} 2003 2004float 2005APFloat::convertToFloat() const 2006{ 2007 assert(semantics == (const llvm::fltSemantics* const)&IEEEsingle); 2008 APInt api = convertToAPInt(); 2009 return api.bitsToFloat(); 2010} 2011 2012double 2013APFloat::convertToDouble() const 2014{ 2015 assert(semantics == (const llvm::fltSemantics* const)&IEEEdouble); 2016 APInt api = convertToAPInt(); 2017 return api.bitsToDouble(); 2018} 2019 2020/// Integer bit is explicit in this format. Current Intel book does not 2021/// define meaning of: 2022/// exponent = all 1's, integer bit not set. 2023/// exponent = 0, integer bit set. (formerly "psuedodenormals") 2024/// exponent!=0 nor all 1's, integer bit not set. (formerly "unnormals") 2025void 2026APFloat::initFromF80LongDoubleAPInt(const APInt &api) 2027{ 2028 assert(api.getBitWidth()==80); 2029 uint64_t i1 = api.getRawData()[0]; 2030 uint64_t i2 = api.getRawData()[1]; 2031 uint64_t myexponent = (i1 >> 48) & 0x7fff; 2032 uint64_t mysignificand = ((i1 << 16) & 0xffffffffffff0000ULL) | 2033 (i2 & 0xffff); 2034 2035 initialize(&APFloat::x87DoubleExtended); 2036 assert(partCount()==2); 2037 2038 sign = i1>>63; 2039 if (myexponent==0 && mysignificand==0) { 2040 // exponent, significand meaningless 2041 category = fcZero; 2042 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 2043 // exponent, significand meaningless 2044 category = fcInfinity; 2045 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) { 2046 // exponent meaningless 2047 category = fcNaN; 2048 significandParts()[0] = mysignificand; 2049 significandParts()[1] = 0; 2050 } else { 2051 category = fcNormal; 2052 exponent = myexponent - 16383; 2053 significandParts()[0] = mysignificand; 2054 significandParts()[1] = 0; 2055 if (myexponent==0) // denormal 2056 exponent = -16382; 2057 } 2058} 2059 2060void 2061APFloat::initFromDoubleAPInt(const APInt &api) 2062{ 2063 assert(api.getBitWidth()==64); 2064 uint64_t i = *api.getRawData(); 2065 uint64_t myexponent = (i >> 52) & 0x7ff; 2066 uint64_t mysignificand = i & 0xfffffffffffffLL; 2067 2068 initialize(&APFloat::IEEEdouble); 2069 assert(partCount()==1); 2070 2071 sign = i>>63; 2072 if (myexponent==0 && mysignificand==0) { 2073 // exponent, significand meaningless 2074 category = fcZero; 2075 } else if (myexponent==0x7ff && mysignificand==0) { 2076 // exponent, significand meaningless 2077 category = fcInfinity; 2078 } else if (myexponent==0x7ff && mysignificand!=0) { 2079 // exponent meaningless 2080 category = fcNaN; 2081 *significandParts() = mysignificand; 2082 } else { 2083 category = fcNormal; 2084 exponent = myexponent - 1023; 2085 *significandParts() = mysignificand; 2086 if (myexponent==0) // denormal 2087 exponent = -1022; 2088 else 2089 *significandParts() |= 0x10000000000000LL; // integer bit 2090 } 2091} 2092 2093void 2094APFloat::initFromFloatAPInt(const APInt & api) 2095{ 2096 assert(api.getBitWidth()==32); 2097 uint32_t i = (uint32_t)*api.getRawData(); 2098 uint32_t myexponent = (i >> 23) & 0xff; 2099 uint32_t mysignificand = i & 0x7fffff; 2100 2101 initialize(&APFloat::IEEEsingle); 2102 assert(partCount()==1); 2103 2104 sign = i >> 31; 2105 if (myexponent==0 && mysignificand==0) { 2106 // exponent, significand meaningless 2107 category = fcZero; 2108 } else if (myexponent==0xff && mysignificand==0) { 2109 // exponent, significand meaningless 2110 category = fcInfinity; 2111 } else if (myexponent==0xff && mysignificand!=0) { 2112 // sign, exponent, significand meaningless 2113 category = fcNaN; 2114 *significandParts() = mysignificand; 2115 } else { 2116 category = fcNormal; 2117 exponent = myexponent - 127; //bias 2118 *significandParts() = mysignificand; 2119 if (myexponent==0) // denormal 2120 exponent = -126; 2121 else 2122 *significandParts() |= 0x800000; // integer bit 2123 } 2124} 2125 2126/// Treat api as containing the bits of a floating point number. Currently 2127/// we infer the floating point type from the size of the APInt. FIXME: This 2128/// breaks when we get to PPC128 and IEEE128 (but both cannot exist in the 2129/// same compile...) 2130void 2131APFloat::initFromAPInt(const APInt& api) 2132{ 2133 if (api.getBitWidth() == 32) 2134 return initFromFloatAPInt(api); 2135 else if (api.getBitWidth()==64) 2136 return initFromDoubleAPInt(api); 2137 else if (api.getBitWidth()==80) 2138 return initFromF80LongDoubleAPInt(api); 2139 else 2140 assert(0); 2141} 2142 2143APFloat::APFloat(const APInt& api) 2144{ 2145 initFromAPInt(api); 2146} 2147 2148APFloat::APFloat(float f) 2149{ 2150 APInt api = APInt(32, 0); 2151 initFromAPInt(api.floatToBits(f)); 2152} 2153 2154APFloat::APFloat(double d) 2155{ 2156 APInt api = APInt(64, 0); 2157 initFromAPInt(api.doubleToBits(d)); 2158} 2159