APFloat.h revision 418d360518811121ad9352af57fdd7ba58a4f917
1//== llvm/Support/APFloat.h - Arbitrary Precision Floating Point -*- C++ -*-==// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Neil Booth and is distributed under the 6// University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file declares a class to represent arbitrary precision floating 11// point values and provide a variety of arithmetic operations on them. 12// 13//===----------------------------------------------------------------------===// 14 15/* A self-contained host- and target-independent arbitrary-precision 16 floating-point software implementation using bignum integer 17 arithmetic, as provided by static functions in the APInt class. 18 The library will work with bignum integers whose parts are any 19 unsigned type at least 16 bits wide. 64 bits is recommended. 20 21 Written for clarity rather than speed, in particular with a view 22 to use in the front-end of a cross compiler so that target 23 arithmetic can be correctly performed on the host. Performance 24 should nonetheless be reasonable, particularly for its intended 25 use. It may be useful as a base implementation for a run-time 26 library during development of a faster target-specific one. 27 28 All 5 rounding modes in the IEEE-754R draft are handled correctly 29 for all implemented operations. Currently implemented operations 30 are add, subtract, multiply, divide, fused-multiply-add, 31 conversion-to-float, conversion-to-integer and 32 conversion-from-integer. New rounding modes (e.g. away from zero) 33 can be added with three or four lines of code. The library reads 34 and correctly rounds hexadecimal floating point numbers as per 35 C99; syntax is required to have been validated by the caller. 36 Conversion from decimal is not currently implemented. 37 38 Four formats are built-in: IEEE single precision, double 39 precision, quadruple precision, and x87 80-bit extended double 40 (when operating with full extended precision). Adding a new 41 format that obeys IEEE semantics only requires adding two lines of 42 code: a declaration and definition of the format. 43 44 All operations return the status of that operation as an exception 45 bit-mask, so multiple operations can be done consecutively with 46 their results or-ed together. The returned status can be useful 47 for compiler diagnostics; e.g., inexact, underflow and overflow 48 can be easily diagnosed on constant folding, and compiler 49 optimizers can determine what exceptions would be raised by 50 folding operations and optimize, or perhaps not optimize, 51 accordingly. 52 53 At present, underflow tininess is detected after rounding; it 54 should be straight forward to add support for the before-rounding 55 case too. 56 57 Non-zero finite numbers are represented internally as a sign bit, 58 a 16-bit signed exponent, and the significand as an array of 59 integer parts. After normalization of a number of precision P the 60 exponent is within the range of the format, and if the number is 61 not denormal the P-th bit of the significand is set as an explicit 62 integer bit. For denormals the most significant bit is shifted 63 right so that the exponent is maintained at the format's minimum, 64 so that the smallest denormal has just the least significant bit 65 of the significand set. The sign of zeroes and infinities is 66 significant; the exponent and significand of such numbers is 67 not stored, but has a known implicit (deterministic) value: 68 0 for the significands, 0 for zero exponent, all 1 bits for 69 infinity exponent. For NaNs the sign and significand are 70 deterministic, although not really meaningful; the exponent is 71 implicitly all 1 bits. 72 73 TODO 74 ==== 75 76 Some features that may or may not be worth adding: 77 78 Conversions to and from decimal strings (hard). 79 80 Conversions to hexadecimal string. 81 82 Read and write IEEE-format in-memory representations. 83 84 Optional ability to detect underflow tininess before rounding. 85 86 New formats: x87 in single and double precision mode (IEEE apart 87 from extended exponent range) and IBM two-double extended 88 precision (hard). 89 90 New operations: sqrt, copysign, nextafter, nexttoward. 91*/ 92 93#ifndef LLVM_FLOAT_H 94#define LLVM_FLOAT_H 95 96// APInt contains static functions implementing bignum arithmetic. 97#include "llvm/ADT/APInt.h" 98#include "llvm/CodeGen/ValueTypes.h" 99 100namespace llvm { 101 102 /* Exponents are stored as signed numbers. */ 103 typedef signed short exponent_t; 104 105 struct fltSemantics; 106 107 /* When bits of a floating point number are truncated, this enum is 108 used to indicate what fraction of the LSB those bits represented. 109 It essentially combines the roles of guard and sticky bits. */ 110 enum lostFraction { // Example of truncated bits: 111 lfExactlyZero, // 000000 112 lfLessThanHalf, // 0xxxxx x's not all zero 113 lfExactlyHalf, // 100000 114 lfMoreThanHalf // 1xxxxx x's not all zero 115 }; 116 117 class APFloat { 118 public: 119 120 /* We support the following floating point semantics. */ 121 static const fltSemantics IEEEsingle; 122 static const fltSemantics IEEEdouble; 123 static const fltSemantics IEEEquad; 124 static const fltSemantics x87DoubleExtended; 125 /* And this psuedo, used to construct APFloats that cannot 126 conflict with anything real. */ 127 static const fltSemantics Bogus; 128 129 static unsigned int semanticsPrecision(const fltSemantics &); 130 131 /* Floating point numbers have a four-state comparison relation. */ 132 enum cmpResult { 133 cmpLessThan, 134 cmpEqual, 135 cmpGreaterThan, 136 cmpUnordered 137 }; 138 139 /* IEEE-754R gives five rounding modes. */ 140 enum roundingMode { 141 rmNearestTiesToEven, 142 rmTowardPositive, 143 rmTowardNegative, 144 rmTowardZero, 145 rmNearestTiesToAway 146 }; 147 148 /* Operation status. opUnderflow or opOverflow are always returned 149 or-ed with opInexact. */ 150 enum opStatus { 151 opOK = 0x00, 152 opInvalidOp = 0x01, 153 opDivByZero = 0x02, 154 opOverflow = 0x04, 155 opUnderflow = 0x08, 156 opInexact = 0x10 157 }; 158 159 /* Category of internally-represented number. */ 160 enum fltCategory { 161 fcInfinity, 162 fcNaN, 163 fcNormal, 164 fcZero 165 }; 166 167 /* Constructors. */ 168 APFloat(const fltSemantics &, const char *); 169 APFloat(const fltSemantics &, integerPart); 170 APFloat(const fltSemantics &, fltCategory, bool negative); 171 APFloat(double d); 172 APFloat(float f); 173 APFloat(const APFloat &); 174 ~APFloat(); 175 176 /* Arithmetic. */ 177 opStatus add(const APFloat &, roundingMode); 178 opStatus subtract(const APFloat &, roundingMode); 179 opStatus multiply(const APFloat &, roundingMode); 180 opStatus divide(const APFloat &, roundingMode); 181 opStatus mod(const APFloat &, roundingMode); 182 void copySign(const APFloat &); 183 opStatus fusedMultiplyAdd(const APFloat &, const APFloat &, roundingMode); 184 void changeSign(); // neg 185 void clearSign(); // abs 186 187 /* Conversions. */ 188 opStatus convert(const fltSemantics &, roundingMode); 189 opStatus convertToInteger(integerPart *, unsigned int, bool, 190 roundingMode) const; 191 opStatus convertFromInteger(const integerPart *, unsigned int, bool, 192 roundingMode); 193 opStatus convertFromString(const char *, roundingMode); 194 double convertToDouble() const; 195 float convertToFloat() const; 196 197 /* The definition of equality is not straightforward for floating point, 198 so we won't use operator==. Use one of the following, or write 199 whatever it is you really mean. */ 200 // bool operator==(const APFloat &) const; // DO NOT IMPLEMENT 201 202 /* IEEE comparison with another floating point number (NaNs 203 compare unordered, 0==-0). */ 204 cmpResult compare(const APFloat &) const; 205 206 /* Bitwise comparison for equality (QNaNs compare equal, 0!=-0). */ 207 bool bitwiseIsEqual(const APFloat &) const; 208 209 /* Simple queries. */ 210 fltCategory getCategory() const { return category; } 211 const fltSemantics &getSemantics() const { return *semantics; } 212 bool isZero() const { return category == fcZero; } 213 bool isNonZero() const { return category != fcZero; } 214 bool isNegative() const { return sign; } 215 bool isPosZero() const { return isZero() && !isNegative(); } 216 bool isNegZero() const { return isZero() && isNegative(); } 217 218 APFloat& operator=(const APFloat &); 219 220 /* Return an arbitrary integer value usable for hashing. */ 221 uint32_t getHashValue() const; 222 223 private: 224 225 /* Trivial queries. */ 226 integerPart *significandParts(); 227 const integerPart *significandParts() const; 228 unsigned int partCount() const; 229 230 /* Significand operations. */ 231 integerPart addSignificand(const APFloat &); 232 integerPart subtractSignificand(const APFloat &, integerPart); 233 lostFraction addOrSubtractSignificand(const APFloat &, bool subtract); 234 lostFraction multiplySignificand(const APFloat &, const APFloat *); 235 lostFraction divideSignificand(const APFloat &); 236 void incrementSignificand(); 237 void initialize(const fltSemantics *); 238 void shiftSignificandLeft(unsigned int); 239 lostFraction shiftSignificandRight(unsigned int); 240 unsigned int significandLSB() const; 241 unsigned int significandMSB() const; 242 void zeroSignificand(); 243 244 /* Arithmetic on special values. */ 245 opStatus addOrSubtractSpecials(const APFloat &, bool subtract); 246 opStatus divideSpecials(const APFloat &); 247 opStatus multiplySpecials(const APFloat &); 248 249 /* Miscellany. */ 250 opStatus normalize(roundingMode, lostFraction); 251 opStatus addOrSubtract(const APFloat &, roundingMode, bool subtract); 252 cmpResult compareAbsoluteValue(const APFloat &) const; 253 opStatus handleOverflow(roundingMode); 254 bool roundAwayFromZero(roundingMode, lostFraction); 255 opStatus convertFromUnsignedInteger(integerPart *, unsigned int, 256 roundingMode); 257 lostFraction combineLostFractions(lostFraction, lostFraction); 258 opStatus convertFromHexadecimalString(const char *, roundingMode); 259 260 void assign(const APFloat &); 261 void copySignificand(const APFloat &); 262 void freeSignificand(); 263 264 /* What kind of semantics does this value obey? */ 265 const fltSemantics *semantics; 266 267 /* Significand - the fraction with an explicit integer bit. Must be 268 at least one bit wider than the target precision. */ 269 union Significand 270 { 271 integerPart part; 272 integerPart *parts; 273 } significand; 274 275 /* The exponent - a signed number. */ 276 exponent_t exponent; 277 278 /* What kind of floating point number this is. */ 279 /* Only 2 bits are required, but VisualStudio incorrectly sign extends 280 it. Using the extra bit keeps it from failing under VisualStudio */ 281 fltCategory category: 3; 282 283 /* The sign bit of this number. */ 284 unsigned int sign: 1; 285 }; 286} /* namespace llvm */ 287 288#endif /* LLVM_FLOAT_H */ 289