APFloat.h revision 418d360518811121ad9352af57fdd7ba58a4f917
1//== llvm/Support/APFloat.h - Arbitrary Precision Floating Point -*- C++ -*-==//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file was developed by Neil Booth and is distributed under the
6// University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file declares a class to represent arbitrary precision floating
11// point values and provide a variety of arithmetic operations on them.
12//
13//===----------------------------------------------------------------------===//
14
15/*  A self-contained host- and target-independent arbitrary-precision
16    floating-point software implementation using bignum integer
17    arithmetic, as provided by static functions in the APInt class.
18    The library will work with bignum integers whose parts are any
19    unsigned type at least 16 bits wide.  64 bits is recommended.
20
21    Written for clarity rather than speed, in particular with a view
22    to use in the front-end of a cross compiler so that target
23    arithmetic can be correctly performed on the host.  Performance
24    should nonetheless be reasonable, particularly for its intended
25    use.  It may be useful as a base implementation for a run-time
26    library during development of a faster target-specific one.
27
28    All 5 rounding modes in the IEEE-754R draft are handled correctly
29    for all implemented operations.  Currently implemented operations
30    are add, subtract, multiply, divide, fused-multiply-add,
31    conversion-to-float, conversion-to-integer and
32    conversion-from-integer.  New rounding modes (e.g. away from zero)
33    can be added with three or four lines of code.  The library reads
34    and correctly rounds hexadecimal floating point numbers as per
35    C99; syntax is required to have been validated by the caller.
36    Conversion from decimal is not currently implemented.
37
38    Four formats are built-in: IEEE single precision, double
39    precision, quadruple precision, and x87 80-bit extended double
40    (when operating with full extended precision).  Adding a new
41    format that obeys IEEE semantics only requires adding two lines of
42    code: a declaration and definition of the format.
43
44    All operations return the status of that operation as an exception
45    bit-mask, so multiple operations can be done consecutively with
46    their results or-ed together.  The returned status can be useful
47    for compiler diagnostics; e.g., inexact, underflow and overflow
48    can be easily diagnosed on constant folding, and compiler
49    optimizers can determine what exceptions would be raised by
50    folding operations and optimize, or perhaps not optimize,
51    accordingly.
52
53    At present, underflow tininess is detected after rounding; it
54    should be straightforward to add support for the before-rounding
55    case too.
56
57    Non-zero finite numbers are represented internally as a sign bit,
58    a 16-bit signed exponent, and the significand as an array of
59    integer parts.  After normalization of a number of precision P the
60    exponent is within the range of the format, and if the number is
61    not denormal the P-th bit of the significand is set as an explicit
62    integer bit.  For denormals the most significant bit is shifted
63    right so that the exponent is maintained at the format's minimum,
64    so that the smallest denormal has just the least significant bit
65    of the significand set.  The sign of zeroes and infinities is
66    significant; the exponent and significand of such numbers are
67    not stored, but have a known implicit (deterministic) value:
68    0 for the significands, 0 for zero exponent, all 1 bits for
69    infinity exponent.  For NaNs the sign and significand are
70    deterministic, although not really meaningful; the exponent is
71    implicitly all 1 bits.
72
73    TODO
74    ====
75
76    Some features that may or may not be worth adding:
77
78    Conversions to and from decimal strings (hard).
79
80    Conversions to hexadecimal string.
81
82    Read and write IEEE-format in-memory representations.
83
84    Optional ability to detect underflow tininess before rounding.
85
86    New formats: x87 in single and double precision mode (IEEE apart
87    from extended exponent range) and IBM two-double extended
88    precision (hard).
89
90    New operations: sqrt, copysign, nextafter, nexttoward.
91*/
92
93#ifndef LLVM_FLOAT_H
94#define LLVM_FLOAT_H
95
96// APInt contains static functions implementing bignum arithmetic.
97#include "llvm/ADT/APInt.h"
98#include "llvm/CodeGen/ValueTypes.h"
99
100namespace llvm {
101
102  /* Exponents are stored as signed numbers (a signed short).  */
103  typedef signed short exponent_t;
104
105  struct fltSemantics;  // Declared only; this header uses it by reference, by pointer and in static member declarations.
106
107  /* When bits of a floating point number are truncated, this enum is
108     used to indicate what fraction of the LSB those bits represented.
109     It essentially combines the roles of guard and sticky bits.  The
110     enumerators below are listed in increasing order of the lost fraction.  */
110  enum lostFraction {		// Example of truncated bits:
111    lfExactlyZero,		// 000000  nothing was lost
112    lfLessThanHalf,		// 0xxxxx  x's not all zero
113    lfExactlyHalf,		// 100000  exactly half of the LSB
114    lfMoreThanHalf		// 1xxxxx  x's not all zero
115  };
116
117  class APFloat {  // An arbitrary precision floating point value plus the semantics it obeys.
118  public:
119
120    /* We support the following floating point semantics.  */
121    static const fltSemantics IEEEsingle;
122    static const fltSemantics IEEEdouble;
123    static const fltSemantics IEEEquad;
124    static const fltSemantics x87DoubleExtended;
125    /* And this pseudo-semantics, used to construct APFloats that cannot
126       conflict with anything real. */
127    static const fltSemantics Bogus;
128
129    static unsigned int semanticsPrecision(const fltSemantics &);  // NOTE(review): presumably the precision in bits of the given semantics; confirm in the implementation.
130
131    /* Floating point numbers have a four-state comparison relation.  */
132    enum cmpResult {
133      cmpLessThan,
134      cmpEqual,
135      cmpGreaterThan,
136      cmpUnordered  // NaNs compare unordered (see compare below)
137    };
138
139    /* IEEE-754R gives five rounding modes.  */
140    enum roundingMode {
141      rmNearestTiesToEven,
142      rmTowardPositive,
143      rmTowardNegative,
144      rmTowardZero,
145      rmNearestTiesToAway
146    };
147
148    /* Operation status.  opUnderflow or opOverflow are always returned
149       or-ed with opInexact.  Each flag is a distinct bit, so the statuses
150       of consecutive operations can be or-ed together.  */
150    enum opStatus {
151      opOK          = 0x00,
152      opInvalidOp   = 0x01,
153      opDivByZero   = 0x02,
154      opOverflow    = 0x04,
155      opUnderflow   = 0x08,
156      opInexact     = 0x10
157    };
158
159    /* Category of internally-represented number.  */
160    enum fltCategory {
161      fcInfinity,
162      fcNaN,
163      fcNormal,  // NOTE(review): presumably every non-zero finite value, denormals included; confirm.
164      fcZero
165    };
166
167    /* Constructors.  The double and float constructors are not explicit,
168       so they also act as implicit conversions.  */
168    APFloat(const fltSemantics &, const char *);  // NOTE(review): presumably parsed like convertFromString; confirm.
169    APFloat(const fltSemantics &, integerPart);
170    APFloat(const fltSemantics &, fltCategory, bool negative);
171    APFloat(double d);
172    APFloat(float f);
173    APFloat(const APFloat &);
174    ~APFloat();
175
176    /* Arithmetic.  The operations taking a roundingMode return an opStatus
177       bit-mask; copySign, changeSign and clearSign return nothing.  */
177    opStatus add(const APFloat &, roundingMode);
178    opStatus subtract(const APFloat &, roundingMode);
179    opStatus multiply(const APFloat &, roundingMode);
180    opStatus divide(const APFloat &, roundingMode);
181    opStatus mod(const APFloat &, roundingMode);
182    void copySign(const APFloat &);
183    opStatus fusedMultiplyAdd(const APFloat &, const APFloat &, roundingMode);
184    void changeSign();    // neg
185    void clearSign();     // abs
186
187    /* Conversions.  */
188    opStatus convert(const fltSemantics &, roundingMode);
189    opStatus convertToInteger(integerPart *, unsigned int, bool,
190			      roundingMode) const;  // NOTE(review): unnamed parameters are presumably (destination parts, width, isSigned); confirm.
191    opStatus convertFromInteger(const integerPart *, unsigned int, bool,
192				roundingMode);
193    opStatus convertFromString(const char *, roundingMode);
194    double convertToDouble() const;
195    float convertToFloat() const;
196
197    /* The definition of equality is not straightforward for floating point,
198       so we won't use operator==.  Use one of the following, or write
199       whatever it is you really mean. */
200    // bool operator==(const APFloat &) const;     // DO NOT IMPLEMENT
201
202    /* IEEE comparison with another floating point number (NaNs
203       compare unordered, 0==-0). */
204    cmpResult compare(const APFloat &) const;
205
206    /* Bitwise comparison for equality (QNaNs compare equal, 0!=-0). */
207    bool bitwiseIsEqual(const APFloat &) const;
208
209    /* Simple queries.  */
210    fltCategory getCategory() const { return category; }
211    const fltSemantics &getSemantics() const { return *semantics; }
212    bool isZero() const { return category == fcZero; }
213    bool isNonZero() const { return category != fcZero; }  // also true for fcInfinity and fcNaN
214    bool isNegative() const { return sign; }  // the stored sign bit, whatever the category
215    bool isPosZero() const { return isZero() && !isNegative(); }
216    bool isNegZero() const { return isZero() && isNegative(); }
217
218    APFloat& operator=(const APFloat &);
219
220    /* Return an arbitrary integer value usable for hashing. */
221    uint32_t getHashValue() const;
222
223  private:
224
225    /* Trivial queries.  */
226    integerPart *significandParts();
227    const integerPart *significandParts() const;
228    unsigned int partCount() const;
229
230    /* Significand operations.  */
231    integerPart addSignificand(const APFloat &);
232    integerPart subtractSignificand(const APFloat &, integerPart);
233    lostFraction addOrSubtractSignificand(const APFloat &, bool subtract);
234    lostFraction multiplySignificand(const APFloat &, const APFloat *);
235    lostFraction divideSignificand(const APFloat &);
236    void incrementSignificand();
237    void initialize(const fltSemantics *);
238    void shiftSignificandLeft(unsigned int);
239    lostFraction shiftSignificandRight(unsigned int);
240    unsigned int significandLSB() const;
241    unsigned int significandMSB() const;
242    void zeroSignificand();
243
244    /* Arithmetic on special values.  */
245    opStatus addOrSubtractSpecials(const APFloat &, bool subtract);
246    opStatus divideSpecials(const APFloat &);
247    opStatus multiplySpecials(const APFloat &);
248
249    /* Miscellany.  */
250    opStatus normalize(roundingMode, lostFraction);
251    opStatus addOrSubtract(const APFloat &, roundingMode, bool subtract);
252    cmpResult compareAbsoluteValue(const APFloat &) const;
253    opStatus handleOverflow(roundingMode);
254    bool roundAwayFromZero(roundingMode, lostFraction);
255    opStatus convertFromUnsignedInteger(integerPart *, unsigned int,
256					roundingMode);
257    lostFraction combineLostFractions(lostFraction, lostFraction);
258    opStatus convertFromHexadecimalString(const char *, roundingMode);
259
260    void assign(const APFloat &);
261    void copySignificand(const APFloat &);
262    void freeSignificand();
263
264    /* What kind of semantics does this value obey?  */
265    const fltSemantics *semantics;
266
267    /* Significand - the fraction with an explicit integer bit.  Must be
268       at least one bit wider than the target precision.  NOTE(review): presumably 'part' holds a one-part significand inline and 'parts' points to storage for longer ones; confirm.  */
269    union Significand
270    {
271      integerPart part;
272      integerPart *parts;
273    } significand;
274
275    /* The exponent - a signed number.  */
276    exponent_t exponent;
277
278    /* What kind of floating point number this is.  */
279    /* Only 2 bits are required, but VisualStudio incorrectly sign extends
280       it.  Using the extra bit keeps it from failing under VisualStudio */
281    fltCategory category: 3;
282
283    /* The sign bit of this number.  */
284    unsigned int sign: 1;
285  };
286} /* namespace llvm */
287
288#endif /* LLVM_FLOAT_H */
289