1a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes/*- 2a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG> 3a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * All rights reserved. 4a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * 5a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * Redistribution and use in source and binary forms, with or without 6a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * modification, are permitted provided that the following conditions 7a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * are met: 8a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * 1. Redistributions of source code must retain the above copyright 9a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * notice, this list of conditions and the following disclaimer. 10a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * 2. Redistributions in binary form must reproduce the above copyright 11a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * notice, this list of conditions and the following disclaimer in the 12a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * documentation and/or other materials provided with the distribution. 13a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * 14a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * SUCH DAMAGE. 25a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes */ 26a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 27a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes#include <sys/cdefs.h> 28a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes__FBSDID("$FreeBSD$"); 29a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 30a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes#include <fenv.h> 31a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes#include <float.h> 32a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes#include <math.h> 33a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 34a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes#include "math_private.h" 35a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 36a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes/* 37a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * A struct dd represents a floating-point number with twice the precision 38a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * of a double. We maintain the invariant that "hi" stores the 53 high-order 39a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * bits of the result. 40a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes */ 41a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughesstruct dd { 42a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes double hi; 43a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes double lo; 44a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes}; 45a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 46a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes/* 47a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * Compute a+b exactly, returning the exact result in a struct dd. We assume 48a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * that both a and b are finite, but make no assumptions about their relative 49a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * magnitudes. 50a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes */ 51a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughesstatic inline struct dd 52a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughesdd_add(double a, double b) 53a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes{ 54a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes struct dd ret; 55a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes double s; 56a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 57a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes ret.hi = a + b; 58a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes s = ret.hi - a; 59a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes ret.lo = (a - (ret.hi - s)) + (b - s); 60a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes return (ret); 61a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes} 62a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 63a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes/* 64a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * Compute a+b, with a small tweak: The least significant bit of the 65a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * result is adjusted into a sticky bit summarizing all the bits that 66a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * were lost to rounding. This adjustment negates the effects of double 67a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * rounding when the result is added to another number with a higher 68a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * exponent. For an explanation of round and sticky bits, see any reference 69a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * on FPU design, e.g., 70a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * 71a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * J. Coonen. An Implementation Guide to a Proposed Standard for 72a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980. 73a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes */ 74a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughesstatic inline double 75a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughesadd_adjusted(double a, double b) 76a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes{ 77a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes struct dd sum; 78a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes uint64_t hibits, lobits; 79a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 80a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes sum = dd_add(a, b); 81a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes if (sum.lo != 0) { 82a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes EXTRACT_WORD64(hibits, sum.hi); 83a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes if ((hibits & 1) == 0) { 84a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */ 85a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes EXTRACT_WORD64(lobits, sum.lo); 86a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes hibits += 1 - ((hibits ^ lobits) >> 62); 87a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes INSERT_WORD64(sum.hi, hibits); 88a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes } 89a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes } 90a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes return (sum.hi); 91a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes} 92a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 93a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes/* 94a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * Compute ldexp(a+b, scale) with a single rounding error. It is assumed 95a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * that the result will be subnormal, and care is taken to ensure that 96a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * double rounding does not occur. 97a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes */ 98a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughesstatic inline double 99a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughesadd_and_denormalize(double a, double b, int scale) 100a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes{ 101a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes struct dd sum; 102a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes uint64_t hibits, lobits; 103a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes int bits_lost; 104a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 105a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes sum = dd_add(a, b); 106a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 107a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes /* 108a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * If we are losing at least two bits of accuracy to denormalization, 109a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * then the first lost bit becomes a round bit, and we adjust the 110a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * lowest bit of sum.hi to make it a sticky bit summarizing all the 111a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * bits in sum.lo. With the sticky bit adjusted, the hardware will 112a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * break any ties in the correct direction. 113a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * 114a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * If we are losing only one bit to denormalization, however, we must 115a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * break the ties manually. 116a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes */ 117a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes if (sum.lo != 0) { 118a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes EXTRACT_WORD64(hibits, sum.hi); 119a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes bits_lost = -((int)(hibits >> 52) & 0x7ff) - scale + 1; 120bd3155dc5ddf09647388ad7f8fdbe6df123fbd6aCalin Juravle if ((bits_lost != 1) ^ (int)(hibits & 1)) { 121a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes /* hibits += (int)copysign(1.0, sum.hi * sum.lo) */ 122a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes EXTRACT_WORD64(lobits, sum.lo); 123a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes hibits += 1 - (((hibits ^ lobits) >> 62) & 2); 124a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes INSERT_WORD64(sum.hi, hibits); 125a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes } 126a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes } 127a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes return (ldexp(sum.hi, scale)); 128a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes} 129a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 130a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes/* 131a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * Compute a*b exactly, returning the exact result in a struct dd. We assume 132a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * that both a and b are normalized, so no underflow or overflow will occur. 133a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * The current rounding mode must be round-to-nearest. 134a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes */ 135a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughesstatic inline struct dd 136a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughesdd_mul(double a, double b) 137a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes{ 138a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes static const double split = 0x1p27 + 1.0; 139a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes struct dd ret; 140a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes double ha, hb, la, lb, p, q; 141a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 142a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes p = a * split; 143a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes ha = a - p; 144a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes ha += p; 145a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes la = a - ha; 146a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 147a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes p = b * split; 148a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes hb = b - p; 149a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes hb += p; 150a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes lb = b - hb; 151a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 152a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes p = ha * hb; 153a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes q = ha * lb + la * hb; 154a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 155a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes ret.hi = p + q; 156a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes ret.lo = p - ret.hi + q + la * lb; 157a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes return (ret); 158a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes} 159a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 160a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes/* 161a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * Fused multiply-add: Compute x * y + z with a single rounding error. 162a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * 163a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * We use scaling to avoid overflow/underflow, along with the 164a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * canonical precision-doubling technique adapted from: 165a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * 166a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * Dekker, T. A Floating-Point Technique for Extending the 167a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * Available Precision. Numer. Math. 18, 224-242 (1971). 168a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * 169a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * This algorithm is sensitive to the rounding precision. FPUs such 170a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * as the i387 must be set in double-precision mode if variables are 171a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * to be stored in FP registers in order to avoid incorrect results. 172a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * This is the default on FreeBSD, but not on many other systems. 173a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * 174a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * Hardware instructions should be used on architectures that support it, 175a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * since this implementation will likely be several times slower. 176a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes */ 177a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughesdouble 178a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughesfma(double x, double y, double z) 179a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes{ 180a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes double xs, ys, zs, adj; 181a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes struct dd xy, r; 182a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes int oround; 183a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes int ex, ey, ez; 184a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes int spread; 185a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 186a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes /* 187a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * Handle special cases. The order of operations and the particular 188a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * return values here are crucial in handling special cases involving 189a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * infinities, NaNs, overflows, and signed zeroes correctly. 190a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes */ 191a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes if (x == 0.0 || y == 0.0) 192a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes return (x * y + z); 193a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes if (z == 0.0) 194a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes return (x * y); 195a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes if (!isfinite(x) || !isfinite(y)) 196a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes return (x * y + z); 197a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes if (!isfinite(z)) 198a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes return (z); 199a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 200a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes xs = frexp(x, &ex); 201a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes ys = frexp(y, &ey); 202a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes zs = frexp(z, &ez); 203a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes oround = fegetround(); 204a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes spread = ex + ey - ez; 205a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 206a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes /* 207a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * If x * y and z are many orders of magnitude apart, the scaling 208a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * will overflow, so we handle these cases specially. Rounding 209a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * modes other than FE_TONEAREST are painful. 210a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes */ 211a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes if (spread < -DBL_MANT_DIG) { 212a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes feraiseexcept(FE_INEXACT); 213a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes if (!isnormal(z)) 214a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes feraiseexcept(FE_UNDERFLOW); 215a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes switch (oround) { 216a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes case FE_TONEAREST: 217a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes return (z); 218a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes case FE_TOWARDZERO: 219a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes if (x > 0.0 ^ y < 0.0 ^ z < 0.0) 220a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes return (z); 221a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes else 222a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes return (nextafter(z, 0)); 223a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes case FE_DOWNWARD: 224a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes if (x > 0.0 ^ y < 0.0) 225a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes return (z); 226a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes else 227a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes return (nextafter(z, -INFINITY)); 228a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes default: /* FE_UPWARD */ 229a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes if (x > 0.0 ^ y < 0.0) 230a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes return (nextafter(z, INFINITY)); 231a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes else 232a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes return (z); 233a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes } 234a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes } 235a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes if (spread <= DBL_MANT_DIG * 2) 236a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes zs = ldexp(zs, -spread); 237a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes else 238a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes zs = copysign(DBL_MIN, zs); 239a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 240a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes fesetround(FE_TONEAREST); 24178419467a2f88744ae2445fca5eb442877ebb1b0Elliott Hughes /* work around clang bug 8100 */ 24278419467a2f88744ae2445fca5eb442877ebb1b0Elliott Hughes volatile double vxs = xs; 243a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 244a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes /* 245a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * Basic approach for round-to-nearest: 246a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * 247a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * (xy.hi, xy.lo) = x * y (exact) 248a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * (r.hi, r.lo) = xy.hi + z (exact) 249a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * adj = xy.lo + r.lo (inexact; low bit is sticky) 250a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * result = r.hi + adj (correctly rounded) 251a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes */ 25278419467a2f88744ae2445fca5eb442877ebb1b0Elliott Hughes xy = dd_mul(vxs, ys); 253a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes r = dd_add(xy.hi, zs); 254a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 255a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes spread = ex + ey; 256a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 257a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes if (r.hi == 0.0) { 258a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes /* 259a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * When the addends cancel to 0, ensure that the result has 260a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * the correct sign. 261a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes */ 262a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes fesetround(oround); 263a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes volatile double vzs = zs; /* XXX gcc CSE bug workaround */ 264a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes return (xy.hi + vzs + ldexp(xy.lo, spread)); 265a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes } 266a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 267a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes if (oround != FE_TONEAREST) { 268a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes /* 269a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * There is no need to worry about double rounding in directed 270a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes * rounding modes. 271a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes */ 272a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes fesetround(oround); 27378419467a2f88744ae2445fca5eb442877ebb1b0Elliott Hughes /* work around clang bug 8100 */ 27478419467a2f88744ae2445fca5eb442877ebb1b0Elliott Hughes volatile double vrlo = r.lo; 27578419467a2f88744ae2445fca5eb442877ebb1b0Elliott Hughes adj = vrlo + xy.lo; 276a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes return (ldexp(r.hi + adj, spread)); 277a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes } 278a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 279a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes adj = add_adjusted(r.lo, xy.lo); 280a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes if (spread + ilogb(r.hi) > -1023) 281a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes return (ldexp(r.hi + adj, spread)); 282a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes else 283a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes return (add_and_denormalize(r.hi, adj, spread)); 284a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes} 285a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes 286a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes#if (LDBL_MANT_DIG == 53) 287a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes__weak_reference(fma, fmal); 288a0ee07829a9ba7e99ef68e8c12551301cc797f0fElliott Hughes#endif 289