15d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines//===-- lib/divtf3.c - Quad-precision division --------------------*- C -*-===// 25d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines// 35d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines// The LLVM Compiler Infrastructure 45d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines// 55d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines// This file is dual licensed under the MIT and the University of Illinois Open 65d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines// Source Licenses. See LICENSE.TXT for details. 75d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines// 85d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines//===----------------------------------------------------------------------===// 95d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines// 105d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines// This file implements quad-precision soft-float division 115d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines// with the IEEE-754 default rounding (to nearest, ties to even). 125d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines// 135d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines// For simplicity, this implementation currently flushes denormals to zero. 145d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines// It should be a fairly straightforward exercise to implement gradual 155d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines// underflow with correct rounding. 165d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines// 175d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines//===----------------------------------------------------------------------===// 185d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 195d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines#define QUAD_PRECISION 205d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines#include "fp_lib.h" 215d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 225d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) 235d71de26cedae3dafc17449fe0182045c0bd20e8Stephen HinesCOMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) { 245d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 255d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; 265d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; 275d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; 285d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 295d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines rep_t aSignificand = toRep(a) & significandMask; 305d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines rep_t bSignificand = toRep(b) & significandMask; 315d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines int scale = 0; 325d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 335d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // Detect if a or b is zero, denormal, infinity, or NaN. 345d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) { 355d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 365d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines const rep_t aAbs = toRep(a) & absMask; 375d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines const rep_t bAbs = toRep(b) & absMask; 385d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 395d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // NaN / anything = qNaN 405d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines if (aAbs > infRep) return fromRep(toRep(a) | quietBit); 415d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // anything / NaN = qNaN 425d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines if (bAbs > infRep) return fromRep(toRep(b) | quietBit); 435d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 445d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines if (aAbs == infRep) { 455d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // infinity / infinity = NaN 465d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines if (bAbs == infRep) return fromRep(qnanRep); 475d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // infinity / anything else = +/- infinity 485d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines else return fromRep(aAbs | quotientSign); 495d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines } 505d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 515d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // anything else / infinity = +/- 0 525d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines if (bAbs == infRep) return fromRep(quotientSign); 535d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 545d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines if (!aAbs) { 555d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // zero / zero = NaN 565d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines if (!bAbs) return fromRep(qnanRep); 575d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // zero / anything else = +/- zero 585d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines else return fromRep(quotientSign); 595d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines } 605d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // anything else / zero = +/- infinity 615d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines if (!bAbs) return fromRep(infRep | quotientSign); 625d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 635d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // one or both of a or b is denormal, the other (if applicable) is a 645d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // normal number. Renormalize one or both of a and b, and set scale to 655d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // include the necessary exponent adjustment. 665d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines if (aAbs < implicitBit) scale += normalize(&aSignificand); 675d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines if (bAbs < implicitBit) scale -= normalize(&bSignificand); 685d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines } 695d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 705d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // Or in the implicit significand bit. (If we fell through from the 715d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // denormal path it was already set by normalize( ), but setting it twice 725d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // won't hurt anything.) 735d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines aSignificand |= implicitBit; 745d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines bSignificand |= implicitBit; 755d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines int quotientExponent = aExponent - bExponent + scale; 765d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 775d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // Align the significand of b as a Q63 fixed-point number in the range 785d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // [1, 2.0) and get a Q64 approximate reciprocal using a small minimax 795d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This 805d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // is accurate to about 3.5 binary digits. 815d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines const uint64_t q63b = bSignificand >> 49; 825d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines uint64_t recip64 = UINT64_C(0x7504f333F9DE6484) - q63b; 835d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // 0x7504f333F9DE6484 / 2^64 + 1 = 3/4 + 1/sqrt(2) 845d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 855d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // Now refine the reciprocal estimate using a Newton-Raphson iteration: 865d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // 875d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // x1 = x0 * (2 - x0 * b) 885d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // 895d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // This doubles the number of correct binary digits in the approximation 905d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // with each iteration. 915d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines uint64_t correction64; 925d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines correction64 = -((rep_t)recip64 * q63b >> 64); 935d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines recip64 = (rep_t)recip64 * correction64 >> 63; 945d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines correction64 = -((rep_t)recip64 * q63b >> 64); 955d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines recip64 = (rep_t)recip64 * correction64 >> 63; 965d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines correction64 = -((rep_t)recip64 * q63b >> 64); 975d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines recip64 = (rep_t)recip64 * correction64 >> 63; 985d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines correction64 = -((rep_t)recip64 * q63b >> 64); 995d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines recip64 = (rep_t)recip64 * correction64 >> 63; 1005d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines correction64 = -((rep_t)recip64 * q63b >> 64); 1015d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines recip64 = (rep_t)recip64 * correction64 >> 63; 1025d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 1035d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // recip64 might have overflowed to exactly zero in the preceeding 1045d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // computation if the high word of b is exactly 1.0. This would sabotage 1055d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // the full-width final stage of the computation that follows, so we adjust 1065d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // recip64 downward by one bit. 1075d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines recip64--; 1085d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 1095d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // We need to perform one more iteration to get us to 112 binary digits; 1105d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // The last iteration needs to happen with extra precision. 1115d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines const uint64_t q127blo = bSignificand << 15; 1125d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines rep_t correction, reciprocal; 1135d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 1145d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // NOTE: This operation is equivalent to __multi3, which is not implemented 1155d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // in some architechure 1165d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines rep_t r64q63, r64q127, r64cH, r64cL, dummy; 1175d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines wideMultiply((rep_t)recip64, (rep_t)q63b, &dummy, &r64q63); 1185d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines wideMultiply((rep_t)recip64, (rep_t)q127blo, &dummy, &r64q127); 1195d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 1205d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines correction = -(r64q63 + (r64q127 >> 64)); 1215d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 1225d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines uint64_t cHi = correction >> 64; 1235d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines uint64_t cLo = correction; 1245d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 1255d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines wideMultiply((rep_t)recip64, (rep_t)cHi, &dummy, &r64cH); 1265d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines wideMultiply((rep_t)recip64, (rep_t)cLo, &dummy, &r64cL); 1275d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 1285d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines reciprocal = r64cH + (r64cL >> 64); 1295d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 1305d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // We already adjusted the 64-bit estimate, now we need to adjust the final 1315d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // 128-bit reciprocal estimate downward to ensure that it is strictly smaller 1325d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // than the infinitely precise exact reciprocal. Because the computation 1335d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // of the Newton-Raphson step is truncating at every step, this adjustment 1345d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // is small; most of the work is already done. 1355d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines reciprocal -= 2; 1365d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 1375d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // The numerical reciprocal is accurate to within 2^-112, lies in the 1385d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // interval [0.5, 1.0), and is strictly smaller than the true reciprocal 1395d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b 1405d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // in Q127 with the following properties: 1415d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // 1425d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // 1. q < a/b 1435d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // 2. q is in the interval [0.5, 2.0) 1445d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // 3. the error in q is bounded away from 2^-113 (actually, we have a 1455d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // couple of bits to spare, but this is all we need). 1465d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 1475d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // We need a 128 x 128 multiply high to compute q, which isn't a basic 1485d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // operation in C, so we need to be a little bit fussy. 1495d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines rep_t quotient, quotientLo; 1505d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines wideMultiply(aSignificand << 2, reciprocal, "ient, "ientLo); 1515d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 1525d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). 1535d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // In either case, we are going to compute a residual of the form 1545d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // 1555d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // r = a - q*b 1565d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // 1575d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // We know from the construction of q that r satisfies: 1585d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // 1595d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // 0 <= r < ulp(q)*b 1605d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // 1615d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we 1625d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // already have the correct result. The exact halfway case cannot occur. 1635d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // We also take this time to right shift quotient if it falls in the [1,2) 1645d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // range and adjust the exponent accordingly. 1655d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines rep_t residual; 1665d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines rep_t qb; 1675d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 1685d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines if (quotient < (implicitBit << 1)) { 1695d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines wideMultiply(quotient, bSignificand, &dummy, &qb); 1705d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines residual = (aSignificand << 113) - qb; 1715d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines quotientExponent--; 1725d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines } else { 1735d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines quotient >>= 1; 1745d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines wideMultiply(quotient, bSignificand, &dummy, &qb); 1755d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines residual = (aSignificand << 112) - qb; 1765d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines } 1775d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 1785d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines const int writtenExponent = quotientExponent + exponentBias; 1795d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 1805d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines if (writtenExponent >= maxExponent) { 1815d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // If we have overflowed the exponent, return infinity. 1825d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines return fromRep(infRep | quotientSign); 1835d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines } 1845d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines else if (writtenExponent < 1) { 1855d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // Flush denormals to zero. In the future, it would be nice to add 1865d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // code to round them correctly. 1875d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines return fromRep(quotientSign); 1885d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines } 1895d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines else { 1905d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines const bool round = (residual << 1) >= bSignificand; 1915d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // Clear the implicit bit 1925d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines rep_t absResult = quotient & significandMask; 1935d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // Insert the exponent 1945d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines absResult |= (rep_t)writtenExponent << significandBits; 1955d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // Round 1965d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines absResult += round; 1975d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines // Insert the sign and return 1985d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines const long double result = fromRep(absResult | quotientSign); 1995d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines return result; 2005d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines } 2015d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines} 2025d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines 2035d71de26cedae3dafc17449fe0182045c0bd20e8Stephen Hines#endif 204