1// The following is adapted from fdlibm (http://www.netlib.org/fdlibm).
2//
3// ====================================================
4// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5//
6// Developed at SunSoft, a Sun Microsystems, Inc. business.
7// Permission to use, copy, modify, and distribute this
8// software is freely granted, provided that this notice
9// is preserved.
10// ====================================================
11//
12// The original source code covered by the above license above has been
13// modified significantly by Google Inc.
14// Copyright 2016 the V8 project authors. All rights reserved.
15
16#include "src/base/ieee754.h"
17
18#include <cmath>
19#include <limits>
20
21#include "src/base/build_config.h"
22#include "src/base/macros.h"
23
24namespace v8 {
25namespace base {
26namespace ieee754 {
27
28namespace {
29
30/* Disable "potential divide by 0" warning in Visual Studio compiler. */
31
32#if V8_CC_MSVC
33
34#pragma warning(disable : 4723)
35
36#endif
37
38/*
39 * The original fdlibm code used statements like:
40 *  n0 = ((*(int*)&one)>>29)^1;   * index of high word *
41 *  ix0 = *(n0+(int*)&x);     * high word of x *
42 *  ix1 = *((1-n0)+(int*)&x);   * low word of x *
43 * to dig two 32 bit words out of the 64 bit IEEE floating point
44 * value.  That is non-ANSI, and, moreover, the gcc instruction
45 * scheduler gets it wrong.  We instead use the following macros.
46 * Unlike the original code, we determine the endianness at compile
47 * time, not at run time; I don't see much benefit to selecting
48 * endianness at run time.
49 */
50
51/*
52 * A union which permits us to convert between a double and two 32 bit
53 * ints.
54 */
55
56#if V8_TARGET_LITTLE_ENDIAN
57
58typedef union {
59  double value;
60  struct {
61    uint32_t lsw;
62    uint32_t msw;
63  } parts;
64  struct {
65    uint64_t w;
66  } xparts;
67} ieee_double_shape_type;
68
69#else
70
71typedef union {
72  double value;
73  struct {
74    uint32_t msw;
75    uint32_t lsw;
76  } parts;
77  struct {
78    uint64_t w;
79  } xparts;
80} ieee_double_shape_type;
81
82#endif
83
84/* Get two 32 bit ints from a double.  */
85
86#define EXTRACT_WORDS(ix0, ix1, d) \
87  do {                             \
88    ieee_double_shape_type ew_u;   \
89    ew_u.value = (d);              \
90    (ix0) = ew_u.parts.msw;        \
91    (ix1) = ew_u.parts.lsw;        \
92  } while (0)
93
94/* Get a 64-bit int from a double. */
95#define EXTRACT_WORD64(ix, d)    \
96  do {                           \
97    ieee_double_shape_type ew_u; \
98    ew_u.value = (d);            \
99    (ix) = ew_u.xparts.w;        \
100  } while (0)
101
102/* Get the more significant 32 bit int from a double.  */
103
104#define GET_HIGH_WORD(i, d)      \
105  do {                           \
106    ieee_double_shape_type gh_u; \
107    gh_u.value = (d);            \
108    (i) = gh_u.parts.msw;        \
109  } while (0)
110
111/* Get the less significant 32 bit int from a double.  */
112
113#define GET_LOW_WORD(i, d)       \
114  do {                           \
115    ieee_double_shape_type gl_u; \
116    gl_u.value = (d);            \
117    (i) = gl_u.parts.lsw;        \
118  } while (0)
119
120/* Set a double from two 32 bit ints.  */
121
122#define INSERT_WORDS(d, ix0, ix1) \
123  do {                            \
124    ieee_double_shape_type iw_u;  \
125    iw_u.parts.msw = (ix0);       \
126    iw_u.parts.lsw = (ix1);       \
127    (d) = iw_u.value;             \
128  } while (0)
129
130/* Set a double from a 64-bit int. */
131#define INSERT_WORD64(d, ix)     \
132  do {                           \
133    ieee_double_shape_type iw_u; \
134    iw_u.xparts.w = (ix);        \
135    (d) = iw_u.value;            \
136  } while (0)
137
138/* Set the more significant 32 bits of a double from an int.  */
139
140#define SET_HIGH_WORD(d, v)      \
141  do {                           \
142    ieee_double_shape_type sh_u; \
143    sh_u.value = (d);            \
144    sh_u.parts.msw = (v);        \
145    (d) = sh_u.value;            \
146  } while (0)
147
148/* Set the less significant 32 bits of a double from an int.  */
149
150#define SET_LOW_WORD(d, v)       \
151  do {                           \
152    ieee_double_shape_type sl_u; \
153    sl_u.value = (d);            \
154    sl_u.parts.lsw = (v);        \
155    (d) = sl_u.value;            \
156  } while (0)
157
158/* Support macro. */
159
160#define STRICT_ASSIGN(type, lval, rval) ((lval) = (rval))
161
162int32_t __ieee754_rem_pio2(double x, double *y) WARN_UNUSED_RESULT;
163double __kernel_cos(double x, double y) WARN_UNUSED_RESULT;
164int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec,
165                      const int32_t *ipio2) WARN_UNUSED_RESULT;
166double __kernel_sin(double x, double y, int iy) WARN_UNUSED_RESULT;
167
168/* __ieee754_rem_pio2(x,y)
169 *
170 * return the remainder of x rem pi/2 in y[0]+y[1]
171 * use __kernel_rem_pio2()
172 */
173int32_t __ieee754_rem_pio2(double x, double *y) {
174  /*
175   * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
176   */
177  static const int32_t two_over_pi[] = {
178      0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C,
179      0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649,
180      0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, 0xA73EE8, 0x8235F5, 0x2EBB44,
181      0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C, 0x845F8B,
182      0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D,
183      0x367ECF, 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
184      0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330,
185      0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, 0x91615E, 0xE61B08,
186      0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA,
187      0x73A8C9, 0x60E27B, 0xC08C6B,
188  };
189
190  static const int32_t npio2_hw[] = {
191      0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
192      0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
193      0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
194      0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
195      0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
196      0x404858EB, 0x404921FB,
197  };
198
199  /*
200   * invpio2:  53 bits of 2/pi
201   * pio2_1:   first  33 bit of pi/2
202   * pio2_1t:  pi/2 - pio2_1
203   * pio2_2:   second 33 bit of pi/2
204   * pio2_2t:  pi/2 - (pio2_1+pio2_2)
205   * pio2_3:   third  33 bit of pi/2
206   * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
207   */
208
209  static const double
210      zero = 0.00000000000000000000e+00,    /* 0x00000000, 0x00000000 */
211      half = 5.00000000000000000000e-01,    /* 0x3FE00000, 0x00000000 */
212      two24 = 1.67772160000000000000e+07,   /* 0x41700000, 0x00000000 */
213      invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
214      pio2_1 = 1.57079632673412561417e+00,  /* 0x3FF921FB, 0x54400000 */
215      pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
216      pio2_2 = 6.07710050630396597660e-11,  /* 0x3DD0B461, 0x1A600000 */
217      pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
218      pio2_3 = 2.02226624871116645580e-21,  /* 0x3BA3198A, 0x2E000000 */
219      pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
220
221  double z, w, t, r, fn;
222  double tx[3];
223  int32_t e0, i, j, nx, n, ix, hx;
224  uint32_t low;
225
226  z = 0;
227  GET_HIGH_WORD(hx, x); /* high word of x */
228  ix = hx & 0x7fffffff;
229  if (ix <= 0x3fe921fb) { /* |x| ~<= pi/4 , no need for reduction */
230    y[0] = x;
231    y[1] = 0;
232    return 0;
233  }
234  if (ix < 0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */
235    if (hx > 0) {
236      z = x - pio2_1;
237      if (ix != 0x3ff921fb) { /* 33+53 bit pi is good enough */
238        y[0] = z - pio2_1t;
239        y[1] = (z - y[0]) - pio2_1t;
240      } else { /* near pi/2, use 33+33+53 bit pi */
241        z -= pio2_2;
242        y[0] = z - pio2_2t;
243        y[1] = (z - y[0]) - pio2_2t;
244      }
245      return 1;
246    } else { /* negative x */
247      z = x + pio2_1;
248      if (ix != 0x3ff921fb) { /* 33+53 bit pi is good enough */
249        y[0] = z + pio2_1t;
250        y[1] = (z - y[0]) + pio2_1t;
251      } else { /* near pi/2, use 33+33+53 bit pi */
252        z += pio2_2;
253        y[0] = z + pio2_2t;
254        y[1] = (z - y[0]) + pio2_2t;
255      }
256      return -1;
257    }
258  }
259  if (ix <= 0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
260    t = fabs(x);
261    n = static_cast<int32_t>(t * invpio2 + half);
262    fn = static_cast<double>(n);
263    r = t - fn * pio2_1;
264    w = fn * pio2_1t; /* 1st round good to 85 bit */
265    if (n < 32 && ix != npio2_hw[n - 1]) {
266      y[0] = r - w; /* quick check no cancellation */
267    } else {
268      uint32_t high;
269      j = ix >> 20;
270      y[0] = r - w;
271      GET_HIGH_WORD(high, y[0]);
272      i = j - ((high >> 20) & 0x7ff);
273      if (i > 16) { /* 2nd iteration needed, good to 118 */
274        t = r;
275        w = fn * pio2_2;
276        r = t - w;
277        w = fn * pio2_2t - ((t - r) - w);
278        y[0] = r - w;
279        GET_HIGH_WORD(high, y[0]);
280        i = j - ((high >> 20) & 0x7ff);
281        if (i > 49) { /* 3rd iteration need, 151 bits acc */
282          t = r;      /* will cover all possible cases */
283          w = fn * pio2_3;
284          r = t - w;
285          w = fn * pio2_3t - ((t - r) - w);
286          y[0] = r - w;
287        }
288      }
289    }
290    y[1] = (r - y[0]) - w;
291    if (hx < 0) {
292      y[0] = -y[0];
293      y[1] = -y[1];
294      return -n;
295    } else {
296      return n;
297    }
298  }
299  /*
300   * all other (large) arguments
301   */
302  if (ix >= 0x7ff00000) { /* x is inf or NaN */
303    y[0] = y[1] = x - x;
304    return 0;
305  }
306  /* set z = scalbn(|x|,ilogb(x)-23) */
307  GET_LOW_WORD(low, x);
308  SET_LOW_WORD(z, low);
309  e0 = (ix >> 20) - 1046; /* e0 = ilogb(z)-23; */
310  SET_HIGH_WORD(z, ix - static_cast<int32_t>(e0 << 20));
311  for (i = 0; i < 2; i++) {
312    tx[i] = static_cast<double>(static_cast<int32_t>(z));
313    z = (z - tx[i]) * two24;
314  }
315  tx[2] = z;
316  nx = 3;
317  while (tx[nx - 1] == zero) nx--; /* skip zero term */
318  n = __kernel_rem_pio2(tx, y, e0, nx, 2, two_over_pi);
319  if (hx < 0) {
320    y[0] = -y[0];
321    y[1] = -y[1];
322    return -n;
323  }
324  return n;
325}
326
327/* __kernel_cos( x,  y )
328 * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
329 * Input x is assumed to be bounded by ~pi/4 in magnitude.
330 * Input y is the tail of x.
331 *
332 * Algorithm
333 *      1. Since cos(-x) = cos(x), we need only to consider positive x.
334 *      2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
335 *      3. cos(x) is approximated by a polynomial of degree 14 on
336 *         [0,pi/4]
337 *                                       4            14
338 *              cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
339 *         where the remez error is
340 *
341 *      |              2     4     6     8     10    12     14 |     -58
342 *      |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  )| <= 2
343 *      |                                                      |
344 *
345 *                     4     6     8     10    12     14
346 *      4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then
347 *             cos(x) = 1 - x*x/2 + r
348 *         since cos(x+y) ~ cos(x) - sin(x)*y
349 *                        ~ cos(x) - x*y,
350 *         a correction term is necessary in cos(x) and hence
351 *              cos(x+y) = 1 - (x*x/2 - (r - x*y))
352 *         For better accuracy when x > 0.3, let qx = |x|/4 with
353 *         the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
354 *         Then
355 *              cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
356 *         Note that 1-qx and (x*x/2-qx) is EXACT here, and the
357 *         magnitude of the latter is at least a quarter of x*x/2,
358 *         thus, reducing the rounding error in the subtraction.
359 */
360V8_INLINE double __kernel_cos(double x, double y) {
361  static const double
362      one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
363      C1 = 4.16666666666666019037e-02,  /* 0x3FA55555, 0x5555554C */
364      C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
365      C3 = 2.48015872894767294178e-05,  /* 0x3EFA01A0, 0x19CB1590 */
366      C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
367      C5 = 2.08757232129817482790e-09,  /* 0x3E21EE9E, 0xBDB4B1C4 */
368      C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
369
370  double a, iz, z, r, qx;
371  int32_t ix;
372  GET_HIGH_WORD(ix, x);
373  ix &= 0x7fffffff;                           /* ix = |x|'s high word*/
374  if (ix < 0x3e400000) {                      /* if x < 2**27 */
375    if (static_cast<int>(x) == 0) return one; /* generate inexact */
376  }
377  z = x * x;
378  r = z * (C1 + z * (C2 + z * (C3 + z * (C4 + z * (C5 + z * C6)))));
379  if (ix < 0x3FD33333) { /* if |x| < 0.3 */
380    return one - (0.5 * z - (z * r - x * y));
381  } else {
382    if (ix > 0x3fe90000) { /* x > 0.78125 */
383      qx = 0.28125;
384    } else {
385      INSERT_WORDS(qx, ix - 0x00200000, 0); /* x/4 */
386    }
387    iz = 0.5 * z - qx;
388    a = one - qx;
389    return a - (iz - (z * r - x * y));
390  }
391}
392
393/* __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
394 * double x[],y[]; int e0,nx,prec; int ipio2[];
395 *
396 * __kernel_rem_pio2 return the last three digits of N with
397 *              y = x - N*pi/2
398 * so that |y| < pi/2.
399 *
400 * The method is to compute the integer (mod 8) and fraction parts of
401 * (2/pi)*x without doing the full multiplication. In general we
402 * skip the part of the product that are known to be a huge integer (
403 * more accurately, = 0 mod 8 ). Thus the number of operations are
404 * independent of the exponent of the input.
405 *
406 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
407 *
408 * Input parameters:
409 *      x[]     The input value (must be positive) is broken into nx
410 *              pieces of 24-bit integers in double precision format.
411 *              x[i] will be the i-th 24 bit of x. The scaled exponent
412 *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
413 *              match x's up to 24 bits.
414 *
415 *              Example of breaking a double positive z into x[0]+x[1]+x[2]:
416 *                      e0 = ilogb(z)-23
417 *                      z  = scalbn(z,-e0)
418 *              for i = 0,1,2
419 *                      x[i] = floor(z)
420 *                      z    = (z-x[i])*2**24
421 *
422 *
423 *      y[]     output result in an array of double precision numbers.
424 *              The dimension of y[] is:
425 *                      24-bit  precision       1
426 *                      53-bit  precision       2
427 *                      64-bit  precision       2
428 *                      113-bit precision       3
429 *              The actual value is the sum of them. Thus for 113-bit
430 *              precison, one may have to do something like:
431 *
432 *              long double t,w,r_head, r_tail;
433 *              t = (long double)y[2] + (long double)y[1];
434 *              w = (long double)y[0];
435 *              r_head = t+w;
436 *              r_tail = w - (r_head - t);
437 *
438 *      e0      The exponent of x[0]
439 *
440 *      nx      dimension of x[]
441 *
442 *      prec    an integer indicating the precision:
443 *                      0       24  bits (single)
444 *                      1       53  bits (double)
445 *                      2       64  bits (extended)
446 *                      3       113 bits (quad)
447 *
448 *      ipio2[]
449 *              integer array, contains the (24*i)-th to (24*i+23)-th
450 *              bit of 2/pi after binary point. The corresponding
451 *              floating value is
452 *
453 *                      ipio2[i] * 2^(-24(i+1)).
454 *
455 * External function:
456 *      double scalbn(), floor();
457 *
458 *
459 * Here is the description of some local variables:
460 *
461 *      jk      jk+1 is the initial number of terms of ipio2[] needed
462 *              in the computation. The recommended value is 2,3,4,
463 *              6 for single, double, extended,and quad.
464 *
465 *      jz      local integer variable indicating the number of
466 *              terms of ipio2[] used.
467 *
468 *      jx      nx - 1
469 *
470 *      jv      index for pointing to the suitable ipio2[] for the
471 *              computation. In general, we want
472 *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
473 *              is an integer. Thus
474 *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
475 *              Hence jv = max(0,(e0-3)/24).
476 *
477 *      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.
478 *
479 *      q[]     double array with integral value, representing the
480 *              24-bits chunk of the product of x and 2/pi.
481 *
482 *      q0      the corresponding exponent of q[0]. Note that the
483 *              exponent for q[i] would be q0-24*i.
484 *
485 *      PIo2[]  double precision array, obtained by cutting pi/2
486 *              into 24 bits chunks.
487 *
488 *      f[]     ipio2[] in floating point
489 *
490 *      iq[]    integer array by breaking up q[] in 24-bits chunk.
491 *
492 *      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]
493 *
494 *      ih      integer. If >0 it indicates q[] is >= 0.5, hence
495 *              it also indicates the *sign* of the result.
496 *
497 */
498int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec,
499                      const int32_t *ipio2) {
500  /* Constants:
501   * The hexadecimal values are the intended ones for the following
502   * constants. The decimal values may be used, provided that the
503   * compiler will convert from decimal to binary accurately enough
504   * to produce the hexadecimal values shown.
505   */
506  static const int init_jk[] = {2, 3, 4, 6}; /* initial value for jk */
507
508  static const double PIo2[] = {
509      1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
510      7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
511      5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
512      3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
513      1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
514      1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
515      2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
516      2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
517  };
518
519  static const double
520      zero = 0.0,
521      one = 1.0,
522      two24 = 1.67772160000000000000e+07,  /* 0x41700000, 0x00000000 */
523      twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
524
525  int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
526  double z, fw, f[20], fq[20], q[20];
527
528  /* initialize jk*/
529  jk = init_jk[prec];
530  jp = jk;
531
532  /* determine jx,jv,q0, note that 3>q0 */
533  jx = nx - 1;
534  jv = (e0 - 3) / 24;
535  if (jv < 0) jv = 0;
536  q0 = e0 - 24 * (jv + 1);
537
538  /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
539  j = jv - jx;
540  m = jx + jk;
541  for (i = 0; i <= m; i++, j++) {
542    f[i] = (j < 0) ? zero : static_cast<double>(ipio2[j]);
543  }
544
545  /* compute q[0],q[1],...q[jk] */
546  for (i = 0; i <= jk; i++) {
547    for (j = 0, fw = 0.0; j <= jx; j++) fw += x[j] * f[jx + i - j];
548    q[i] = fw;
549  }
550
551  jz = jk;
552recompute:
553  /* distill q[] into iq[] reversingly */
554  for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) {
555    fw = static_cast<double>(static_cast<int32_t>(twon24 * z));
556    iq[i] = static_cast<int32_t>(z - two24 * fw);
557    z = q[j - 1] + fw;
558  }
559
560  /* compute n */
561  z = scalbn(z, q0);           /* actual value of z */
562  z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */
563  n = static_cast<int32_t>(z);
564  z -= static_cast<double>(n);
565  ih = 0;
566  if (q0 > 0) { /* need iq[jz-1] to determine n */
567    i = (iq[jz - 1] >> (24 - q0));
568    n += i;
569    iq[jz - 1] -= i << (24 - q0);
570    ih = iq[jz - 1] >> (23 - q0);
571  } else if (q0 == 0) {
572    ih = iq[jz - 1] >> 23;
573  } else if (z >= 0.5) {
574    ih = 2;
575  }
576
577  if (ih > 0) { /* q > 0.5 */
578    n += 1;
579    carry = 0;
580    for (i = 0; i < jz; i++) { /* compute 1-q */
581      j = iq[i];
582      if (carry == 0) {
583        if (j != 0) {
584          carry = 1;
585          iq[i] = 0x1000000 - j;
586        }
587      } else {
588        iq[i] = 0xffffff - j;
589      }
590    }
591    if (q0 > 0) { /* rare case: chance is 1 in 12 */
592      switch (q0) {
593        case 1:
594          iq[jz - 1] &= 0x7fffff;
595          break;
596        case 2:
597          iq[jz - 1] &= 0x3fffff;
598          break;
599      }
600    }
601    if (ih == 2) {
602      z = one - z;
603      if (carry != 0) z -= scalbn(one, q0);
604    }
605  }
606
607  /* check if recomputation is needed */
608  if (z == zero) {
609    j = 0;
610    for (i = jz - 1; i >= jk; i--) j |= iq[i];
611    if (j == 0) { /* need recomputation */
612      for (k = 1; jk >= k && iq[jk - k] == 0; k++) {
613        /* k = no. of terms needed */
614      }
615
616      for (i = jz + 1; i <= jz + k; i++) { /* add q[jz+1] to q[jz+k] */
617        f[jx + i] = ipio2[jv + i];
618        for (j = 0, fw = 0.0; j <= jx; j++) fw += x[j] * f[jx + i - j];
619        q[i] = fw;
620      }
621      jz += k;
622      goto recompute;
623    }
624  }
625
626  /* chop off zero terms */
627  if (z == 0.0) {
628    jz -= 1;
629    q0 -= 24;
630    while (iq[jz] == 0) {
631      jz--;
632      q0 -= 24;
633    }
634  } else { /* break z into 24-bit if necessary */
635    z = scalbn(z, -q0);
636    if (z >= two24) {
637      fw = static_cast<double>(static_cast<int32_t>(twon24 * z));
638      iq[jz] = z - two24 * fw;
639      jz += 1;
640      q0 += 24;
641      iq[jz] = fw;
642    } else {
643      iq[jz] = z;
644    }
645  }
646
647  /* convert integer "bit" chunk to floating-point value */
648  fw = scalbn(one, q0);
649  for (i = jz; i >= 0; i--) {
650    q[i] = fw * iq[i];
651    fw *= twon24;
652  }
653
654  /* compute PIo2[0,...,jp]*q[jz,...,0] */
655  for (i = jz; i >= 0; i--) {
656    for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++) fw += PIo2[k] * q[i + k];
657    fq[jz - i] = fw;
658  }
659
660  /* compress fq[] into y[] */
661  switch (prec) {
662    case 0:
663      fw = 0.0;
664      for (i = jz; i >= 0; i--) fw += fq[i];
665      y[0] = (ih == 0) ? fw : -fw;
666      break;
667    case 1:
668    case 2:
669      fw = 0.0;
670      for (i = jz; i >= 0; i--) fw += fq[i];
671      y[0] = (ih == 0) ? fw : -fw;
672      fw = fq[0] - fw;
673      for (i = 1; i <= jz; i++) fw += fq[i];
674      y[1] = (ih == 0) ? fw : -fw;
675      break;
676    case 3: /* painful */
677      for (i = jz; i > 0; i--) {
678        fw = fq[i - 1] + fq[i];
679        fq[i] += fq[i - 1] - fw;
680        fq[i - 1] = fw;
681      }
682      for (i = jz; i > 1; i--) {
683        fw = fq[i - 1] + fq[i];
684        fq[i] += fq[i - 1] - fw;
685        fq[i - 1] = fw;
686      }
687      for (fw = 0.0, i = jz; i >= 2; i--) fw += fq[i];
688      if (ih == 0) {
689        y[0] = fq[0];
690        y[1] = fq[1];
691        y[2] = fw;
692      } else {
693        y[0] = -fq[0];
694        y[1] = -fq[1];
695        y[2] = -fw;
696      }
697  }
698  return n & 7;
699}
700
701/* __kernel_sin( x, y, iy)
702 * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
703 * Input x is assumed to be bounded by ~pi/4 in magnitude.
704 * Input y is the tail of x.
705 * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
706 *
707 * Algorithm
708 *      1. Since sin(-x) = -sin(x), we need only to consider positive x.
709 *      2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
710 *      3. sin(x) is approximated by a polynomial of degree 13 on
711 *         [0,pi/4]
712 *                               3            13
713 *              sin(x) ~ x + S1*x + ... + S6*x
714 *         where
715 *
716 *      |sin(x)         2     4     6     8     10     12  |     -58
717 *      |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x  +S6*x   )| <= 2
718 *      |  x                                               |
719 *
720 *      4. sin(x+y) = sin(x) + sin'(x')*y
721 *                  ~ sin(x) + (1-x*x/2)*y
722 *         For better accuracy, let
723 *                   3      2      2      2      2
724 *              r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
725 *         then                   3    2
726 *              sin(x) = x + (S1*x + (x *(r-y/2)+y))
727 */
728V8_INLINE double __kernel_sin(double x, double y, int iy) {
729  static const double
730      half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
731      S1 = -1.66666666666666324348e-01,  /* 0xBFC55555, 0x55555549 */
732      S2 = 8.33333333332248946124e-03,   /* 0x3F811111, 0x1110F8A6 */
733      S3 = -1.98412698298579493134e-04,  /* 0xBF2A01A0, 0x19C161D5 */
734      S4 = 2.75573137070700676789e-06,   /* 0x3EC71DE3, 0x57B1FE7D */
735      S5 = -2.50507602534068634195e-08,  /* 0xBE5AE5E6, 0x8A2B9CEB */
736      S6 = 1.58969099521155010221e-10;   /* 0x3DE5D93A, 0x5ACFD57C */
737
738  double z, r, v;
739  int32_t ix;
740  GET_HIGH_WORD(ix, x);
741  ix &= 0x7fffffff;      /* high word of x */
742  if (ix < 0x3e400000) { /* |x| < 2**-27 */
743    if (static_cast<int>(x) == 0) return x;
744  } /* generate inexact */
745  z = x * x;
746  v = z * x;
747  r = S2 + z * (S3 + z * (S4 + z * (S5 + z * S6)));
748  if (iy == 0) {
749    return x + v * (S1 + z * r);
750  } else {
751    return x - ((z * (half * y - v * r) - y) - v * S1);
752  }
753}
754
755/* __kernel_tan( x, y, k )
756 * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
757 * Input x is assumed to be bounded by ~pi/4 in magnitude.
758 * Input y is the tail of x.
759 * Input k indicates whether tan (if k=1) or
760 * -1/tan (if k= -1) is returned.
761 *
762 * Algorithm
763 *      1. Since tan(-x) = -tan(x), we need only to consider positive x.
764 *      2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
765 *      3. tan(x) is approximated by a odd polynomial of degree 27 on
766 *         [0,0.67434]
767 *                               3             27
768 *              tan(x) ~ x + T1*x + ... + T13*x
769 *         where
770 *
771 *              |tan(x)         2     4            26   |     -59.2
772 *              |----- - (1+T1*x +T2*x +.... +T13*x    )| <= 2
773 *              |  x                                    |
774 *
775 *         Note: tan(x+y) = tan(x) + tan'(x)*y
776 *                        ~ tan(x) + (1+x*x)*y
777 *         Therefore, for better accuracy in computing tan(x+y), let
778 *                   3      2      2       2       2
779 *              r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
780 *         then
781 *                                  3    2
782 *              tan(x+y) = x + (T1*x + (x *(r+y)+y))
783 *
784 *      4. For x in [0.67434,pi/4],  let y = pi/4 - x, then
785 *              tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
786 *                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
787 */
788double __kernel_tan(double x, double y, int iy) {
789  static const double xxx[] = {
790      3.33333333333334091986e-01,             /* 3FD55555, 55555563 */
791      1.33333333333201242699e-01,             /* 3FC11111, 1110FE7A */
792      5.39682539762260521377e-02,             /* 3FABA1BA, 1BB341FE */
793      2.18694882948595424599e-02,             /* 3F9664F4, 8406D637 */
794      8.86323982359930005737e-03,             /* 3F8226E3, E96E8493 */
795      3.59207910759131235356e-03,             /* 3F6D6D22, C9560328 */
796      1.45620945432529025516e-03,             /* 3F57DBC8, FEE08315 */
797      5.88041240820264096874e-04,             /* 3F4344D8, F2F26501 */
798      2.46463134818469906812e-04,             /* 3F3026F7, 1A8D1068 */
799      7.81794442939557092300e-05,             /* 3F147E88, A03792A6 */
800      7.14072491382608190305e-05,             /* 3F12B80F, 32F0A7E9 */
801      -1.85586374855275456654e-05,            /* BEF375CB, DB605373 */
802      2.59073051863633712884e-05,             /* 3EFB2A70, 74BF7AD4 */
803      /* one */ 1.00000000000000000000e+00,   /* 3FF00000, 00000000 */
804      /* pio4 */ 7.85398163397448278999e-01,  /* 3FE921FB, 54442D18 */
805      /* pio4lo */ 3.06161699786838301793e-17 /* 3C81A626, 33145C07 */
806  };
807#define one xxx[13]
808#define pio4 xxx[14]
809#define pio4lo xxx[15]
810#define T xxx
811
812  double z, r, v, w, s;
813  int32_t ix, hx;
814
815  GET_HIGH_WORD(hx, x);             /* high word of x */
816  ix = hx & 0x7fffffff;             /* high word of |x| */
817  if (ix < 0x3e300000) {            /* x < 2**-28 */
818    if (static_cast<int>(x) == 0) { /* generate inexact */
819      uint32_t low;
820      GET_LOW_WORD(low, x);
821      if (((ix | low) | (iy + 1)) == 0) {
822        return one / fabs(x);
823      } else {
824        if (iy == 1) {
825          return x;
826        } else { /* compute -1 / (x+y) carefully */
827          double a, t;
828
829          z = w = x + y;
830          SET_LOW_WORD(z, 0);
831          v = y - (z - x);
832          t = a = -one / w;
833          SET_LOW_WORD(t, 0);
834          s = one + t * z;
835          return t + a * (s + t * v);
836        }
837      }
838    }
839  }
840  if (ix >= 0x3FE59428) { /* |x| >= 0.6744 */
841    if (hx < 0) {
842      x = -x;
843      y = -y;
844    }
845    z = pio4 - x;
846    w = pio4lo - y;
847    x = z + w;
848    y = 0.0;
849  }
850  z = x * x;
851  w = z * z;
852  /*
853   * Break x^5*(T[1]+x^2*T[2]+...) into
854   * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
855   * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
856   */
857  r = T[1] + w * (T[3] + w * (T[5] + w * (T[7] + w * (T[9] + w * T[11]))));
858  v = z *
859      (T[2] + w * (T[4] + w * (T[6] + w * (T[8] + w * (T[10] + w * T[12])))));
860  s = z * x;
861  r = y + z * (s * (r + v) + y);
862  r += T[0] * s;
863  w = x + r;
864  if (ix >= 0x3FE59428) {
865    v = iy;
866    return (1 - ((hx >> 30) & 2)) * (v - 2.0 * (x - (w * w / (w + v) - r)));
867  }
868  if (iy == 1) {
869    return w;
870  } else {
871    /*
872     * if allow error up to 2 ulp, simply return
873     * -1.0 / (x+r) here
874     */
875    /* compute -1.0 / (x+r) accurately */
876    double a, t;
877    z = w;
878    SET_LOW_WORD(z, 0);
879    v = r - (z - x);  /* z+v = r+x */
880    t = a = -1.0 / w; /* a = -1.0/w */
881    SET_LOW_WORD(t, 0);
882    s = 1.0 + t * z;
883    return t + a * (s + t * v);
884  }
885
886#undef one
887#undef pio4
888#undef pio4lo
889#undef T
890}
891
892}  // namespace
893
894/* acos(x)
895 * Method :
896 *      acos(x)  = pi/2 - asin(x)
897 *      acos(-x) = pi/2 + asin(x)
898 * For |x|<=0.5
899 *      acos(x) = pi/2 - (x + x*x^2*R(x^2))     (see asin.c)
900 * For x>0.5
901 *      acos(x) = pi/2 - (pi/2 - 2asin(sqrt((1-x)/2)))
902 *              = 2asin(sqrt((1-x)/2))
903 *              = 2s + 2s*z*R(z)        ...z=(1-x)/2, s=sqrt(z)
904 *              = 2f + (2c + 2s*z*R(z))
905 *     where f=hi part of s, and c = (z-f*f)/(s+f) is the correction term
906 *     for f so that f+c ~ sqrt(z).
907 * For x<-0.5
908 *      acos(x) = pi - 2asin(sqrt((1-|x|)/2))
909 *              = pi - 0.5*(s+s*z*R(z)), where z=(1-|x|)/2,s=sqrt(z)
910 *
911 * Special cases:
912 *      if x is NaN, return x itself;
913 *      if |x|>1, return NaN with invalid signal.
914 *
915 * Function needed: sqrt
916 */
917double acos(double x) {
918  static const double
919      one = 1.00000000000000000000e+00,     /* 0x3FF00000, 0x00000000 */
920      pi = 3.14159265358979311600e+00,      /* 0x400921FB, 0x54442D18 */
921      pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
922      pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
923      pS0 = 1.66666666666666657415e-01,     /* 0x3FC55555, 0x55555555 */
924      pS1 = -3.25565818622400915405e-01,    /* 0xBFD4D612, 0x03EB6F7D */
925      pS2 = 2.01212532134862925881e-01,     /* 0x3FC9C155, 0x0E884455 */
926      pS3 = -4.00555345006794114027e-02,    /* 0xBFA48228, 0xB5688F3B */
927      pS4 = 7.91534994289814532176e-04,     /* 0x3F49EFE0, 0x7501B288 */
928      pS5 = 3.47933107596021167570e-05,     /* 0x3F023DE1, 0x0DFDF709 */
929      qS1 = -2.40339491173441421878e+00,    /* 0xC0033A27, 0x1C8A2D4B */
930      qS2 = 2.02094576023350569471e+00,     /* 0x40002AE5, 0x9C598AC8 */
931      qS3 = -6.88283971605453293030e-01,    /* 0xBFE6066C, 0x1B8D0159 */
932      qS4 = 7.70381505559019352791e-02;     /* 0x3FB3B8C5, 0xB12E9282 */
933
934  double z, p, q, r, w, s, c, df;
935  int32_t hx, ix;
936  GET_HIGH_WORD(hx, x);
937  ix = hx & 0x7fffffff;
938  if (ix >= 0x3ff00000) { /* |x| >= 1 */
939    uint32_t lx;
940    GET_LOW_WORD(lx, x);
941    if (((ix - 0x3ff00000) | lx) == 0) { /* |x|==1 */
942      if (hx > 0)
943        return 0.0; /* acos(1) = 0  */
944      else
945        return pi + 2.0 * pio2_lo; /* acos(-1)= pi */
946    }
947    return (x - x) / (x - x); /* acos(|x|>1) is NaN */
948  }
949  if (ix < 0x3fe00000) {                            /* |x| < 0.5 */
950    if (ix <= 0x3c600000) return pio2_hi + pio2_lo; /*if|x|<2**-57*/
951    z = x * x;
952    p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
953    q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
954    r = p / q;
955    return pio2_hi - (x - (pio2_lo - x * r));
956  } else if (hx < 0) { /* x < -0.5 */
957    z = (one + x) * 0.5;
958    p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
959    q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
960    s = sqrt(z);
961    r = p / q;
962    w = r * s - pio2_lo;
963    return pi - 2.0 * (s + w);
964  } else { /* x > 0.5 */
965    z = (one - x) * 0.5;
966    s = sqrt(z);
967    df = s;
968    SET_LOW_WORD(df, 0);
969    c = (z - df * df) / (s + df);
970    p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
971    q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
972    r = p / q;
973    w = r * s + c;
974    return 2.0 * (df + w);
975  }
976}
977
978/* acosh(x)
979 * Method :
980 *      Based on
981 *              acosh(x) = log [ x + sqrt(x*x-1) ]
982 *      we have
983 *              acosh(x) := log(x)+ln2, if x is large; else
984 *              acosh(x) := log(2x-1/(sqrt(x*x-1)+x)) if x>2; else
985 *              acosh(x) := log1p(t+sqrt(2.0*t+t*t)); where t=x-1.
986 *
987 * Special cases:
988 *      acosh(x) is NaN with signal if x<1.
989 *      acosh(NaN) is NaN without signal.
990 */
991double acosh(double x) {
992  static const double
993      one = 1.0,
994      ln2 = 6.93147180559945286227e-01; /* 0x3FE62E42, 0xFEFA39EF */
995  double t;
996  int32_t hx;
997  uint32_t lx;
998  EXTRACT_WORDS(hx, lx, x);
999  if (hx < 0x3ff00000) { /* x < 1 */
1000    return (x - x) / (x - x);
1001  } else if (hx >= 0x41b00000) { /* x > 2**28 */
1002    if (hx >= 0x7ff00000) {      /* x is inf of NaN */
1003      return x + x;
1004    } else {
1005      return log(x) + ln2; /* acosh(huge)=log(2x) */
1006    }
1007  } else if (((hx - 0x3ff00000) | lx) == 0) {
1008    return 0.0;                 /* acosh(1) = 0 */
1009  } else if (hx > 0x40000000) { /* 2**28 > x > 2 */
1010    t = x * x;
1011    return log(2.0 * x - one / (x + sqrt(t - one)));
1012  } else { /* 1<x<2 */
1013    t = x - one;
1014    return log1p(t + sqrt(2.0 * t + t * t));
1015  }
1016}
1017
1018/* asin(x)
1019 * Method :
1020 *      Since  asin(x) = x + x^3/6 + x^5*3/40 + x^7*15/336 + ...
1021 *      we approximate asin(x) on [0,0.5] by
1022 *              asin(x) = x + x*x^2*R(x^2)
1023 *      where
1024 *              R(x^2) is a rational approximation of (asin(x)-x)/x^3
1025 *      and its remez error is bounded by
1026 *              |(asin(x)-x)/x^3 - R(x^2)| < 2^(-58.75)
1027 *
1028 *      For x in [0.5,1]
1029 *              asin(x) = pi/2-2*asin(sqrt((1-x)/2))
1030 *      Let y = (1-x), z = y/2, s := sqrt(z), and pio2_hi+pio2_lo=pi/2;
1031 *      then for x>0.98
1032 *              asin(x) = pi/2 - 2*(s+s*z*R(z))
1033 *                      = pio2_hi - (2*(s+s*z*R(z)) - pio2_lo)
1034 *      For x<=0.98, let pio4_hi = pio2_hi/2, then
1035 *              f = hi part of s;
1036 *              c = sqrt(z) - f = (z-f*f)/(s+f)         ...f+c=sqrt(z)
1037 *      and
1038 *              asin(x) = pi/2 - 2*(s+s*z*R(z))
1039 *                      = pio4_hi+(pio4-2s)-(2s*z*R(z)-pio2_lo)
1040 *                      = pio4_hi+(pio4-2f)-(2s*z*R(z)-(pio2_lo+2c))
1041 *
1042 * Special cases:
1043 *      if x is NaN, return x itself;
1044 *      if |x|>1, return NaN with invalid signal.
1045 */
1046double asin(double x) {
1047  static const double
1048      one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
1049      huge = 1.000e+300,
1050      pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
1051      pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
1052      pio4_hi = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
1053                                            /* coefficient for R(x^2) */
1054      pS0 = 1.66666666666666657415e-01,     /* 0x3FC55555, 0x55555555 */
1055      pS1 = -3.25565818622400915405e-01,    /* 0xBFD4D612, 0x03EB6F7D */
1056      pS2 = 2.01212532134862925881e-01,     /* 0x3FC9C155, 0x0E884455 */
1057      pS3 = -4.00555345006794114027e-02,    /* 0xBFA48228, 0xB5688F3B */
1058      pS4 = 7.91534994289814532176e-04,     /* 0x3F49EFE0, 0x7501B288 */
1059      pS5 = 3.47933107596021167570e-05,     /* 0x3F023DE1, 0x0DFDF709 */
1060      qS1 = -2.40339491173441421878e+00,    /* 0xC0033A27, 0x1C8A2D4B */
1061      qS2 = 2.02094576023350569471e+00,     /* 0x40002AE5, 0x9C598AC8 */
1062      qS3 = -6.88283971605453293030e-01,    /* 0xBFE6066C, 0x1B8D0159 */
1063      qS4 = 7.70381505559019352791e-02;     /* 0x3FB3B8C5, 0xB12E9282 */
1064
1065  double t, w, p, q, c, r, s;
1066  int32_t hx, ix;
1067
1068  t = 0;
1069  GET_HIGH_WORD(hx, x);
1070  ix = hx & 0x7fffffff;
1071  if (ix >= 0x3ff00000) { /* |x|>= 1 */
1072    uint32_t lx;
1073    GET_LOW_WORD(lx, x);
1074    if (((ix - 0x3ff00000) | lx) == 0) /* asin(1)=+-pi/2 with inexact */
1075      return x * pio2_hi + x * pio2_lo;
1076    return (x - x) / (x - x);       /* asin(|x|>1) is NaN */
1077  } else if (ix < 0x3fe00000) {     /* |x|<0.5 */
1078    if (ix < 0x3e400000) {          /* if |x| < 2**-27 */
1079      if (huge + x > one) return x; /* return x with inexact if x!=0*/
1080    } else {
1081      t = x * x;
1082    }
1083    p = t * (pS0 + t * (pS1 + t * (pS2 + t * (pS3 + t * (pS4 + t * pS5)))));
1084    q = one + t * (qS1 + t * (qS2 + t * (qS3 + t * qS4)));
1085    w = p / q;
1086    return x + x * w;
1087  }
1088  /* 1> |x|>= 0.5 */
1089  w = one - fabs(x);
1090  t = w * 0.5;
1091  p = t * (pS0 + t * (pS1 + t * (pS2 + t * (pS3 + t * (pS4 + t * pS5)))));
1092  q = one + t * (qS1 + t * (qS2 + t * (qS3 + t * qS4)));
1093  s = sqrt(t);
1094  if (ix >= 0x3FEF3333) { /* if |x| > 0.975 */
1095    w = p / q;
1096    t = pio2_hi - (2.0 * (s + s * w) - pio2_lo);
1097  } else {
1098    w = s;
1099    SET_LOW_WORD(w, 0);
1100    c = (t - w * w) / (s + w);
1101    r = p / q;
1102    p = 2.0 * s * r - (pio2_lo - 2.0 * c);
1103    q = pio4_hi - 2.0 * w;
1104    t = pio4_hi - (p - q);
1105  }
1106  if (hx > 0)
1107    return t;
1108  else
1109    return -t;
1110}
1111/* asinh(x)
1112 * Method :
1113 *      Based on
1114 *              asinh(x) = sign(x) * log [ |x| + sqrt(x*x+1) ]
1115 *      we have
1116 *      asinh(x) := x  if  1+x*x=1,
1117 *               := sign(x)*(log(x)+ln2)) for large |x|, else
1118 *               := sign(x)*log(2|x|+1/(|x|+sqrt(x*x+1))) if|x|>2, else
1119 *               := sign(x)*log1p(|x| + x^2/(1 + sqrt(1+x^2)))
1120 */
1121double asinh(double x) {
1122  static const double
1123      one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
1124      ln2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */
1125      huge = 1.00000000000000000000e+300;
1126
1127  double t, w;
1128  int32_t hx, ix;
1129  GET_HIGH_WORD(hx, x);
1130  ix = hx & 0x7fffffff;
1131  if (ix >= 0x7ff00000) return x + x; /* x is inf or NaN */
1132  if (ix < 0x3e300000) {              /* |x|<2**-28 */
1133    if (huge + x > one) return x;     /* return x inexact except 0 */
1134  }
1135  if (ix > 0x41b00000) { /* |x| > 2**28 */
1136    w = log(fabs(x)) + ln2;
1137  } else if (ix > 0x40000000) { /* 2**28 > |x| > 2.0 */
1138    t = fabs(x);
1139    w = log(2.0 * t + one / (sqrt(x * x + one) + t));
1140  } else { /* 2.0 > |x| > 2**-28 */
1141    t = x * x;
1142    w = log1p(fabs(x) + t / (one + sqrt(one + t)));
1143  }
1144  if (hx > 0) {
1145    return w;
1146  } else {
1147    return -w;
1148  }
1149}
1150
1151/* atan(x)
1152 * Method
1153 *   1. Reduce x to positive by atan(x) = -atan(-x).
1154 *   2. According to the integer k=4t+0.25 chopped, t=x, the argument
1155 *      is further reduced to one of the following intervals and the
1156 *      arctangent of t is evaluated by the corresponding formula:
1157 *
1158 *      [0,7/16]      atan(x) = t-t^3*(a1+t^2*(a2+...(a10+t^2*a11)...)
1159 *      [7/16,11/16]  atan(x) = atan(1/2) + atan( (t-0.5)/(1+t/2) )
1160 *      [11/16.19/16] atan(x) = atan( 1 ) + atan( (t-1)/(1+t) )
1161 *      [19/16,39/16] atan(x) = atan(3/2) + atan( (t-1.5)/(1+1.5t) )
1162 *      [39/16,INF]   atan(x) = atan(INF) + atan( -1/t )
1163 *
1164 * Constants:
1165 * The hexadecimal values are the intended ones for the following
1166 * constants. The decimal values may be used, provided that the
1167 * compiler will convert from decimal to binary accurately enough
1168 * to produce the hexadecimal values shown.
1169 */
1170double atan(double x) {
1171  static const double atanhi[] = {
1172      4.63647609000806093515e-01, /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */
1173      7.85398163397448278999e-01, /* atan(1.0)hi 0x3FE921FB, 0x54442D18 */
1174      9.82793723247329054082e-01, /* atan(1.5)hi 0x3FEF730B, 0xD281F69B */
1175      1.57079632679489655800e+00, /* atan(inf)hi 0x3FF921FB, 0x54442D18 */
1176  };
1177
1178  static const double atanlo[] = {
1179      2.26987774529616870924e-17, /* atan(0.5)lo 0x3C7A2B7F, 0x222F65E2 */
1180      3.06161699786838301793e-17, /* atan(1.0)lo 0x3C81A626, 0x33145C07 */
1181      1.39033110312309984516e-17, /* atan(1.5)lo 0x3C700788, 0x7AF0CBBD */
1182      6.12323399573676603587e-17, /* atan(inf)lo 0x3C91A626, 0x33145C07 */
1183  };
1184
1185  static const double aT[] = {
1186      3.33333333333329318027e-01,  /* 0x3FD55555, 0x5555550D */
1187      -1.99999999998764832476e-01, /* 0xBFC99999, 0x9998EBC4 */
1188      1.42857142725034663711e-01,  /* 0x3FC24924, 0x920083FF */
1189      -1.11111104054623557880e-01, /* 0xBFBC71C6, 0xFE231671 */
1190      9.09088713343650656196e-02,  /* 0x3FB745CD, 0xC54C206E */
1191      -7.69187620504482999495e-02, /* 0xBFB3B0F2, 0xAF749A6D */
1192      6.66107313738753120669e-02,  /* 0x3FB10D66, 0xA0D03D51 */
1193      -5.83357013379057348645e-02, /* 0xBFADDE2D, 0x52DEFD9A */
1194      4.97687799461593236017e-02,  /* 0x3FA97B4B, 0x24760DEB */
1195      -3.65315727442169155270e-02, /* 0xBFA2B444, 0x2C6A6C2F */
1196      1.62858201153657823623e-02,  /* 0x3F90AD3A, 0xE322DA11 */
1197  };
1198
1199  static const double one = 1.0, huge = 1.0e300;
1200
1201  double w, s1, s2, z;
1202  int32_t ix, hx, id;
1203
1204  GET_HIGH_WORD(hx, x);
1205  ix = hx & 0x7fffffff;
1206  if (ix >= 0x44100000) { /* if |x| >= 2^66 */
1207    uint32_t low;
1208    GET_LOW_WORD(low, x);
1209    if (ix > 0x7ff00000 || (ix == 0x7ff00000 && (low != 0)))
1210      return x + x; /* NaN */
1211    if (hx > 0)
1212      return atanhi[3] + *(volatile double *)&atanlo[3];
1213    else
1214      return -atanhi[3] - *(volatile double *)&atanlo[3];
1215  }
1216  if (ix < 0x3fdc0000) {            /* |x| < 0.4375 */
1217    if (ix < 0x3e400000) {          /* |x| < 2^-27 */
1218      if (huge + x > one) return x; /* raise inexact */
1219    }
1220    id = -1;
1221  } else {
1222    x = fabs(x);
1223    if (ix < 0x3ff30000) {   /* |x| < 1.1875 */
1224      if (ix < 0x3fe60000) { /* 7/16 <=|x|<11/16 */
1225        id = 0;
1226        x = (2.0 * x - one) / (2.0 + x);
1227      } else { /* 11/16<=|x|< 19/16 */
1228        id = 1;
1229        x = (x - one) / (x + one);
1230      }
1231    } else {
1232      if (ix < 0x40038000) { /* |x| < 2.4375 */
1233        id = 2;
1234        x = (x - 1.5) / (one + 1.5 * x);
1235      } else { /* 2.4375 <= |x| < 2^66 */
1236        id = 3;
1237        x = -1.0 / x;
1238      }
1239    }
1240  }
1241  /* end of argument reduction */
1242  z = x * x;
1243  w = z * z;
1244  /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
1245  s1 = z * (aT[0] +
1246            w * (aT[2] + w * (aT[4] + w * (aT[6] + w * (aT[8] + w * aT[10])))));
1247  s2 = w * (aT[1] + w * (aT[3] + w * (aT[5] + w * (aT[7] + w * aT[9]))));
1248  if (id < 0) {
1249    return x - x * (s1 + s2);
1250  } else {
1251    z = atanhi[id] - ((x * (s1 + s2) - atanlo[id]) - x);
1252    return (hx < 0) ? -z : z;
1253  }
1254}
1255
1256/* atan2(y,x)
1257 * Method :
1258 *  1. Reduce y to positive by atan2(y,x)=-atan2(-y,x).
1259 *  2. Reduce x to positive by (if x and y are unexceptional):
1260 *    ARG (x+iy) = arctan(y/x)       ... if x > 0,
1261 *    ARG (x+iy) = pi - arctan[y/(-x)]   ... if x < 0,
1262 *
1263 * Special cases:
1264 *
1265 *  ATAN2((anything), NaN ) is NaN;
1266 *  ATAN2(NAN , (anything) ) is NaN;
1267 *  ATAN2(+-0, +(anything but NaN)) is +-0  ;
1268 *  ATAN2(+-0, -(anything but NaN)) is +-pi ;
1269 *  ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2;
1270 *  ATAN2(+-(anything but INF and NaN), +INF) is +-0 ;
1271 *  ATAN2(+-(anything but INF and NaN), -INF) is +-pi;
1272 *  ATAN2(+-INF,+INF ) is +-pi/4 ;
1273 *  ATAN2(+-INF,-INF ) is +-3pi/4;
1274 *  ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2;
1275 *
1276 * Constants:
1277 * The hexadecimal values are the intended ones for the following
1278 * constants. The decimal values may be used, provided that the
1279 * compiler will convert from decimal to binary accurately enough
1280 * to produce the hexadecimal values shown.
1281 */
1282double atan2(double y, double x) {
1283  static volatile double tiny = 1.0e-300;
1284  static const double
1285      zero = 0.0,
1286      pi_o_4 = 7.8539816339744827900E-01, /* 0x3FE921FB, 0x54442D18 */
1287      pi_o_2 = 1.5707963267948965580E+00, /* 0x3FF921FB, 0x54442D18 */
1288      pi = 3.1415926535897931160E+00;     /* 0x400921FB, 0x54442D18 */
1289  static volatile double pi_lo =
1290      1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */
1291
1292  double z;
1293  int32_t k, m, hx, hy, ix, iy;
1294  uint32_t lx, ly;
1295
1296  EXTRACT_WORDS(hx, lx, x);
1297  ix = hx & 0x7fffffff;
1298  EXTRACT_WORDS(hy, ly, y);
1299  iy = hy & 0x7fffffff;
1300  if (((ix | ((lx | -static_cast<int32_t>(lx)) >> 31)) > 0x7ff00000) ||
1301      ((iy | ((ly | -static_cast<int32_t>(ly)) >> 31)) > 0x7ff00000)) {
1302    return x + y; /* x or y is NaN */
1303  }
1304  if (((hx - 0x3ff00000) | lx) == 0) return atan(y); /* x=1.0 */
1305  m = ((hy >> 31) & 1) | ((hx >> 30) & 2);           /* 2*sign(x)+sign(y) */
1306
1307  /* when y = 0 */
1308  if ((iy | ly) == 0) {
1309    switch (m) {
1310      case 0:
1311      case 1:
1312        return y; /* atan(+-0,+anything)=+-0 */
1313      case 2:
1314        return pi + tiny; /* atan(+0,-anything) = pi */
1315      case 3:
1316        return -pi - tiny; /* atan(-0,-anything) =-pi */
1317    }
1318  }
1319  /* when x = 0 */
1320  if ((ix | lx) == 0) return (hy < 0) ? -pi_o_2 - tiny : pi_o_2 + tiny;
1321
1322  /* when x is INF */
1323  if (ix == 0x7ff00000) {
1324    if (iy == 0x7ff00000) {
1325      switch (m) {
1326        case 0:
1327          return pi_o_4 + tiny; /* atan(+INF,+INF) */
1328        case 1:
1329          return -pi_o_4 - tiny; /* atan(-INF,+INF) */
1330        case 2:
1331          return 3.0 * pi_o_4 + tiny; /*atan(+INF,-INF)*/
1332        case 3:
1333          return -3.0 * pi_o_4 - tiny; /*atan(-INF,-INF)*/
1334      }
1335    } else {
1336      switch (m) {
1337        case 0:
1338          return zero; /* atan(+...,+INF) */
1339        case 1:
1340          return -zero; /* atan(-...,+INF) */
1341        case 2:
1342          return pi + tiny; /* atan(+...,-INF) */
1343        case 3:
1344          return -pi - tiny; /* atan(-...,-INF) */
1345      }
1346    }
1347  }
1348  /* when y is INF */
1349  if (iy == 0x7ff00000) return (hy < 0) ? -pi_o_2 - tiny : pi_o_2 + tiny;
1350
1351  /* compute y/x */
1352  k = (iy - ix) >> 20;
1353  if (k > 60) { /* |y/x| >  2**60 */
1354    z = pi_o_2 + 0.5 * pi_lo;
1355    m &= 1;
1356  } else if (hx < 0 && k < -60) {
1357    z = 0.0; /* 0 > |y|/x > -2**-60 */
1358  } else {
1359    z = atan(fabs(y / x)); /* safe to do y/x */
1360  }
1361  switch (m) {
1362    case 0:
1363      return z; /* atan(+,+) */
1364    case 1:
1365      return -z; /* atan(-,+) */
1366    case 2:
1367      return pi - (z - pi_lo); /* atan(+,-) */
1368    default:                   /* case 3 */
1369      return (z - pi_lo) - pi; /* atan(-,-) */
1370  }
1371}
1372
1373/* cos(x)
1374 * Return cosine function of x.
1375 *
1376 * kernel function:
1377 *      __kernel_sin            ... sine function on [-pi/4,pi/4]
1378 *      __kernel_cos            ... cosine function on [-pi/4,pi/4]
1379 *      __ieee754_rem_pio2      ... argument reduction routine
1380 *
1381 * Method.
1382 *      Let S,C and T denote the sin, cos and tan respectively on
1383 *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
1384 *      in [-pi/4 , +pi/4], and let n = k mod 4.
1385 *      We have
1386 *
1387 *          n        sin(x)      cos(x)        tan(x)
1388 *     ----------------------------------------------------------
1389 *          0          S           C             T
1390 *          1          C          -S            -1/T
1391 *          2         -S          -C             T
1392 *          3         -C           S            -1/T
1393 *     ----------------------------------------------------------
1394 *
1395 * Special cases:
1396 *      Let trig be any of sin, cos, or tan.
1397 *      trig(+-INF)  is NaN, with signals;
1398 *      trig(NaN)    is that NaN;
1399 *
1400 * Accuracy:
1401 *      TRIG(x) returns trig(x) nearly rounded
1402 */
1403double cos(double x) {
1404  double y[2], z = 0.0;
1405  int32_t n, ix;
1406
1407  /* High word of x. */
1408  GET_HIGH_WORD(ix, x);
1409
1410  /* |x| ~< pi/4 */
1411  ix &= 0x7fffffff;
1412  if (ix <= 0x3fe921fb) {
1413    return __kernel_cos(x, z);
1414  } else if (ix >= 0x7ff00000) {
1415    /* cos(Inf or NaN) is NaN */
1416    return x - x;
1417  } else {
1418    /* argument reduction needed */
1419    n = __ieee754_rem_pio2(x, y);
1420    switch (n & 3) {
1421      case 0:
1422        return __kernel_cos(y[0], y[1]);
1423      case 1:
1424        return -__kernel_sin(y[0], y[1], 1);
1425      case 2:
1426        return -__kernel_cos(y[0], y[1]);
1427      default:
1428        return __kernel_sin(y[0], y[1], 1);
1429    }
1430  }
1431}
1432
1433/* exp(x)
1434 * Returns the exponential of x.
1435 *
1436 * Method
1437 *   1. Argument reduction:
1438 *      Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658.
1439 *      Given x, find r and integer k such that
1440 *
1441 *               x = k*ln2 + r,  |r| <= 0.5*ln2.
1442 *
1443 *      Here r will be represented as r = hi-lo for better
1444 *      accuracy.
1445 *
1446 *   2. Approximation of exp(r) by a special rational function on
1447 *      the interval [0,0.34658]:
1448 *      Write
1449 *          R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ...
1450 *      We use a special Remes algorithm on [0,0.34658] to generate
1451 *      a polynomial of degree 5 to approximate R. The maximum error
1452 *      of this polynomial approximation is bounded by 2**-59. In
1453 *      other words,
1454 *          R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5
1455 *      (where z=r*r, and the values of P1 to P5 are listed below)
1456 *      and
1457 *          |                  5          |     -59
1458 *          | 2.0+P1*z+...+P5*z   -  R(z) | <= 2
1459 *          |                             |
1460 *      The computation of exp(r) thus becomes
1461 *                             2*r
1462 *              exp(r) = 1 + -------
1463 *                            R - r
1464 *                                 r*R1(r)
1465 *                     = 1 + r + ----------- (for better accuracy)
1466 *                                2 - R1(r)
1467 *      where
1468 *                               2       4             10
1469 *              R1(r) = r - (P1*r  + P2*r  + ... + P5*r   ).
1470 *
1471 *   3. Scale back to obtain exp(x):
1472 *      From step 1, we have
1473 *         exp(x) = 2^k * exp(r)
1474 *
1475 * Special cases:
1476 *      exp(INF) is INF, exp(NaN) is NaN;
1477 *      exp(-INF) is 0, and
1478 *      for finite argument, only exp(0)=1 is exact.
1479 *
1480 * Accuracy:
1481 *      according to an error analysis, the error is always less than
1482 *      1 ulp (unit in the last place).
1483 *
1484 * Misc. info.
1485 *      For IEEE double
1486 *          if x >  7.09782712893383973096e+02 then exp(x) overflow
1487 *          if x < -7.45133219101941108420e+02 then exp(x) underflow
1488 *
1489 * Constants:
1490 * The hexadecimal values are the intended ones for the following
1491 * constants. The decimal values may be used, provided that the
1492 * compiler will convert from decimal to binary accurately enough
1493 * to produce the hexadecimal values shown.
1494 */
1495double exp(double x) {
1496  static const double
1497      one = 1.0,
1498      halF[2] = {0.5, -0.5},
1499      o_threshold = 7.09782712893383973096e+02,  /* 0x40862E42, 0xFEFA39EF */
1500      u_threshold = -7.45133219101941108420e+02, /* 0xc0874910, 0xD52D3051 */
1501      ln2HI[2] = {6.93147180369123816490e-01,    /* 0x3fe62e42, 0xfee00000 */
1502                  -6.93147180369123816490e-01},  /* 0xbfe62e42, 0xfee00000 */
1503      ln2LO[2] = {1.90821492927058770002e-10,    /* 0x3dea39ef, 0x35793c76 */
1504                  -1.90821492927058770002e-10},  /* 0xbdea39ef, 0x35793c76 */
1505      invln2 = 1.44269504088896338700e+00,       /* 0x3ff71547, 0x652b82fe */
1506      P1 = 1.66666666666666019037e-01,           /* 0x3FC55555, 0x5555553E */
1507      P2 = -2.77777777770155933842e-03,          /* 0xBF66C16C, 0x16BEBD93 */
1508      P3 = 6.61375632143793436117e-05,           /* 0x3F11566A, 0xAF25DE2C */
1509      P4 = -1.65339022054652515390e-06,          /* 0xBEBBBD41, 0xC5D26BF1 */
1510      P5 = 4.13813679705723846039e-08,           /* 0x3E663769, 0x72BEA4D0 */
1511      E = 2.718281828459045;                     /* 0x4005bf0a, 0x8b145769 */
1512
1513  static volatile double
1514      huge = 1.0e+300,
1515      twom1000 = 9.33263618503218878990e-302, /* 2**-1000=0x01700000,0*/
1516      two1023 = 8.988465674311579539e307;     /* 0x1p1023 */
1517
1518  double y, hi = 0.0, lo = 0.0, c, t, twopk;
1519  int32_t k = 0, xsb;
1520  uint32_t hx;
1521
1522  GET_HIGH_WORD(hx, x);
1523  xsb = (hx >> 31) & 1; /* sign bit of x */
1524  hx &= 0x7fffffff;     /* high word of |x| */
1525
1526  /* filter out non-finite argument */
1527  if (hx >= 0x40862E42) { /* if |x|>=709.78... */
1528    if (hx >= 0x7ff00000) {
1529      uint32_t lx;
1530      GET_LOW_WORD(lx, x);
1531      if (((hx & 0xfffff) | lx) != 0)
1532        return x + x; /* NaN */
1533      else
1534        return (xsb == 0) ? x : 0.0; /* exp(+-inf)={inf,0} */
1535    }
1536    if (x > o_threshold) return huge * huge;         /* overflow */
1537    if (x < u_threshold) return twom1000 * twom1000; /* underflow */
1538  }
1539
1540  /* argument reduction */
1541  if (hx > 0x3fd62e42) {   /* if  |x| > 0.5 ln2 */
1542    if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
1543      /* TODO(rtoy): We special case exp(1) here to return the correct
1544       * value of E, as the computation below would get the last bit
1545       * wrong. We should probably fix the algorithm instead.
1546       */
1547      if (x == 1.0) return E;
1548      hi = x - ln2HI[xsb];
1549      lo = ln2LO[xsb];
1550      k = 1 - xsb - xsb;
1551    } else {
1552      k = static_cast<int>(invln2 * x + halF[xsb]);
1553      t = k;
1554      hi = x - t * ln2HI[0]; /* t*ln2HI is exact here */
1555      lo = t * ln2LO[0];
1556    }
1557    STRICT_ASSIGN(double, x, hi - lo);
1558  } else if (hx < 0x3e300000) {         /* when |x|<2**-28 */
1559    if (huge + x > one) return one + x; /* trigger inexact */
1560  } else {
1561    k = 0;
1562  }
1563
1564  /* x is now in primary range */
1565  t = x * x;
1566  if (k >= -1021) {
1567    INSERT_WORDS(twopk, 0x3ff00000 + (k << 20), 0);
1568  } else {
1569    INSERT_WORDS(twopk, 0x3ff00000 + ((k + 1000) << 20), 0);
1570  }
1571  c = x - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5))));
1572  if (k == 0) {
1573    return one - ((x * c) / (c - 2.0) - x);
1574  } else {
1575    y = one - ((lo - (x * c) / (2.0 - c)) - hi);
1576  }
1577  if (k >= -1021) {
1578    if (k == 1024) return y * 2.0 * two1023;
1579    return y * twopk;
1580  } else {
1581    return y * twopk * twom1000;
1582  }
1583}
1584
1585/*
1586 * Method :
1587 *    1.Reduced x to positive by atanh(-x) = -atanh(x)
1588 *    2.For x>=0.5
1589 *              1              2x                          x
1590 *  atanh(x) = --- * log(1 + -------) = 0.5 * log1p(2 * --------)
1591 *              2             1 - x                      1 - x
1592 *
1593 *   For x<0.5
1594 *  atanh(x) = 0.5*log1p(2x+2x*x/(1-x))
1595 *
1596 * Special cases:
1597 *  atanh(x) is NaN if |x| > 1 with signal;
1598 *  atanh(NaN) is that NaN with no signal;
1599 *  atanh(+-1) is +-INF with signal.
1600 *
1601 */
1602double atanh(double x) {
1603  static const double one = 1.0, huge = 1e300;
1604  static const double zero = 0.0;
1605
1606  double t;
1607  int32_t hx, ix;
1608  uint32_t lx;
1609  EXTRACT_WORDS(hx, lx, x);
1610  ix = hx & 0x7fffffff;
1611  if ((ix | ((lx | -static_cast<int32_t>(lx)) >> 31)) > 0x3ff00000) /* |x|>1 */
1612    return (x - x) / (x - x);
1613  if (ix == 0x3ff00000) return x / zero;
1614  if (ix < 0x3e300000 && (huge + x) > zero) return x; /* x<2**-28 */
1615  SET_HIGH_WORD(x, ix);
1616  if (ix < 0x3fe00000) { /* x < 0.5 */
1617    t = x + x;
1618    t = 0.5 * log1p(t + t * x / (one - x));
1619  } else {
1620    t = 0.5 * log1p((x + x) / (one - x));
1621  }
1622  if (hx >= 0)
1623    return t;
1624  else
1625    return -t;
1626}
1627
1628/* log(x)
1629 * Return the logrithm of x
1630 *
1631 * Method :
1632 *   1. Argument Reduction: find k and f such that
1633 *     x = 2^k * (1+f),
1634 *     where  sqrt(2)/2 < 1+f < sqrt(2) .
1635 *
1636 *   2. Approximation of log(1+f).
1637 *  Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
1638 *     = 2s + 2/3 s**3 + 2/5 s**5 + .....,
1639 *         = 2s + s*R
1640 *      We use a special Reme algorithm on [0,0.1716] to generate
1641 *  a polynomial of degree 14 to approximate R The maximum error
1642 *  of this polynomial approximation is bounded by 2**-58.45. In
1643 *  other words,
1644 *            2      4      6      8      10      12      14
1645 *      R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s  +Lg6*s  +Lg7*s
1646 *    (the values of Lg1 to Lg7 are listed in the program)
1647 *  and
1648 *      |      2          14          |     -58.45
1649 *      | Lg1*s +...+Lg7*s    -  R(z) | <= 2
1650 *      |                             |
1651 *  Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
1652 *  In order to guarantee error in log below 1ulp, we compute log
1653 *  by
1654 *    log(1+f) = f - s*(f - R)  (if f is not too large)
1655 *    log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
1656 *
1657 *  3. Finally,  log(x) = k*ln2 + log(1+f).
1658 *          = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
1659 *     Here ln2 is split into two floating point number:
1660 *      ln2_hi + ln2_lo,
1661 *     where n*ln2_hi is always exact for |n| < 2000.
1662 *
1663 * Special cases:
1664 *  log(x) is NaN with signal if x < 0 (including -INF) ;
1665 *  log(+INF) is +INF; log(0) is -INF with signal;
1666 *  log(NaN) is that NaN with no signal.
1667 *
1668 * Accuracy:
1669 *  according to an error analysis, the error is always less than
1670 *  1 ulp (unit in the last place).
1671 *
1672 * Constants:
1673 * The hexadecimal values are the intended ones for the following
1674 * constants. The decimal values may be used, provided that the
1675 * compiler will convert from decimal to binary accurately enough
1676 * to produce the hexadecimal values shown.
1677 */
1678double log(double x) {
1679  static const double                      /* -- */
1680      ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
1681      ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
1682      two54 = 1.80143985094819840000e+16,  /* 43500000 00000000 */
1683      Lg1 = 6.666666666666735130e-01,      /* 3FE55555 55555593 */
1684      Lg2 = 3.999999999940941908e-01,      /* 3FD99999 9997FA04 */
1685      Lg3 = 2.857142874366239149e-01,      /* 3FD24924 94229359 */
1686      Lg4 = 2.222219843214978396e-01,      /* 3FCC71C5 1D8E78AF */
1687      Lg5 = 1.818357216161805012e-01,      /* 3FC74664 96CB03DE */
1688      Lg6 = 1.531383769920937332e-01,      /* 3FC39A09 D078C69F */
1689      Lg7 = 1.479819860511658591e-01;      /* 3FC2F112 DF3E5244 */
1690
1691  static const double zero = 0.0;
1692  static volatile double vzero = 0.0;
1693
1694  double hfsq, f, s, z, R, w, t1, t2, dk;
1695  int32_t k, hx, i, j;
1696  uint32_t lx;
1697
1698  EXTRACT_WORDS(hx, lx, x);
1699
1700  k = 0;
1701  if (hx < 0x00100000) { /* x < 2**-1022  */
1702    if (((hx & 0x7fffffff) | lx) == 0)
1703      return -two54 / vzero;           /* log(+-0)=-inf */
1704    if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
1705    k -= 54;
1706    x *= two54; /* subnormal number, scale up x */
1707    GET_HIGH_WORD(hx, x);
1708  }
1709  if (hx >= 0x7ff00000) return x + x;
1710  k += (hx >> 20) - 1023;
1711  hx &= 0x000fffff;
1712  i = (hx + 0x95f64) & 0x100000;
1713  SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */
1714  k += (i >> 20);
1715  f = x - 1.0;
1716  if ((0x000fffff & (2 + hx)) < 3) { /* -2**-20 <= f < 2**-20 */
1717    if (f == zero) {
1718      if (k == 0) {
1719        return zero;
1720      } else {
1721        dk = static_cast<double>(k);
1722        return dk * ln2_hi + dk * ln2_lo;
1723      }
1724    }
1725    R = f * f * (0.5 - 0.33333333333333333 * f);
1726    if (k == 0) {
1727      return f - R;
1728    } else {
1729      dk = static_cast<double>(k);
1730      return dk * ln2_hi - ((R - dk * ln2_lo) - f);
1731    }
1732  }
1733  s = f / (2.0 + f);
1734  dk = static_cast<double>(k);
1735  z = s * s;
1736  i = hx - 0x6147a;
1737  w = z * z;
1738  j = 0x6b851 - hx;
1739  t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
1740  t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
1741  i |= j;
1742  R = t2 + t1;
1743  if (i > 0) {
1744    hfsq = 0.5 * f * f;
1745    if (k == 0)
1746      return f - (hfsq - s * (hfsq + R));
1747    else
1748      return dk * ln2_hi - ((hfsq - (s * (hfsq + R) + dk * ln2_lo)) - f);
1749  } else {
1750    if (k == 0)
1751      return f - s * (f - R);
1752    else
1753      return dk * ln2_hi - ((s * (f - R) - dk * ln2_lo) - f);
1754  }
1755}
1756
1757/* double log1p(double x)
1758 *
1759 * Method :
1760 *   1. Argument Reduction: find k and f such that
1761 *      1+x = 2^k * (1+f),
1762 *     where  sqrt(2)/2 < 1+f < sqrt(2) .
1763 *
1764 *      Note. If k=0, then f=x is exact. However, if k!=0, then f
1765 *  may not be representable exactly. In that case, a correction
1766 *  term is need. Let u=1+x rounded. Let c = (1+x)-u, then
1767 *  log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
1768 *  and add back the correction term c/u.
1769 *  (Note: when x > 2**53, one can simply return log(x))
1770 *
1771 *   2. Approximation of log1p(f).
1772 *  Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
1773 *     = 2s + 2/3 s**3 + 2/5 s**5 + .....,
1774 *         = 2s + s*R
1775 *      We use a special Reme algorithm on [0,0.1716] to generate
1776 *  a polynomial of degree 14 to approximate R The maximum error
1777 *  of this polynomial approximation is bounded by 2**-58.45. In
1778 *  other words,
1779 *            2      4      6      8      10      12      14
1780 *      R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s  +Lp6*s  +Lp7*s
1781 *    (the values of Lp1 to Lp7 are listed in the program)
1782 *  and
1783 *      |      2          14          |     -58.45
1784 *      | Lp1*s +...+Lp7*s    -  R(z) | <= 2
1785 *      |                             |
1786 *  Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
1787 *  In order to guarantee error in log below 1ulp, we compute log
1788 *  by
1789 *    log1p(f) = f - (hfsq - s*(hfsq+R)).
1790 *
1791 *  3. Finally, log1p(x) = k*ln2 + log1p(f).
1792 *           = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
1793 *     Here ln2 is split into two floating point number:
1794 *      ln2_hi + ln2_lo,
1795 *     where n*ln2_hi is always exact for |n| < 2000.
1796 *
1797 * Special cases:
1798 *  log1p(x) is NaN with signal if x < -1 (including -INF) ;
1799 *  log1p(+INF) is +INF; log1p(-1) is -INF with signal;
1800 *  log1p(NaN) is that NaN with no signal.
1801 *
1802 * Accuracy:
1803 *  according to an error analysis, the error is always less than
1804 *  1 ulp (unit in the last place).
1805 *
1806 * Constants:
1807 * The hexadecimal values are the intended ones for the following
1808 * constants. The decimal values may be used, provided that the
1809 * compiler will convert from decimal to binary accurately enough
1810 * to produce the hexadecimal values shown.
1811 *
1812 * Note: Assuming log() return accurate answer, the following
1813 *   algorithm can be used to compute log1p(x) to within a few ULP:
1814 *
1815 *    u = 1+x;
1816 *    if(u==1.0) return x ; else
1817 *         return log(u)*(x/(u-1.0));
1818 *
1819 *   See HP-15C Advanced Functions Handbook, p.193.
1820 */
1821double log1p(double x) {
1822  static const double                      /* -- */
1823      ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
1824      ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
1825      two54 = 1.80143985094819840000e+16,  /* 43500000 00000000 */
1826      Lp1 = 6.666666666666735130e-01,      /* 3FE55555 55555593 */
1827      Lp2 = 3.999999999940941908e-01,      /* 3FD99999 9997FA04 */
1828      Lp3 = 2.857142874366239149e-01,      /* 3FD24924 94229359 */
1829      Lp4 = 2.222219843214978396e-01,      /* 3FCC71C5 1D8E78AF */
1830      Lp5 = 1.818357216161805012e-01,      /* 3FC74664 96CB03DE */
1831      Lp6 = 1.531383769920937332e-01,      /* 3FC39A09 D078C69F */
1832      Lp7 = 1.479819860511658591e-01;      /* 3FC2F112 DF3E5244 */
1833
1834  static const double zero = 0.0;
1835  static volatile double vzero = 0.0;
1836
1837  double hfsq, f, c, s, z, R, u;
1838  int32_t k, hx, hu, ax;
1839
1840  GET_HIGH_WORD(hx, x);
1841  ax = hx & 0x7fffffff;
1842
1843  k = 1;
1844  if (hx < 0x3FDA827A) {    /* 1+x < sqrt(2)+ */
1845    if (ax >= 0x3ff00000) { /* x <= -1.0 */
1846      if (x == -1.0)
1847        return -two54 / vzero; /* log1p(-1)=+inf */
1848      else
1849        return (x - x) / (x - x); /* log1p(x<-1)=NaN */
1850    }
1851    if (ax < 0x3e200000) {    /* |x| < 2**-29 */
1852      if (two54 + x > zero    /* raise inexact */
1853          && ax < 0x3c900000) /* |x| < 2**-54 */
1854        return x;
1855      else
1856        return x - x * x * 0.5;
1857    }
1858    if (hx > 0 || hx <= static_cast<int32_t>(0xbfd2bec4)) {
1859      k = 0;
1860      f = x;
1861      hu = 1;
1862    } /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
1863  }
1864  if (hx >= 0x7ff00000) return x + x;
1865  if (k != 0) {
1866    if (hx < 0x43400000) {
1867      STRICT_ASSIGN(double, u, 1.0 + x);
1868      GET_HIGH_WORD(hu, u);
1869      k = (hu >> 20) - 1023;
1870      c = (k > 0) ? 1.0 - (u - x) : x - (u - 1.0); /* correction term */
1871      c /= u;
1872    } else {
1873      u = x;
1874      GET_HIGH_WORD(hu, u);
1875      k = (hu >> 20) - 1023;
1876      c = 0;
1877    }
1878    hu &= 0x000fffff;
1879    /*
1880     * The approximation to sqrt(2) used in thresholds is not
1881     * critical.  However, the ones used above must give less
1882     * strict bounds than the one here so that the k==0 case is
1883     * never reached from here, since here we have committed to
1884     * using the correction term but don't use it if k==0.
1885     */
1886    if (hu < 0x6a09e) {                  /* u ~< sqrt(2) */
1887      SET_HIGH_WORD(u, hu | 0x3ff00000); /* normalize u */
1888    } else {
1889      k += 1;
1890      SET_HIGH_WORD(u, hu | 0x3fe00000); /* normalize u/2 */
1891      hu = (0x00100000 - hu) >> 2;
1892    }
1893    f = u - 1.0;
1894  }
1895  hfsq = 0.5 * f * f;
1896  if (hu == 0) { /* |f| < 2**-20 */
1897    if (f == zero) {
1898      if (k == 0) {
1899        return zero;
1900      } else {
1901        c += k * ln2_lo;
1902        return k * ln2_hi + c;
1903      }
1904    }
1905    R = hfsq * (1.0 - 0.66666666666666666 * f);
1906    if (k == 0)
1907      return f - R;
1908    else
1909      return k * ln2_hi - ((R - (k * ln2_lo + c)) - f);
1910  }
1911  s = f / (2.0 + f);
1912  z = s * s;
1913  R = z * (Lp1 +
1914           z * (Lp2 + z * (Lp3 + z * (Lp4 + z * (Lp5 + z * (Lp6 + z * Lp7))))));
1915  if (k == 0)
1916    return f - (hfsq - s * (hfsq + R));
1917  else
1918    return k * ln2_hi - ((hfsq - (s * (hfsq + R) + (k * ln2_lo + c))) - f);
1919}
1920
1921/*
1922 * k_log1p(f):
1923 * Return log(1+f) - f for 1+f in ~[sqrt(2)/2, sqrt(2)].
1924 *
1925 * The following describes the overall strategy for computing
1926 * logarithms in base e.  The argument reduction and adding the final
1927 * term of the polynomial are done by the caller for increased accuracy
1928 * when different bases are used.
1929 *
1930 * Method :
1931 *   1. Argument Reduction: find k and f such that
1932 *         x = 2^k * (1+f),
1933 *         where  sqrt(2)/2 < 1+f < sqrt(2) .
1934 *
1935 *   2. Approximation of log(1+f).
1936 *      Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
1937 *            = 2s + 2/3 s**3 + 2/5 s**5 + .....,
1938 *            = 2s + s*R
1939 *      We use a special Reme algorithm on [0,0.1716] to generate
1940 *      a polynomial of degree 14 to approximate R The maximum error
1941 *      of this polynomial approximation is bounded by 2**-58.45. In
1942 *      other words,
1943 *          2      4      6      8      10      12      14
1944 *          R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s  +Lg6*s  +Lg7*s
1945 *      (the values of Lg1 to Lg7 are listed in the program)
1946 *      and
1947 *          |      2          14          |     -58.45
1948 *          | Lg1*s +...+Lg7*s    -  R(z) | <= 2
1949 *          |                             |
1950 *      Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
1951 *      In order to guarantee error in log below 1ulp, we compute log
1952 *      by
1953 *          log(1+f) = f - s*(f - R)            (if f is not too large)
1954 *          log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
1955 *
1956 *   3. Finally,  log(x) = k*ln2 + log(1+f).
1957 *          = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
1958 *      Here ln2 is split into two floating point number:
1959 *          ln2_hi + ln2_lo,
1960 *      where n*ln2_hi is always exact for |n| < 2000.
1961 *
1962 * Special cases:
1963 *      log(x) is NaN with signal if x < 0 (including -INF) ;
1964 *      log(+INF) is +INF; log(0) is -INF with signal;
1965 *      log(NaN) is that NaN with no signal.
1966 *
1967 * Accuracy:
1968 *      according to an error analysis, the error is always less than
1969 *      1 ulp (unit in the last place).
1970 *
1971 * Constants:
1972 * The hexadecimal values are the intended ones for the following
1973 * constants. The decimal values may be used, provided that the
1974 * compiler will convert from decimal to binary accurately enough
1975 * to produce the hexadecimal values shown.
1976 */
1977
1978static const double Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
1979    Lg2 = 3.999999999940941908e-01,                 /* 3FD99999 9997FA04 */
1980    Lg3 = 2.857142874366239149e-01,                 /* 3FD24924 94229359 */
1981    Lg4 = 2.222219843214978396e-01,                 /* 3FCC71C5 1D8E78AF */
1982    Lg5 = 1.818357216161805012e-01,                 /* 3FC74664 96CB03DE */
1983    Lg6 = 1.531383769920937332e-01,                 /* 3FC39A09 D078C69F */
1984    Lg7 = 1.479819860511658591e-01;                 /* 3FC2F112 DF3E5244 */
1985
1986/*
1987 * We always inline k_log1p(), since doing so produces a
1988 * substantial performance improvement (~40% on amd64).
1989 */
1990static inline double k_log1p(double f) {
1991  double hfsq, s, z, R, w, t1, t2;
1992
1993  s = f / (2.0 + f);
1994  z = s * s;
1995  w = z * z;
1996  t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
1997  t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
1998  R = t2 + t1;
1999  hfsq = 0.5 * f * f;
2000  return s * (hfsq + R);
2001}
2002
2003/*
2004 * Return the base 2 logarithm of x.  See e_log.c and k_log.h for most
2005 * comments.
2006 *
2007 * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel,
2008 * then does the combining and scaling steps
2009 *    log2(x) = (f - 0.5*f*f + k_log1p(f)) / ln2 + k
2010 * in not-quite-routine extra precision.
2011 */
2012double log2(double x) {
2013  static const double
2014      two54 = 1.80143985094819840000e+16,   /* 0x43500000, 0x00000000 */
2015      ivln2hi = 1.44269504072144627571e+00, /* 0x3ff71547, 0x65200000 */
2016      ivln2lo = 1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */
2017
2018  static const double zero = 0.0;
2019  static volatile double vzero = 0.0;
2020
2021  double f, hfsq, hi, lo, r, val_hi, val_lo, w, y;
2022  int32_t i, k, hx;
2023  uint32_t lx;
2024
2025  EXTRACT_WORDS(hx, lx, x);
2026
2027  k = 0;
2028  if (hx < 0x00100000) { /* x < 2**-1022  */
2029    if (((hx & 0x7fffffff) | lx) == 0)
2030      return -two54 / vzero;           /* log(+-0)=-inf */
2031    if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
2032    k -= 54;
2033    x *= two54; /* subnormal number, scale up x */
2034    GET_HIGH_WORD(hx, x);
2035  }
2036  if (hx >= 0x7ff00000) return x + x;
2037  if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */
2038  k += (hx >> 20) - 1023;
2039  hx &= 0x000fffff;
2040  i = (hx + 0x95f64) & 0x100000;
2041  SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */
2042  k += (i >> 20);
2043  y = static_cast<double>(k);
2044  f = x - 1.0;
2045  hfsq = 0.5 * f * f;
2046  r = k_log1p(f);
2047
2048  /*
2049   * f-hfsq must (for args near 1) be evaluated in extra precision
2050   * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2).
2051   * This is fairly efficient since f-hfsq only depends on f, so can
2052   * be evaluated in parallel with R.  Not combining hfsq with R also
2053   * keeps R small (though not as small as a true `lo' term would be),
2054   * so that extra precision is not needed for terms involving R.
2055   *
2056   * Compiler bugs involving extra precision used to break Dekker's
2057   * theorem for spitting f-hfsq as hi+lo, unless double_t was used
2058   * or the multi-precision calculations were avoided when double_t
2059   * has extra precision.  These problems are now automatically
2060   * avoided as a side effect of the optimization of combining the
2061   * Dekker splitting step with the clear-low-bits step.
2062   *
2063   * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra
2064   * precision to avoid a very large cancellation when x is very near
2065   * these values.  Unlike the above cancellations, this problem is
2066   * specific to base 2.  It is strange that adding +-1 is so much
2067   * harder than adding +-ln2 or +-log10_2.
2068   *
2069   * This uses Dekker's theorem to normalize y+val_hi, so the
2070   * compiler bugs are back in some configurations, sigh.  And I
2071   * don't want to used double_t to avoid them, since that gives a
2072   * pessimization and the support for avoiding the pessimization
2073   * is not yet available.
2074   *
2075   * The multi-precision calculations for the multiplications are
2076   * routine.
2077   */
2078  hi = f - hfsq;
2079  SET_LOW_WORD(hi, 0);
2080  lo = (f - hi) - hfsq + r;
2081  val_hi = hi * ivln2hi;
2082  val_lo = (lo + hi) * ivln2lo + lo * ivln2hi;
2083
2084  /* spadd(val_hi, val_lo, y), except for not using double_t: */
2085  w = y + val_hi;
2086  val_lo += (y - w) + val_hi;
2087  val_hi = w;
2088
2089  return val_lo + val_hi;
2090}
2091
2092/*
2093 * Return the base 10 logarithm of x
2094 *
2095 * Method :
2096 *      Let log10_2hi = leading 40 bits of log10(2) and
2097 *          log10_2lo = log10(2) - log10_2hi,
2098 *          ivln10   = 1/log(10) rounded.
2099 *      Then
2100 *              n = ilogb(x),
2101 *              if(n<0)  n = n+1;
2102 *              x = scalbn(x,-n);
2103 *              log10(x) := n*log10_2hi + (n*log10_2lo + ivln10*log(x))
2104 *
2105 *  Note 1:
2106 *     To guarantee log10(10**n)=n, where 10**n is normal, the rounding
2107 *     mode must set to Round-to-Nearest.
2108 *  Note 2:
2109 *      [1/log(10)] rounded to 53 bits has error .198 ulps;
2110 *      log10 is monotonic at all binary break points.
2111 *
2112 *  Special cases:
2113 *      log10(x) is NaN if x < 0;
2114 *      log10(+INF) is +INF; log10(0) is -INF;
2115 *      log10(NaN) is that NaN;
2116 *      log10(10**N) = N  for N=0,1,...,22.
2117 */
2118double log10(double x) {
2119  static const double
2120      two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
2121      ivln10 = 4.34294481903251816668e-01,
2122      log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */
2123      log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */
2124
2125  static const double zero = 0.0;
2126  static volatile double vzero = 0.0;
2127
2128  double y;
2129  int32_t i, k, hx;
2130  uint32_t lx;
2131
2132  EXTRACT_WORDS(hx, lx, x);
2133
2134  k = 0;
2135  if (hx < 0x00100000) { /* x < 2**-1022  */
2136    if (((hx & 0x7fffffff) | lx) == 0)
2137      return -two54 / vzero;           /* log(+-0)=-inf */
2138    if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
2139    k -= 54;
2140    x *= two54; /* subnormal number, scale up x */
2141    GET_HIGH_WORD(hx, x);
2142    GET_LOW_WORD(lx, x);
2143  }
2144  if (hx >= 0x7ff00000) return x + x;
2145  if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */
2146  k += (hx >> 20) - 1023;
2147
2148  i = (k & 0x80000000) >> 31;
2149  hx = (hx & 0x000fffff) | ((0x3ff - i) << 20);
2150  y = k + i;
2151  SET_HIGH_WORD(x, hx);
2152  SET_LOW_WORD(x, lx);
2153
2154  double z = y * log10_2lo + ivln10 * log(x);
2155  return z + y * log10_2hi;
2156}
2157
2158/* expm1(x)
2159 * Returns exp(x)-1, the exponential of x minus 1.
2160 *
2161 * Method
2162 *   1. Argument reduction:
2163 *  Given x, find r and integer k such that
2164 *
2165 *               x = k*ln2 + r,  |r| <= 0.5*ln2 ~ 0.34658
2166 *
2167 *      Here a correction term c will be computed to compensate
2168 *  the error in r when rounded to a floating-point number.
2169 *
2170 *   2. Approximating expm1(r) by a special rational function on
2171 *  the interval [0,0.34658]:
2172 *  Since
2173 *      r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 - r^4/360 + ...
2174 *  we define R1(r*r) by
2175 *      r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 * R1(r*r)
2176 *  That is,
2177 *      R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r)
2178 *         = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r))
2179 *         = 1 - r^2/60 + r^4/2520 - r^6/100800 + ...
2180 *      We use a special Reme algorithm on [0,0.347] to generate
2181 *   a polynomial of degree 5 in r*r to approximate R1. The
2182 *  maximum error of this polynomial approximation is bounded
2183 *  by 2**-61. In other words,
2184 *      R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5
2185 *  where   Q1  =  -1.6666666666666567384E-2,
2186 *     Q2  =   3.9682539681370365873E-4,
2187 *     Q3  =  -9.9206344733435987357E-6,
2188 *     Q4  =   2.5051361420808517002E-7,
2189 *     Q5  =  -6.2843505682382617102E-9;
2190 *    z   =  r*r,
2191 *  with error bounded by
2192 *      |                  5           |     -61
2193 *      | 1.0+Q1*z+...+Q5*z   -  R1(z) | <= 2
2194 *      |                              |
2195 *
2196 *  expm1(r) = exp(r)-1 is then computed by the following
2197 *   specific way which minimize the accumulation rounding error:
2198 *             2     3
2199 *            r     r    [ 3 - (R1 + R1*r/2)  ]
2200 *        expm1(r) = r + --- + --- * [--------------------]
2201 *                  2     2    [ 6 - r*(3 - R1*r/2) ]
2202 *
2203 *  To compensate the error in the argument reduction, we use
2204 *    expm1(r+c) = expm1(r) + c + expm1(r)*c
2205 *         ~ expm1(r) + c + r*c
2206 *  Thus c+r*c will be added in as the correction terms for
2207 *  expm1(r+c). Now rearrange the term to avoid optimization
2208 *   screw up:
2209 *            (      2                                    2 )
2210 *            ({  ( r    [ R1 -  (3 - R1*r/2) ]  )  }    r  )
2211 *   expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- )
2212 *                  ({  ( 2    [ 6 - r*(3 - R1*r/2) ]  )  }    2  )
2213 *                      (                                             )
2214 *
2215 *       = r - E
2216 *   3. Scale back to obtain expm1(x):
2217 *  From step 1, we have
2218 *     expm1(x) = either 2^k*[expm1(r)+1] - 1
2219 *        = or     2^k*[expm1(r) + (1-2^-k)]
2220 *   4. Implementation notes:
2221 *  (A). To save one multiplication, we scale the coefficient Qi
2222 *       to Qi*2^i, and replace z by (x^2)/2.
2223 *  (B). To achieve maximum accuracy, we compute expm1(x) by
2224 *    (i)   if x < -56*ln2, return -1.0, (raise inexact if x!=inf)
2225 *    (ii)  if k=0, return r-E
2226 *    (iii) if k=-1, return 0.5*(r-E)-0.5
2227 *        (iv)  if k=1 if r < -0.25, return 2*((r+0.5)- E)
2228 *                  else       return  1.0+2.0*(r-E);
2229 *    (v)   if (k<-2||k>56) return 2^k(1-(E-r)) - 1 (or exp(x)-1)
2230 *    (vi)  if k <= 20, return 2^k((1-2^-k)-(E-r)), else
2231 *    (vii) return 2^k(1-((E+2^-k)-r))
2232 *
2233 * Special cases:
2234 *  expm1(INF) is INF, expm1(NaN) is NaN;
2235 *  expm1(-INF) is -1, and
2236 *  for finite argument, only expm1(0)=0 is exact.
2237 *
2238 * Accuracy:
2239 *  according to an error analysis, the error is always less than
2240 *  1 ulp (unit in the last place).
2241 *
2242 * Misc. info.
2243 *  For IEEE double
2244 *      if x >  7.09782712893383973096e+02 then expm1(x) overflow
2245 *
2246 * Constants:
2247 * The hexadecimal values are the intended ones for the following
2248 * constants. The decimal values may be used, provided that the
2249 * compiler will convert from decimal to binary accurately enough
2250 * to produce the hexadecimal values shown.
2251 */
2252double expm1(double x) {
2253  static const double
2254      one = 1.0,
2255      tiny = 1.0e-300,
2256      o_threshold = 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */
2257      ln2_hi = 6.93147180369123816490e-01,      /* 0x3fe62e42, 0xfee00000 */
2258      ln2_lo = 1.90821492927058770002e-10,      /* 0x3dea39ef, 0x35793c76 */
2259      invln2 = 1.44269504088896338700e+00,      /* 0x3ff71547, 0x652b82fe */
2260      /* Scaled Q's: Qn_here = 2**n * Qn_above, for R(2*z) where z = hxs =
2261         x*x/2: */
2262      Q1 = -3.33333333333331316428e-02, /* BFA11111 111110F4 */
2263      Q2 = 1.58730158725481460165e-03,  /* 3F5A01A0 19FE5585 */
2264      Q3 = -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */
2265      Q4 = 4.00821782732936239552e-06,  /* 3ED0CFCA 86E65239 */
2266      Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */
2267
2268  static volatile double huge = 1.0e+300;
2269
2270  double y, hi, lo, c, t, e, hxs, hfx, r1, twopk;
2271  int32_t k, xsb;
2272  uint32_t hx;
2273
2274  GET_HIGH_WORD(hx, x);
2275  xsb = hx & 0x80000000; /* sign bit of x */
2276  hx &= 0x7fffffff;      /* high word of |x| */
2277
2278  /* filter out huge and non-finite argument */
2279  if (hx >= 0x4043687A) {   /* if |x|>=56*ln2 */
2280    if (hx >= 0x40862E42) { /* if |x|>=709.78... */
2281      if (hx >= 0x7ff00000) {
2282        uint32_t low;
2283        GET_LOW_WORD(low, x);
2284        if (((hx & 0xfffff) | low) != 0)
2285          return x + x; /* NaN */
2286        else
2287          return (xsb == 0) ? x : -1.0; /* exp(+-inf)={inf,-1} */
2288      }
2289      if (x > o_threshold) return huge * huge; /* overflow */
2290    }
2291    if (xsb != 0) {        /* x < -56*ln2, return -1.0 with inexact */
2292      if (x + tiny < 0.0)  /* raise inexact */
2293        return tiny - one; /* return -1 */
2294    }
2295  }
2296
2297  /* argument reduction */
2298  if (hx > 0x3fd62e42) {   /* if  |x| > 0.5 ln2 */
2299    if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
2300      if (xsb == 0) {
2301        hi = x - ln2_hi;
2302        lo = ln2_lo;
2303        k = 1;
2304      } else {
2305        hi = x + ln2_hi;
2306        lo = -ln2_lo;
2307        k = -1;
2308      }
2309    } else {
2310      k = invln2 * x + ((xsb == 0) ? 0.5 : -0.5);
2311      t = k;
2312      hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
2313      lo = t * ln2_lo;
2314    }
2315    STRICT_ASSIGN(double, x, hi - lo);
2316    c = (hi - x) - lo;
2317  } else if (hx < 0x3c900000) { /* when |x|<2**-54, return x */
2318    t = huge + x;               /* return x with inexact flags when x!=0 */
2319    return x - (t - (huge + x));
2320  } else {
2321    k = 0;
2322  }
2323
2324  /* x is now in primary range */
2325  hfx = 0.5 * x;
2326  hxs = x * hfx;
2327  r1 = one + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5))));
2328  t = 3.0 - r1 * hfx;
2329  e = hxs * ((r1 - t) / (6.0 - x * t));
2330  if (k == 0) {
2331    return x - (x * e - hxs); /* c is 0 */
2332  } else {
2333    INSERT_WORDS(twopk, 0x3ff00000 + (k << 20), 0); /* 2^k */
2334    e = (x * (e - c) - c);
2335    e -= hxs;
2336    if (k == -1) return 0.5 * (x - e) - 0.5;
2337    if (k == 1) {
2338      if (x < -0.25)
2339        return -2.0 * (e - (x + 0.5));
2340      else
2341        return one + 2.0 * (x - e);
2342    }
2343    if (k <= -2 || k > 56) { /* suffice to return exp(x)-1 */
2344      y = one - (e - x);
2345      // TODO(mvstanton): is this replacement for the hex float
2346      // sufficient?
2347      // if (k == 1024) y = y*2.0*0x1p1023;
2348      if (k == 1024)
2349        y = y * 2.0 * 8.98846567431158e+307;
2350      else
2351        y = y * twopk;
2352      return y - one;
2353    }
2354    t = one;
2355    if (k < 20) {
2356      SET_HIGH_WORD(t, 0x3ff00000 - (0x200000 >> k)); /* t=1-2^-k */
2357      y = t - (e - x);
2358      y = y * twopk;
2359    } else {
2360      SET_HIGH_WORD(t, ((0x3ff - k) << 20)); /* 2^-k */
2361      y = x - (e + t);
2362      y += one;
2363      y = y * twopk;
2364    }
2365  }
2366  return y;
2367}
2368
2369double cbrt(double x) {
2370  static const uint32_t
2371      B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */
2372      B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */
2373
2374  /* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */
2375  static const double P0 = 1.87595182427177009643, /* 0x3ffe03e6, 0x0f61e692 */
2376      P1 = -1.88497979543377169875,                /* 0xbffe28e0, 0x92f02420 */
2377      P2 = 1.621429720105354466140,                /* 0x3ff9f160, 0x4a49d6c2 */
2378      P3 = -0.758397934778766047437,               /* 0xbfe844cb, 0xbee751d9 */
2379      P4 = 0.145996192886612446982;                /* 0x3fc2b000, 0xd4e4edd7 */
2380
2381  int32_t hx;
2382  union {
2383    double value;
2384    uint64_t bits;
2385  } u;
2386  double r, s, t = 0.0, w;
2387  uint32_t sign;
2388  uint32_t high, low;
2389
2390  EXTRACT_WORDS(hx, low, x);
2391  sign = hx & 0x80000000; /* sign= sign(x) */
2392  hx ^= sign;
2393  if (hx >= 0x7ff00000) return (x + x); /* cbrt(NaN,INF) is itself */
2394
2395  /*
2396   * Rough cbrt to 5 bits:
2397   *    cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3)
2398   * where e is integral and >= 0, m is real and in [0, 1), and "/" and
2399   * "%" are integer division and modulus with rounding towards minus
2400   * infinity.  The RHS is always >= the LHS and has a maximum relative
2401   * error of about 1 in 16.  Adding a bias of -0.03306235651 to the
2402   * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE
2403   * floating point representation, for finite positive normal values,
2404   * ordinary integer divison of the value in bits magically gives
2405   * almost exactly the RHS of the above provided we first subtract the
2406   * exponent bias (1023 for doubles) and later add it back.  We do the
2407   * subtraction virtually to keep e >= 0 so that ordinary integer
2408   * division rounds towards minus infinity; this is also efficient.
2409   */
2410  if (hx < 0x00100000) {             /* zero or subnormal? */
2411    if ((hx | low) == 0) return (x); /* cbrt(0) is itself */
2412    SET_HIGH_WORD(t, 0x43500000);    /* set t= 2**54 */
2413    t *= x;
2414    GET_HIGH_WORD(high, t);
2415    INSERT_WORDS(t, sign | ((high & 0x7fffffff) / 3 + B2), 0);
2416  } else {
2417    INSERT_WORDS(t, sign | (hx / 3 + B1), 0);
2418  }
2419
2420  /*
2421   * New cbrt to 23 bits:
2422   *    cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x)
2423   * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r)
2424   * to within 2**-23.5 when |r - 1| < 1/10.  The rough approximation
2425   * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this
2426   * gives us bounds for r = t**3/x.
2427   *
2428   * Try to optimize for parallel evaluation as in k_tanf.c.
2429   */
2430  r = (t * t) * (t / x);
2431  t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4));
2432
2433  /*
2434   * Round t away from zero to 23 bits (sloppily except for ensuring that
2435   * the result is larger in magnitude than cbrt(x) but not much more than
2436   * 2 23-bit ulps larger).  With rounding towards zero, the error bound
2437   * would be ~5/6 instead of ~4/6.  With a maximum error of 2 23-bit ulps
2438   * in the rounded t, the infinite-precision error in the Newton
2439   * approximation barely affects third digit in the final error
2440   * 0.667; the error in the rounded t can be up to about 3 23-bit ulps
2441   * before the final error is larger than 0.667 ulps.
2442   */
2443  u.value = t;
2444  u.bits = (u.bits + 0x80000000) & 0xffffffffc0000000ULL;
2445  t = u.value;
2446
2447  /* one step Newton iteration to 53 bits with error < 0.667 ulps */
2448  s = t * t;             /* t*t is exact */
2449  r = x / s;             /* error <= 0.5 ulps; |r| < |t| */
2450  w = t + t;             /* t+t is exact */
2451  r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */
2452  t = t + t * r;         /* error <= 0.5 + 0.5/3 + epsilon */
2453
2454  return (t);
2455}
2456
2457/* sin(x)
2458 * Return sine function of x.
2459 *
2460 * kernel function:
2461 *      __kernel_sin            ... sine function on [-pi/4,pi/4]
2462 *      __kernel_cos            ... cose function on [-pi/4,pi/4]
2463 *      __ieee754_rem_pio2      ... argument reduction routine
2464 *
2465 * Method.
2466 *      Let S,C and T denote the sin, cos and tan respectively on
2467 *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
2468 *      in [-pi/4 , +pi/4], and let n = k mod 4.
2469 *      We have
2470 *
2471 *          n        sin(x)      cos(x)        tan(x)
2472 *     ----------------------------------------------------------
2473 *          0          S           C             T
2474 *          1          C          -S            -1/T
2475 *          2         -S          -C             T
2476 *          3         -C           S            -1/T
2477 *     ----------------------------------------------------------
2478 *
2479 * Special cases:
2480 *      Let trig be any of sin, cos, or tan.
2481 *      trig(+-INF)  is NaN, with signals;
2482 *      trig(NaN)    is that NaN;
2483 *
2484 * Accuracy:
2485 *      TRIG(x) returns trig(x) nearly rounded
2486 */
2487double sin(double x) {
2488  double y[2], z = 0.0;
2489  int32_t n, ix;
2490
2491  /* High word of x. */
2492  GET_HIGH_WORD(ix, x);
2493
2494  /* |x| ~< pi/4 */
2495  ix &= 0x7fffffff;
2496  if (ix <= 0x3fe921fb) {
2497    return __kernel_sin(x, z, 0);
2498  } else if (ix >= 0x7ff00000) {
2499    /* sin(Inf or NaN) is NaN */
2500    return x - x;
2501  } else {
2502    /* argument reduction needed */
2503    n = __ieee754_rem_pio2(x, y);
2504    switch (n & 3) {
2505      case 0:
2506        return __kernel_sin(y[0], y[1], 1);
2507      case 1:
2508        return __kernel_cos(y[0], y[1]);
2509      case 2:
2510        return -__kernel_sin(y[0], y[1], 1);
2511      default:
2512        return -__kernel_cos(y[0], y[1]);
2513    }
2514  }
2515}
2516
2517/* tan(x)
2518 * Return tangent function of x.
2519 *
2520 * kernel function:
2521 *      __kernel_tan            ... tangent function on [-pi/4,pi/4]
2522 *      __ieee754_rem_pio2      ... argument reduction routine
2523 *
2524 * Method.
2525 *      Let S,C and T denote the sin, cos and tan respectively on
2526 *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
2527 *      in [-pi/4 , +pi/4], and let n = k mod 4.
2528 *      We have
2529 *
2530 *          n        sin(x)      cos(x)        tan(x)
2531 *     ----------------------------------------------------------
2532 *          0          S           C             T
2533 *          1          C          -S            -1/T
2534 *          2         -S          -C             T
2535 *          3         -C           S            -1/T
2536 *     ----------------------------------------------------------
2537 *
2538 * Special cases:
2539 *      Let trig be any of sin, cos, or tan.
2540 *      trig(+-INF)  is NaN, with signals;
2541 *      trig(NaN)    is that NaN;
2542 *
2543 * Accuracy:
2544 *      TRIG(x) returns trig(x) nearly rounded
2545 */
2546double tan(double x) {
2547  double y[2], z = 0.0;
2548  int32_t n, ix;
2549
2550  /* High word of x. */
2551  GET_HIGH_WORD(ix, x);
2552
2553  /* |x| ~< pi/4 */
2554  ix &= 0x7fffffff;
2555  if (ix <= 0x3fe921fb) {
2556    return __kernel_tan(x, z, 1);
2557  } else if (ix >= 0x7ff00000) {
2558    /* tan(Inf or NaN) is NaN */
2559    return x - x; /* NaN */
2560  } else {
2561    /* argument reduction needed */
2562    n = __ieee754_rem_pio2(x, y);
2563    /* 1 -> n even, -1 -> n odd */
2564    return __kernel_tan(y[0], y[1], 1 - ((n & 1) << 1));
2565  }
2566}
2567
2568/*
2569 * ES6 draft 09-27-13, section 20.2.2.12.
2570 * Math.cosh
2571 * Method :
2572 * mathematically cosh(x) if defined to be (exp(x)+exp(-x))/2
2573 *      1. Replace x by |x| (cosh(x) = cosh(-x)).
2574 *      2.
2575 *                                                      [ exp(x) - 1 ]^2
2576 *          0        <= x <= ln2/2  :  cosh(x) := 1 + -------------------
2577 *                                                         2*exp(x)
2578 *
2579 *                                                 exp(x) + 1/exp(x)
2580 *          ln2/2    <= x <= 22     :  cosh(x) := -------------------
2581 *                                                        2
2582 *          22       <= x <= lnovft :  cosh(x) := exp(x)/2
2583 *          lnovft   <= x <= ln2ovft:  cosh(x) := exp(x/2)/2 * exp(x/2)
2584 *          ln2ovft  <  x           :  cosh(x) := huge*huge (overflow)
2585 *
2586 * Special cases:
2587 *      cosh(x) is |x| if x is +INF, -INF, or NaN.
2588 *      only cosh(0)=1 is exact for finite x.
2589 */
2590double cosh(double x) {
2591  static const double KCOSH_OVERFLOW = 710.4758600739439;
2592  static const double one = 1.0, half = 0.5;
2593  static volatile double huge = 1.0e+300;
2594
2595  int32_t ix;
2596
2597  /* High word of |x|. */
2598  GET_HIGH_WORD(ix, x);
2599  ix &= 0x7fffffff;
2600
2601  // |x| in [0,0.5*log2], return 1+expm1(|x|)^2/(2*exp(|x|))
2602  if (ix < 0x3fd62e43) {
2603    double t = expm1(fabs(x));
2604    double w = one + t;
2605    // For |x| < 2^-55, cosh(x) = 1
2606    if (ix < 0x3c800000) return w;
2607    return one + (t * t) / (w + w);
2608  }
2609
2610  // |x| in [0.5*log2, 22], return (exp(|x|)+1/exp(|x|)/2
2611  if (ix < 0x40360000) {
2612    double t = exp(fabs(x));
2613    return half * t + half / t;
2614  }
2615
2616  // |x| in [22, log(maxdouble)], return half*exp(|x|)
2617  if (ix < 0x40862e42) return half * exp(fabs(x));
2618
2619  // |x| in [log(maxdouble), overflowthreshold]
2620  if (fabs(x) <= KCOSH_OVERFLOW) {
2621    double w = exp(half * fabs(x));
2622    double t = half * w;
2623    return t * w;
2624  }
2625
2626  /* x is INF or NaN */
2627  if (ix >= 0x7ff00000) return x * x;
2628
2629  // |x| > overflowthreshold.
2630  return huge * huge;
2631}
2632
2633/*
2634 * ES6 draft 09-27-13, section 20.2.2.30.
2635 * Math.sinh
2636 * Method :
2637 * mathematically sinh(x) if defined to be (exp(x)-exp(-x))/2
2638 *      1. Replace x by |x| (sinh(-x) = -sinh(x)).
2639 *      2.
2640 *                                                  E + E/(E+1)
2641 *          0        <= x <= 22     :  sinh(x) := --------------, E=expm1(x)
2642 *                                                      2
2643 *
2644 *          22       <= x <= lnovft :  sinh(x) := exp(x)/2
2645 *          lnovft   <= x <= ln2ovft:  sinh(x) := exp(x/2)/2 * exp(x/2)
2646 *          ln2ovft  <  x           :  sinh(x) := x*shuge (overflow)
2647 *
2648 * Special cases:
2649 *      sinh(x) is |x| if x is +Infinity, -Infinity, or NaN.
2650 *      only sinh(0)=0 is exact for finite x.
2651 */
2652double sinh(double x) {
2653  static const double KSINH_OVERFLOW = 710.4758600739439,
2654                      TWO_M28 =
2655                          3.725290298461914e-9,  // 2^-28, empty lower half
2656      LOG_MAXD = 709.7822265625;  // 0x40862e42 00000000, empty lower half
2657  static const double shuge = 1.0e307;
2658
2659  double h = (x < 0) ? -0.5 : 0.5;
2660  // |x| in [0, 22]. return sign(x)*0.5*(E+E/(E+1))
2661  double ax = fabs(x);
2662  if (ax < 22) {
2663    // For |x| < 2^-28, sinh(x) = x
2664    if (ax < TWO_M28) return x;
2665    double t = expm1(ax);
2666    if (ax < 1) {
2667      return h * (2 * t - t * t / (t + 1));
2668    }
2669    return h * (t + t / (t + 1));
2670  }
2671  // |x| in [22, log(maxdouble)], return 0.5 * exp(|x|)
2672  if (ax < LOG_MAXD) return h * exp(ax);
2673  // |x| in [log(maxdouble), overflowthreshold]
2674  // overflowthreshold = 710.4758600739426
2675  if (ax <= KSINH_OVERFLOW) {
2676    double w = exp(0.5 * ax);
2677    double t = h * w;
2678    return t * w;
2679  }
2680  // |x| > overflowthreshold or is NaN.
2681  // Return Infinity of the appropriate sign or NaN.
2682  return x * shuge;
2683}
2684
2685/* Tanh(x)
2686 * Return the Hyperbolic Tangent of x
2687 *
2688 * Method :
2689 *                                 x    -x
2690 *                                e  - e
2691 *  0. tanh(x) is defined to be -----------
2692 *                                 x    -x
2693 *                                e  + e
2694 *  1. reduce x to non-negative by tanh(-x) = -tanh(x).
2695 *  2.  0      <= x <  2**-28 : tanh(x) := x with inexact if x != 0
2696 *                                          -t
2697 *      2**-28 <= x <  1      : tanh(x) := -----; t = expm1(-2x)
2698 *                                         t + 2
2699 *                                               2
2700 *      1      <= x <  22     : tanh(x) := 1 - -----; t = expm1(2x)
2701 *                                             t + 2
2702 *      22     <= x <= INF    : tanh(x) := 1.
2703 *
2704 * Special cases:
2705 *      tanh(NaN) is NaN;
2706 *      only tanh(0)=0 is exact for finite argument.
2707 */
2708double tanh(double x) {
2709  static const volatile double tiny = 1.0e-300;
2710  static const double one = 1.0, two = 2.0, huge = 1.0e300;
2711  double t, z;
2712  int32_t jx, ix;
2713
2714  GET_HIGH_WORD(jx, x);
2715  ix = jx & 0x7fffffff;
2716
2717  /* x is INF or NaN */
2718  if (ix >= 0x7ff00000) {
2719    if (jx >= 0)
2720      return one / x + one; /* tanh(+-inf)=+-1 */
2721    else
2722      return one / x - one; /* tanh(NaN) = NaN */
2723  }
2724
2725  /* |x| < 22 */
2726  if (ix < 0x40360000) {            /* |x|<22 */
2727    if (ix < 0x3e300000) {          /* |x|<2**-28 */
2728      if (huge + x > one) return x; /* tanh(tiny) = tiny with inexact */
2729    }
2730    if (ix >= 0x3ff00000) { /* |x|>=1  */
2731      t = expm1(two * fabs(x));
2732      z = one - two / (t + two);
2733    } else {
2734      t = expm1(-two * fabs(x));
2735      z = -t / (t + two);
2736    }
2737    /* |x| >= 22, return +-1 */
2738  } else {
2739    z = one - tiny; /* raise inexact flag */
2740  }
2741  return (jx >= 0) ? z : -z;
2742}
2743
2744}  // namespace ieee754
2745}  // namespace base
2746}  // namespace v8
2747