1// The following is adapted from fdlibm (http://www.netlib.org/fdlibm). 2// 3// ==================================================== 4// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 5// 6// Developed at SunSoft, a Sun Microsystems, Inc. business. 7// Permission to use, copy, modify, and distribute this 8// software is freely granted, provided that this notice 9// is preserved. 10// ==================================================== 11// 12// The original source code covered by the above license above has been 13// modified significantly by Google Inc. 14// Copyright 2016 the V8 project authors. All rights reserved. 15 16#include "src/base/ieee754.h" 17 18#include <cmath> 19#include <limits> 20 21#include "src/base/build_config.h" 22#include "src/base/macros.h" 23 24namespace v8 { 25namespace base { 26namespace ieee754 { 27 28namespace { 29 30/* Disable "potential divide by 0" warning in Visual Studio compiler. */ 31 32#if V8_CC_MSVC 33 34#pragma warning(disable : 4723) 35 36#endif 37 38/* 39 * The original fdlibm code used statements like: 40 * n0 = ((*(int*)&one)>>29)^1; * index of high word * 41 * ix0 = *(n0+(int*)&x); * high word of x * 42 * ix1 = *((1-n0)+(int*)&x); * low word of x * 43 * to dig two 32 bit words out of the 64 bit IEEE floating point 44 * value. That is non-ANSI, and, moreover, the gcc instruction 45 * scheduler gets it wrong. We instead use the following macros. 46 * Unlike the original code, we determine the endianness at compile 47 * time, not at run time; I don't see much benefit to selecting 48 * endianness at run time. 49 */ 50 51/* 52 * A union which permits us to convert between a double and two 32 bit 53 * ints. 54 */ 55 56#if V8_TARGET_LITTLE_ENDIAN 57 58typedef union { 59 double value; 60 struct { 61 uint32_t lsw; 62 uint32_t msw; 63 } parts; 64 struct { 65 uint64_t w; 66 } xparts; 67} ieee_double_shape_type; 68 69#else 70 71typedef union { 72 double value; 73 struct { 74 uint32_t msw; 75 uint32_t lsw; 76 } parts; 77 struct { 78 uint64_t w; 79 } xparts; 80} ieee_double_shape_type; 81 82#endif 83 84/* Get two 32 bit ints from a double. */ 85 86#define EXTRACT_WORDS(ix0, ix1, d) \ 87 do { \ 88 ieee_double_shape_type ew_u; \ 89 ew_u.value = (d); \ 90 (ix0) = ew_u.parts.msw; \ 91 (ix1) = ew_u.parts.lsw; \ 92 } while (0) 93 94/* Get a 64-bit int from a double. */ 95#define EXTRACT_WORD64(ix, d) \ 96 do { \ 97 ieee_double_shape_type ew_u; \ 98 ew_u.value = (d); \ 99 (ix) = ew_u.xparts.w; \ 100 } while (0) 101 102/* Get the more significant 32 bit int from a double. */ 103 104#define GET_HIGH_WORD(i, d) \ 105 do { \ 106 ieee_double_shape_type gh_u; \ 107 gh_u.value = (d); \ 108 (i) = gh_u.parts.msw; \ 109 } while (0) 110 111/* Get the less significant 32 bit int from a double. */ 112 113#define GET_LOW_WORD(i, d) \ 114 do { \ 115 ieee_double_shape_type gl_u; \ 116 gl_u.value = (d); \ 117 (i) = gl_u.parts.lsw; \ 118 } while (0) 119 120/* Set a double from two 32 bit ints. */ 121 122#define INSERT_WORDS(d, ix0, ix1) \ 123 do { \ 124 ieee_double_shape_type iw_u; \ 125 iw_u.parts.msw = (ix0); \ 126 iw_u.parts.lsw = (ix1); \ 127 (d) = iw_u.value; \ 128 } while (0) 129 130/* Set a double from a 64-bit int. */ 131#define INSERT_WORD64(d, ix) \ 132 do { \ 133 ieee_double_shape_type iw_u; \ 134 iw_u.xparts.w = (ix); \ 135 (d) = iw_u.value; \ 136 } while (0) 137 138/* Set the more significant 32 bits of a double from an int. */ 139 140#define SET_HIGH_WORD(d, v) \ 141 do { \ 142 ieee_double_shape_type sh_u; \ 143 sh_u.value = (d); \ 144 sh_u.parts.msw = (v); \ 145 (d) = sh_u.value; \ 146 } while (0) 147 148/* Set the less significant 32 bits of a double from an int. */ 149 150#define SET_LOW_WORD(d, v) \ 151 do { \ 152 ieee_double_shape_type sl_u; \ 153 sl_u.value = (d); \ 154 sl_u.parts.lsw = (v); \ 155 (d) = sl_u.value; \ 156 } while (0) 157 158/* Support macro. */ 159 160#define STRICT_ASSIGN(type, lval, rval) ((lval) = (rval)) 161 162int32_t __ieee754_rem_pio2(double x, double *y) WARN_UNUSED_RESULT; 163double __kernel_cos(double x, double y) WARN_UNUSED_RESULT; 164int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, 165 const int32_t *ipio2) WARN_UNUSED_RESULT; 166double __kernel_sin(double x, double y, int iy) WARN_UNUSED_RESULT; 167 168/* __ieee754_rem_pio2(x,y) 169 * 170 * return the remainder of x rem pi/2 in y[0]+y[1] 171 * use __kernel_rem_pio2() 172 */ 173int32_t __ieee754_rem_pio2(double x, double *y) { 174 /* 175 * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi 176 */ 177 static const int32_t two_over_pi[] = { 178 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C, 179 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649, 180 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, 0xA73EE8, 0x8235F5, 0x2EBB44, 181 0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 182 0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 183 0x367ECF, 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, 184 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330, 185 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, 0x91615E, 0xE61B08, 186 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA, 187 0x73A8C9, 0x60E27B, 0xC08C6B, 188 }; 189 190 static const int32_t npio2_hw[] = { 191 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C, 192 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C, 193 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A, 194 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C, 195 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB, 196 0x404858EB, 0x404921FB, 197 }; 198 199 /* 200 * invpio2: 53 bits of 2/pi 201 * pio2_1: first 33 bit of pi/2 202 * pio2_1t: pi/2 - pio2_1 203 * pio2_2: second 33 bit of pi/2 204 * pio2_2t: pi/2 - (pio2_1+pio2_2) 205 * pio2_3: third 33 bit of pi/2 206 * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3) 207 */ 208 209 static const double 210 zero = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ 211 half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */ 212 two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */ 213 invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ 214 pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */ 215 pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */ 216 pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */ 217 pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */ 218 pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */ 219 pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ 220 221 double z, w, t, r, fn; 222 double tx[3]; 223 int32_t e0, i, j, nx, n, ix, hx; 224 uint32_t low; 225 226 z = 0; 227 GET_HIGH_WORD(hx, x); /* high word of x */ 228 ix = hx & 0x7fffffff; 229 if (ix <= 0x3fe921fb) { /* |x| ~<= pi/4 , no need for reduction */ 230 y[0] = x; 231 y[1] = 0; 232 return 0; 233 } 234 if (ix < 0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */ 235 if (hx > 0) { 236 z = x - pio2_1; 237 if (ix != 0x3ff921fb) { /* 33+53 bit pi is good enough */ 238 y[0] = z - pio2_1t; 239 y[1] = (z - y[0]) - pio2_1t; 240 } else { /* near pi/2, use 33+33+53 bit pi */ 241 z -= pio2_2; 242 y[0] = z - pio2_2t; 243 y[1] = (z - y[0]) - pio2_2t; 244 } 245 return 1; 246 } else { /* negative x */ 247 z = x + pio2_1; 248 if (ix != 0x3ff921fb) { /* 33+53 bit pi is good enough */ 249 y[0] = z + pio2_1t; 250 y[1] = (z - y[0]) + pio2_1t; 251 } else { /* near pi/2, use 33+33+53 bit pi */ 252 z += pio2_2; 253 y[0] = z + pio2_2t; 254 y[1] = (z - y[0]) + pio2_2t; 255 } 256 return -1; 257 } 258 } 259 if (ix <= 0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */ 260 t = fabs(x); 261 n = static_cast<int32_t>(t * invpio2 + half); 262 fn = static_cast<double>(n); 263 r = t - fn * pio2_1; 264 w = fn * pio2_1t; /* 1st round good to 85 bit */ 265 if (n < 32 && ix != npio2_hw[n - 1]) { 266 y[0] = r - w; /* quick check no cancellation */ 267 } else { 268 uint32_t high; 269 j = ix >> 20; 270 y[0] = r - w; 271 GET_HIGH_WORD(high, y[0]); 272 i = j - ((high >> 20) & 0x7ff); 273 if (i > 16) { /* 2nd iteration needed, good to 118 */ 274 t = r; 275 w = fn * pio2_2; 276 r = t - w; 277 w = fn * pio2_2t - ((t - r) - w); 278 y[0] = r - w; 279 GET_HIGH_WORD(high, y[0]); 280 i = j - ((high >> 20) & 0x7ff); 281 if (i > 49) { /* 3rd iteration need, 151 bits acc */ 282 t = r; /* will cover all possible cases */ 283 w = fn * pio2_3; 284 r = t - w; 285 w = fn * pio2_3t - ((t - r) - w); 286 y[0] = r - w; 287 } 288 } 289 } 290 y[1] = (r - y[0]) - w; 291 if (hx < 0) { 292 y[0] = -y[0]; 293 y[1] = -y[1]; 294 return -n; 295 } else { 296 return n; 297 } 298 } 299 /* 300 * all other (large) arguments 301 */ 302 if (ix >= 0x7ff00000) { /* x is inf or NaN */ 303 y[0] = y[1] = x - x; 304 return 0; 305 } 306 /* set z = scalbn(|x|,ilogb(x)-23) */ 307 GET_LOW_WORD(low, x); 308 SET_LOW_WORD(z, low); 309 e0 = (ix >> 20) - 1046; /* e0 = ilogb(z)-23; */ 310 SET_HIGH_WORD(z, ix - static_cast<int32_t>(e0 << 20)); 311 for (i = 0; i < 2; i++) { 312 tx[i] = static_cast<double>(static_cast<int32_t>(z)); 313 z = (z - tx[i]) * two24; 314 } 315 tx[2] = z; 316 nx = 3; 317 while (tx[nx - 1] == zero) nx--; /* skip zero term */ 318 n = __kernel_rem_pio2(tx, y, e0, nx, 2, two_over_pi); 319 if (hx < 0) { 320 y[0] = -y[0]; 321 y[1] = -y[1]; 322 return -n; 323 } 324 return n; 325} 326 327/* __kernel_cos( x, y ) 328 * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 329 * Input x is assumed to be bounded by ~pi/4 in magnitude. 330 * Input y is the tail of x. 331 * 332 * Algorithm 333 * 1. Since cos(-x) = cos(x), we need only to consider positive x. 334 * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. 335 * 3. cos(x) is approximated by a polynomial of degree 14 on 336 * [0,pi/4] 337 * 4 14 338 * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x 339 * where the remez error is 340 * 341 * | 2 4 6 8 10 12 14 | -58 342 * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 343 * | | 344 * 345 * 4 6 8 10 12 14 346 * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then 347 * cos(x) = 1 - x*x/2 + r 348 * since cos(x+y) ~ cos(x) - sin(x)*y 349 * ~ cos(x) - x*y, 350 * a correction term is necessary in cos(x) and hence 351 * cos(x+y) = 1 - (x*x/2 - (r - x*y)) 352 * For better accuracy when x > 0.3, let qx = |x|/4 with 353 * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125. 354 * Then 355 * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)). 356 * Note that 1-qx and (x*x/2-qx) is EXACT here, and the 357 * magnitude of the latter is at least a quarter of x*x/2, 358 * thus, reducing the rounding error in the subtraction. 359 */ 360V8_INLINE double __kernel_cos(double x, double y) { 361 static const double 362 one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */ 363 C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */ 364 C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */ 365 C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */ 366 C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */ 367 C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */ 368 C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ 369 370 double a, iz, z, r, qx; 371 int32_t ix; 372 GET_HIGH_WORD(ix, x); 373 ix &= 0x7fffffff; /* ix = |x|'s high word*/ 374 if (ix < 0x3e400000) { /* if x < 2**27 */ 375 if (static_cast<int>(x) == 0) return one; /* generate inexact */ 376 } 377 z = x * x; 378 r = z * (C1 + z * (C2 + z * (C3 + z * (C4 + z * (C5 + z * C6))))); 379 if (ix < 0x3FD33333) { /* if |x| < 0.3 */ 380 return one - (0.5 * z - (z * r - x * y)); 381 } else { 382 if (ix > 0x3fe90000) { /* x > 0.78125 */ 383 qx = 0.28125; 384 } else { 385 INSERT_WORDS(qx, ix - 0x00200000, 0); /* x/4 */ 386 } 387 iz = 0.5 * z - qx; 388 a = one - qx; 389 return a - (iz - (z * r - x * y)); 390 } 391} 392 393/* __kernel_rem_pio2(x,y,e0,nx,prec,ipio2) 394 * double x[],y[]; int e0,nx,prec; int ipio2[]; 395 * 396 * __kernel_rem_pio2 return the last three digits of N with 397 * y = x - N*pi/2 398 * so that |y| < pi/2. 399 * 400 * The method is to compute the integer (mod 8) and fraction parts of 401 * (2/pi)*x without doing the full multiplication. In general we 402 * skip the part of the product that are known to be a huge integer ( 403 * more accurately, = 0 mod 8 ). Thus the number of operations are 404 * independent of the exponent of the input. 405 * 406 * (2/pi) is represented by an array of 24-bit integers in ipio2[]. 407 * 408 * Input parameters: 409 * x[] The input value (must be positive) is broken into nx 410 * pieces of 24-bit integers in double precision format. 411 * x[i] will be the i-th 24 bit of x. The scaled exponent 412 * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 413 * match x's up to 24 bits. 414 * 415 * Example of breaking a double positive z into x[0]+x[1]+x[2]: 416 * e0 = ilogb(z)-23 417 * z = scalbn(z,-e0) 418 * for i = 0,1,2 419 * x[i] = floor(z) 420 * z = (z-x[i])*2**24 421 * 422 * 423 * y[] output result in an array of double precision numbers. 424 * The dimension of y[] is: 425 * 24-bit precision 1 426 * 53-bit precision 2 427 * 64-bit precision 2 428 * 113-bit precision 3 429 * The actual value is the sum of them. Thus for 113-bit 430 * precison, one may have to do something like: 431 * 432 * long double t,w,r_head, r_tail; 433 * t = (long double)y[2] + (long double)y[1]; 434 * w = (long double)y[0]; 435 * r_head = t+w; 436 * r_tail = w - (r_head - t); 437 * 438 * e0 The exponent of x[0] 439 * 440 * nx dimension of x[] 441 * 442 * prec an integer indicating the precision: 443 * 0 24 bits (single) 444 * 1 53 bits (double) 445 * 2 64 bits (extended) 446 * 3 113 bits (quad) 447 * 448 * ipio2[] 449 * integer array, contains the (24*i)-th to (24*i+23)-th 450 * bit of 2/pi after binary point. The corresponding 451 * floating value is 452 * 453 * ipio2[i] * 2^(-24(i+1)). 454 * 455 * External function: 456 * double scalbn(), floor(); 457 * 458 * 459 * Here is the description of some local variables: 460 * 461 * jk jk+1 is the initial number of terms of ipio2[] needed 462 * in the computation. The recommended value is 2,3,4, 463 * 6 for single, double, extended,and quad. 464 * 465 * jz local integer variable indicating the number of 466 * terms of ipio2[] used. 467 * 468 * jx nx - 1 469 * 470 * jv index for pointing to the suitable ipio2[] for the 471 * computation. In general, we want 472 * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 473 * is an integer. Thus 474 * e0-3-24*jv >= 0 or (e0-3)/24 >= jv 475 * Hence jv = max(0,(e0-3)/24). 476 * 477 * jp jp+1 is the number of terms in PIo2[] needed, jp = jk. 478 * 479 * q[] double array with integral value, representing the 480 * 24-bits chunk of the product of x and 2/pi. 481 * 482 * q0 the corresponding exponent of q[0]. Note that the 483 * exponent for q[i] would be q0-24*i. 484 * 485 * PIo2[] double precision array, obtained by cutting pi/2 486 * into 24 bits chunks. 487 * 488 * f[] ipio2[] in floating point 489 * 490 * iq[] integer array by breaking up q[] in 24-bits chunk. 491 * 492 * fq[] final product of x*(2/pi) in fq[0],..,fq[jk] 493 * 494 * ih integer. If >0 it indicates q[] is >= 0.5, hence 495 * it also indicates the *sign* of the result. 496 * 497 */ 498int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, 499 const int32_t *ipio2) { 500 /* Constants: 501 * The hexadecimal values are the intended ones for the following 502 * constants. The decimal values may be used, provided that the 503 * compiler will convert from decimal to binary accurately enough 504 * to produce the hexadecimal values shown. 505 */ 506 static const int init_jk[] = {2, 3, 4, 6}; /* initial value for jk */ 507 508 static const double PIo2[] = { 509 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */ 510 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */ 511 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */ 512 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */ 513 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */ 514 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */ 515 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */ 516 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */ 517 }; 518 519 static const double 520 zero = 0.0, 521 one = 1.0, 522 two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */ 523 twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */ 524 525 int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih; 526 double z, fw, f[20], fq[20], q[20]; 527 528 /* initialize jk*/ 529 jk = init_jk[prec]; 530 jp = jk; 531 532 /* determine jx,jv,q0, note that 3>q0 */ 533 jx = nx - 1; 534 jv = (e0 - 3) / 24; 535 if (jv < 0) jv = 0; 536 q0 = e0 - 24 * (jv + 1); 537 538 /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ 539 j = jv - jx; 540 m = jx + jk; 541 for (i = 0; i <= m; i++, j++) { 542 f[i] = (j < 0) ? zero : static_cast<double>(ipio2[j]); 543 } 544 545 /* compute q[0],q[1],...q[jk] */ 546 for (i = 0; i <= jk; i++) { 547 for (j = 0, fw = 0.0; j <= jx; j++) fw += x[j] * f[jx + i - j]; 548 q[i] = fw; 549 } 550 551 jz = jk; 552recompute: 553 /* distill q[] into iq[] reversingly */ 554 for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) { 555 fw = static_cast<double>(static_cast<int32_t>(twon24 * z)); 556 iq[i] = static_cast<int32_t>(z - two24 * fw); 557 z = q[j - 1] + fw; 558 } 559 560 /* compute n */ 561 z = scalbn(z, q0); /* actual value of z */ 562 z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */ 563 n = static_cast<int32_t>(z); 564 z -= static_cast<double>(n); 565 ih = 0; 566 if (q0 > 0) { /* need iq[jz-1] to determine n */ 567 i = (iq[jz - 1] >> (24 - q0)); 568 n += i; 569 iq[jz - 1] -= i << (24 - q0); 570 ih = iq[jz - 1] >> (23 - q0); 571 } else if (q0 == 0) { 572 ih = iq[jz - 1] >> 23; 573 } else if (z >= 0.5) { 574 ih = 2; 575 } 576 577 if (ih > 0) { /* q > 0.5 */ 578 n += 1; 579 carry = 0; 580 for (i = 0; i < jz; i++) { /* compute 1-q */ 581 j = iq[i]; 582 if (carry == 0) { 583 if (j != 0) { 584 carry = 1; 585 iq[i] = 0x1000000 - j; 586 } 587 } else { 588 iq[i] = 0xffffff - j; 589 } 590 } 591 if (q0 > 0) { /* rare case: chance is 1 in 12 */ 592 switch (q0) { 593 case 1: 594 iq[jz - 1] &= 0x7fffff; 595 break; 596 case 2: 597 iq[jz - 1] &= 0x3fffff; 598 break; 599 } 600 } 601 if (ih == 2) { 602 z = one - z; 603 if (carry != 0) z -= scalbn(one, q0); 604 } 605 } 606 607 /* check if recomputation is needed */ 608 if (z == zero) { 609 j = 0; 610 for (i = jz - 1; i >= jk; i--) j |= iq[i]; 611 if (j == 0) { /* need recomputation */ 612 for (k = 1; jk >= k && iq[jk - k] == 0; k++) { 613 /* k = no. of terms needed */ 614 } 615 616 for (i = jz + 1; i <= jz + k; i++) { /* add q[jz+1] to q[jz+k] */ 617 f[jx + i] = ipio2[jv + i]; 618 for (j = 0, fw = 0.0; j <= jx; j++) fw += x[j] * f[jx + i - j]; 619 q[i] = fw; 620 } 621 jz += k; 622 goto recompute; 623 } 624 } 625 626 /* chop off zero terms */ 627 if (z == 0.0) { 628 jz -= 1; 629 q0 -= 24; 630 while (iq[jz] == 0) { 631 jz--; 632 q0 -= 24; 633 } 634 } else { /* break z into 24-bit if necessary */ 635 z = scalbn(z, -q0); 636 if (z >= two24) { 637 fw = static_cast<double>(static_cast<int32_t>(twon24 * z)); 638 iq[jz] = z - two24 * fw; 639 jz += 1; 640 q0 += 24; 641 iq[jz] = fw; 642 } else { 643 iq[jz] = z; 644 } 645 } 646 647 /* convert integer "bit" chunk to floating-point value */ 648 fw = scalbn(one, q0); 649 for (i = jz; i >= 0; i--) { 650 q[i] = fw * iq[i]; 651 fw *= twon24; 652 } 653 654 /* compute PIo2[0,...,jp]*q[jz,...,0] */ 655 for (i = jz; i >= 0; i--) { 656 for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++) fw += PIo2[k] * q[i + k]; 657 fq[jz - i] = fw; 658 } 659 660 /* compress fq[] into y[] */ 661 switch (prec) { 662 case 0: 663 fw = 0.0; 664 for (i = jz; i >= 0; i--) fw += fq[i]; 665 y[0] = (ih == 0) ? fw : -fw; 666 break; 667 case 1: 668 case 2: 669 fw = 0.0; 670 for (i = jz; i >= 0; i--) fw += fq[i]; 671 y[0] = (ih == 0) ? fw : -fw; 672 fw = fq[0] - fw; 673 for (i = 1; i <= jz; i++) fw += fq[i]; 674 y[1] = (ih == 0) ? fw : -fw; 675 break; 676 case 3: /* painful */ 677 for (i = jz; i > 0; i--) { 678 fw = fq[i - 1] + fq[i]; 679 fq[i] += fq[i - 1] - fw; 680 fq[i - 1] = fw; 681 } 682 for (i = jz; i > 1; i--) { 683 fw = fq[i - 1] + fq[i]; 684 fq[i] += fq[i - 1] - fw; 685 fq[i - 1] = fw; 686 } 687 for (fw = 0.0, i = jz; i >= 2; i--) fw += fq[i]; 688 if (ih == 0) { 689 y[0] = fq[0]; 690 y[1] = fq[1]; 691 y[2] = fw; 692 } else { 693 y[0] = -fq[0]; 694 y[1] = -fq[1]; 695 y[2] = -fw; 696 } 697 } 698 return n & 7; 699} 700 701/* __kernel_sin( x, y, iy) 702 * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854 703 * Input x is assumed to be bounded by ~pi/4 in magnitude. 704 * Input y is the tail of x. 705 * Input iy indicates whether y is 0. (if iy=0, y assume to be 0). 706 * 707 * Algorithm 708 * 1. Since sin(-x) = -sin(x), we need only to consider positive x. 709 * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0. 710 * 3. sin(x) is approximated by a polynomial of degree 13 on 711 * [0,pi/4] 712 * 3 13 713 * sin(x) ~ x + S1*x + ... + S6*x 714 * where 715 * 716 * |sin(x) 2 4 6 8 10 12 | -58 717 * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 718 * | x | 719 * 720 * 4. sin(x+y) = sin(x) + sin'(x')*y 721 * ~ sin(x) + (1-x*x/2)*y 722 * For better accuracy, let 723 * 3 2 2 2 2 724 * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) 725 * then 3 2 726 * sin(x) = x + (S1*x + (x *(r-y/2)+y)) 727 */ 728V8_INLINE double __kernel_sin(double x, double y, int iy) { 729 static const double 730 half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */ 731 S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ 732 S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ 733 S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */ 734 S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */ 735 S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */ 736 S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ 737 738 double z, r, v; 739 int32_t ix; 740 GET_HIGH_WORD(ix, x); 741 ix &= 0x7fffffff; /* high word of x */ 742 if (ix < 0x3e400000) { /* |x| < 2**-27 */ 743 if (static_cast<int>(x) == 0) return x; 744 } /* generate inexact */ 745 z = x * x; 746 v = z * x; 747 r = S2 + z * (S3 + z * (S4 + z * (S5 + z * S6))); 748 if (iy == 0) { 749 return x + v * (S1 + z * r); 750 } else { 751 return x - ((z * (half * y - v * r) - y) - v * S1); 752 } 753} 754 755/* __kernel_tan( x, y, k ) 756 * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854 757 * Input x is assumed to be bounded by ~pi/4 in magnitude. 758 * Input y is the tail of x. 759 * Input k indicates whether tan (if k=1) or 760 * -1/tan (if k= -1) is returned. 761 * 762 * Algorithm 763 * 1. Since tan(-x) = -tan(x), we need only to consider positive x. 764 * 2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0. 765 * 3. tan(x) is approximated by a odd polynomial of degree 27 on 766 * [0,0.67434] 767 * 3 27 768 * tan(x) ~ x + T1*x + ... + T13*x 769 * where 770 * 771 * |tan(x) 2 4 26 | -59.2 772 * |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2 773 * | x | 774 * 775 * Note: tan(x+y) = tan(x) + tan'(x)*y 776 * ~ tan(x) + (1+x*x)*y 777 * Therefore, for better accuracy in computing tan(x+y), let 778 * 3 2 2 2 2 779 * r = x *(T2+x *(T3+x *(...+x *(T12+x *T13)))) 780 * then 781 * 3 2 782 * tan(x+y) = x + (T1*x + (x *(r+y)+y)) 783 * 784 * 4. For x in [0.67434,pi/4], let y = pi/4 - x, then 785 * tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y)) 786 * = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y))) 787 */ 788double __kernel_tan(double x, double y, int iy) { 789 static const double xxx[] = { 790 3.33333333333334091986e-01, /* 3FD55555, 55555563 */ 791 1.33333333333201242699e-01, /* 3FC11111, 1110FE7A */ 792 5.39682539762260521377e-02, /* 3FABA1BA, 1BB341FE */ 793 2.18694882948595424599e-02, /* 3F9664F4, 8406D637 */ 794 8.86323982359930005737e-03, /* 3F8226E3, E96E8493 */ 795 3.59207910759131235356e-03, /* 3F6D6D22, C9560328 */ 796 1.45620945432529025516e-03, /* 3F57DBC8, FEE08315 */ 797 5.88041240820264096874e-04, /* 3F4344D8, F2F26501 */ 798 2.46463134818469906812e-04, /* 3F3026F7, 1A8D1068 */ 799 7.81794442939557092300e-05, /* 3F147E88, A03792A6 */ 800 7.14072491382608190305e-05, /* 3F12B80F, 32F0A7E9 */ 801 -1.85586374855275456654e-05, /* BEF375CB, DB605373 */ 802 2.59073051863633712884e-05, /* 3EFB2A70, 74BF7AD4 */ 803 /* one */ 1.00000000000000000000e+00, /* 3FF00000, 00000000 */ 804 /* pio4 */ 7.85398163397448278999e-01, /* 3FE921FB, 54442D18 */ 805 /* pio4lo */ 3.06161699786838301793e-17 /* 3C81A626, 33145C07 */ 806 }; 807#define one xxx[13] 808#define pio4 xxx[14] 809#define pio4lo xxx[15] 810#define T xxx 811 812 double z, r, v, w, s; 813 int32_t ix, hx; 814 815 GET_HIGH_WORD(hx, x); /* high word of x */ 816 ix = hx & 0x7fffffff; /* high word of |x| */ 817 if (ix < 0x3e300000) { /* x < 2**-28 */ 818 if (static_cast<int>(x) == 0) { /* generate inexact */ 819 uint32_t low; 820 GET_LOW_WORD(low, x); 821 if (((ix | low) | (iy + 1)) == 0) { 822 return one / fabs(x); 823 } else { 824 if (iy == 1) { 825 return x; 826 } else { /* compute -1 / (x+y) carefully */ 827 double a, t; 828 829 z = w = x + y; 830 SET_LOW_WORD(z, 0); 831 v = y - (z - x); 832 t = a = -one / w; 833 SET_LOW_WORD(t, 0); 834 s = one + t * z; 835 return t + a * (s + t * v); 836 } 837 } 838 } 839 } 840 if (ix >= 0x3FE59428) { /* |x| >= 0.6744 */ 841 if (hx < 0) { 842 x = -x; 843 y = -y; 844 } 845 z = pio4 - x; 846 w = pio4lo - y; 847 x = z + w; 848 y = 0.0; 849 } 850 z = x * x; 851 w = z * z; 852 /* 853 * Break x^5*(T[1]+x^2*T[2]+...) into 854 * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) + 855 * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12])) 856 */ 857 r = T[1] + w * (T[3] + w * (T[5] + w * (T[7] + w * (T[9] + w * T[11])))); 858 v = z * 859 (T[2] + w * (T[4] + w * (T[6] + w * (T[8] + w * (T[10] + w * T[12]))))); 860 s = z * x; 861 r = y + z * (s * (r + v) + y); 862 r += T[0] * s; 863 w = x + r; 864 if (ix >= 0x3FE59428) { 865 v = iy; 866 return (1 - ((hx >> 30) & 2)) * (v - 2.0 * (x - (w * w / (w + v) - r))); 867 } 868 if (iy == 1) { 869 return w; 870 } else { 871 /* 872 * if allow error up to 2 ulp, simply return 873 * -1.0 / (x+r) here 874 */ 875 /* compute -1.0 / (x+r) accurately */ 876 double a, t; 877 z = w; 878 SET_LOW_WORD(z, 0); 879 v = r - (z - x); /* z+v = r+x */ 880 t = a = -1.0 / w; /* a = -1.0/w */ 881 SET_LOW_WORD(t, 0); 882 s = 1.0 + t * z; 883 return t + a * (s + t * v); 884 } 885 886#undef one 887#undef pio4 888#undef pio4lo 889#undef T 890} 891 892} // namespace 893 894/* acos(x) 895 * Method : 896 * acos(x) = pi/2 - asin(x) 897 * acos(-x) = pi/2 + asin(x) 898 * For |x|<=0.5 899 * acos(x) = pi/2 - (x + x*x^2*R(x^2)) (see asin.c) 900 * For x>0.5 901 * acos(x) = pi/2 - (pi/2 - 2asin(sqrt((1-x)/2))) 902 * = 2asin(sqrt((1-x)/2)) 903 * = 2s + 2s*z*R(z) ...z=(1-x)/2, s=sqrt(z) 904 * = 2f + (2c + 2s*z*R(z)) 905 * where f=hi part of s, and c = (z-f*f)/(s+f) is the correction term 906 * for f so that f+c ~ sqrt(z). 907 * For x<-0.5 908 * acos(x) = pi - 2asin(sqrt((1-|x|)/2)) 909 * = pi - 0.5*(s+s*z*R(z)), where z=(1-|x|)/2,s=sqrt(z) 910 * 911 * Special cases: 912 * if x is NaN, return x itself; 913 * if |x|>1, return NaN with invalid signal. 914 * 915 * Function needed: sqrt 916 */ 917double acos(double x) { 918 static const double 919 one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */ 920 pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */ 921 pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */ 922 pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */ 923 pS0 = 1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */ 924 pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */ 925 pS2 = 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */ 926 pS3 = -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */ 927 pS4 = 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */ 928 pS5 = 3.47933107596021167570e-05, /* 0x3F023DE1, 0x0DFDF709 */ 929 qS1 = -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */ 930 qS2 = 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */ 931 qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */ 932 qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ 933 934 double z, p, q, r, w, s, c, df; 935 int32_t hx, ix; 936 GET_HIGH_WORD(hx, x); 937 ix = hx & 0x7fffffff; 938 if (ix >= 0x3ff00000) { /* |x| >= 1 */ 939 uint32_t lx; 940 GET_LOW_WORD(lx, x); 941 if (((ix - 0x3ff00000) | lx) == 0) { /* |x|==1 */ 942 if (hx > 0) 943 return 0.0; /* acos(1) = 0 */ 944 else 945 return pi + 2.0 * pio2_lo; /* acos(-1)= pi */ 946 } 947 return (x - x) / (x - x); /* acos(|x|>1) is NaN */ 948 } 949 if (ix < 0x3fe00000) { /* |x| < 0.5 */ 950 if (ix <= 0x3c600000) return pio2_hi + pio2_lo; /*if|x|<2**-57*/ 951 z = x * x; 952 p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5))))); 953 q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4))); 954 r = p / q; 955 return pio2_hi - (x - (pio2_lo - x * r)); 956 } else if (hx < 0) { /* x < -0.5 */ 957 z = (one + x) * 0.5; 958 p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5))))); 959 q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4))); 960 s = sqrt(z); 961 r = p / q; 962 w = r * s - pio2_lo; 963 return pi - 2.0 * (s + w); 964 } else { /* x > 0.5 */ 965 z = (one - x) * 0.5; 966 s = sqrt(z); 967 df = s; 968 SET_LOW_WORD(df, 0); 969 c = (z - df * df) / (s + df); 970 p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5))))); 971 q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4))); 972 r = p / q; 973 w = r * s + c; 974 return 2.0 * (df + w); 975 } 976} 977 978/* acosh(x) 979 * Method : 980 * Based on 981 * acosh(x) = log [ x + sqrt(x*x-1) ] 982 * we have 983 * acosh(x) := log(x)+ln2, if x is large; else 984 * acosh(x) := log(2x-1/(sqrt(x*x-1)+x)) if x>2; else 985 * acosh(x) := log1p(t+sqrt(2.0*t+t*t)); where t=x-1. 986 * 987 * Special cases: 988 * acosh(x) is NaN with signal if x<1. 989 * acosh(NaN) is NaN without signal. 990 */ 991double acosh(double x) { 992 static const double 993 one = 1.0, 994 ln2 = 6.93147180559945286227e-01; /* 0x3FE62E42, 0xFEFA39EF */ 995 double t; 996 int32_t hx; 997 uint32_t lx; 998 EXTRACT_WORDS(hx, lx, x); 999 if (hx < 0x3ff00000) { /* x < 1 */ 1000 return (x - x) / (x - x); 1001 } else if (hx >= 0x41b00000) { /* x > 2**28 */ 1002 if (hx >= 0x7ff00000) { /* x is inf of NaN */ 1003 return x + x; 1004 } else { 1005 return log(x) + ln2; /* acosh(huge)=log(2x) */ 1006 } 1007 } else if (((hx - 0x3ff00000) | lx) == 0) { 1008 return 0.0; /* acosh(1) = 0 */ 1009 } else if (hx > 0x40000000) { /* 2**28 > x > 2 */ 1010 t = x * x; 1011 return log(2.0 * x - one / (x + sqrt(t - one))); 1012 } else { /* 1<x<2 */ 1013 t = x - one; 1014 return log1p(t + sqrt(2.0 * t + t * t)); 1015 } 1016} 1017 1018/* asin(x) 1019 * Method : 1020 * Since asin(x) = x + x^3/6 + x^5*3/40 + x^7*15/336 + ... 1021 * we approximate asin(x) on [0,0.5] by 1022 * asin(x) = x + x*x^2*R(x^2) 1023 * where 1024 * R(x^2) is a rational approximation of (asin(x)-x)/x^3 1025 * and its remez error is bounded by 1026 * |(asin(x)-x)/x^3 - R(x^2)| < 2^(-58.75) 1027 * 1028 * For x in [0.5,1] 1029 * asin(x) = pi/2-2*asin(sqrt((1-x)/2)) 1030 * Let y = (1-x), z = y/2, s := sqrt(z), and pio2_hi+pio2_lo=pi/2; 1031 * then for x>0.98 1032 * asin(x) = pi/2 - 2*(s+s*z*R(z)) 1033 * = pio2_hi - (2*(s+s*z*R(z)) - pio2_lo) 1034 * For x<=0.98, let pio4_hi = pio2_hi/2, then 1035 * f = hi part of s; 1036 * c = sqrt(z) - f = (z-f*f)/(s+f) ...f+c=sqrt(z) 1037 * and 1038 * asin(x) = pi/2 - 2*(s+s*z*R(z)) 1039 * = pio4_hi+(pio4-2s)-(2s*z*R(z)-pio2_lo) 1040 * = pio4_hi+(pio4-2f)-(2s*z*R(z)-(pio2_lo+2c)) 1041 * 1042 * Special cases: 1043 * if x is NaN, return x itself; 1044 * if |x|>1, return NaN with invalid signal. 1045 */ 1046double asin(double x) { 1047 static const double 1048 one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */ 1049 huge = 1.000e+300, 1050 pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */ 1051 pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */ 1052 pio4_hi = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */ 1053 /* coefficient for R(x^2) */ 1054 pS0 = 1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */ 1055 pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */ 1056 pS2 = 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */ 1057 pS3 = -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */ 1058 pS4 = 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */ 1059 pS5 = 3.47933107596021167570e-05, /* 0x3F023DE1, 0x0DFDF709 */ 1060 qS1 = -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */ 1061 qS2 = 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */ 1062 qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */ 1063 qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ 1064 1065 double t, w, p, q, c, r, s; 1066 int32_t hx, ix; 1067 1068 t = 0; 1069 GET_HIGH_WORD(hx, x); 1070 ix = hx & 0x7fffffff; 1071 if (ix >= 0x3ff00000) { /* |x|>= 1 */ 1072 uint32_t lx; 1073 GET_LOW_WORD(lx, x); 1074 if (((ix - 0x3ff00000) | lx) == 0) /* asin(1)=+-pi/2 with inexact */ 1075 return x * pio2_hi + x * pio2_lo; 1076 return (x - x) / (x - x); /* asin(|x|>1) is NaN */ 1077 } else if (ix < 0x3fe00000) { /* |x|<0.5 */ 1078 if (ix < 0x3e400000) { /* if |x| < 2**-27 */ 1079 if (huge + x > one) return x; /* return x with inexact if x!=0*/ 1080 } else { 1081 t = x * x; 1082 } 1083 p = t * (pS0 + t * (pS1 + t * (pS2 + t * (pS3 + t * (pS4 + t * pS5))))); 1084 q = one + t * (qS1 + t * (qS2 + t * (qS3 + t * qS4))); 1085 w = p / q; 1086 return x + x * w; 1087 } 1088 /* 1> |x|>= 0.5 */ 1089 w = one - fabs(x); 1090 t = w * 0.5; 1091 p = t * (pS0 + t * (pS1 + t * (pS2 + t * (pS3 + t * (pS4 + t * pS5))))); 1092 q = one + t * (qS1 + t * (qS2 + t * (qS3 + t * qS4))); 1093 s = sqrt(t); 1094 if (ix >= 0x3FEF3333) { /* if |x| > 0.975 */ 1095 w = p / q; 1096 t = pio2_hi - (2.0 * (s + s * w) - pio2_lo); 1097 } else { 1098 w = s; 1099 SET_LOW_WORD(w, 0); 1100 c = (t - w * w) / (s + w); 1101 r = p / q; 1102 p = 2.0 * s * r - (pio2_lo - 2.0 * c); 1103 q = pio4_hi - 2.0 * w; 1104 t = pio4_hi - (p - q); 1105 } 1106 if (hx > 0) 1107 return t; 1108 else 1109 return -t; 1110} 1111/* asinh(x) 1112 * Method : 1113 * Based on 1114 * asinh(x) = sign(x) * log [ |x| + sqrt(x*x+1) ] 1115 * we have 1116 * asinh(x) := x if 1+x*x=1, 1117 * := sign(x)*(log(x)+ln2)) for large |x|, else 1118 * := sign(x)*log(2|x|+1/(|x|+sqrt(x*x+1))) if|x|>2, else 1119 * := sign(x)*log1p(|x| + x^2/(1 + sqrt(1+x^2))) 1120 */ 1121double asinh(double x) { 1122 static const double 1123 one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */ 1124 ln2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */ 1125 huge = 1.00000000000000000000e+300; 1126 1127 double t, w; 1128 int32_t hx, ix; 1129 GET_HIGH_WORD(hx, x); 1130 ix = hx & 0x7fffffff; 1131 if (ix >= 0x7ff00000) return x + x; /* x is inf or NaN */ 1132 if (ix < 0x3e300000) { /* |x|<2**-28 */ 1133 if (huge + x > one) return x; /* return x inexact except 0 */ 1134 } 1135 if (ix > 0x41b00000) { /* |x| > 2**28 */ 1136 w = log(fabs(x)) + ln2; 1137 } else if (ix > 0x40000000) { /* 2**28 > |x| > 2.0 */ 1138 t = fabs(x); 1139 w = log(2.0 * t + one / (sqrt(x * x + one) + t)); 1140 } else { /* 2.0 > |x| > 2**-28 */ 1141 t = x * x; 1142 w = log1p(fabs(x) + t / (one + sqrt(one + t))); 1143 } 1144 if (hx > 0) { 1145 return w; 1146 } else { 1147 return -w; 1148 } 1149} 1150 1151/* atan(x) 1152 * Method 1153 * 1. Reduce x to positive by atan(x) = -atan(-x). 1154 * 2. According to the integer k=4t+0.25 chopped, t=x, the argument 1155 * is further reduced to one of the following intervals and the 1156 * arctangent of t is evaluated by the corresponding formula: 1157 * 1158 * [0,7/16] atan(x) = t-t^3*(a1+t^2*(a2+...(a10+t^2*a11)...) 1159 * [7/16,11/16] atan(x) = atan(1/2) + atan( (t-0.5)/(1+t/2) ) 1160 * [11/16.19/16] atan(x) = atan( 1 ) + atan( (t-1)/(1+t) ) 1161 * [19/16,39/16] atan(x) = atan(3/2) + atan( (t-1.5)/(1+1.5t) ) 1162 * [39/16,INF] atan(x) = atan(INF) + atan( -1/t ) 1163 * 1164 * Constants: 1165 * The hexadecimal values are the intended ones for the following 1166 * constants. The decimal values may be used, provided that the 1167 * compiler will convert from decimal to binary accurately enough 1168 * to produce the hexadecimal values shown. 1169 */ 1170double atan(double x) { 1171 static const double atanhi[] = { 1172 4.63647609000806093515e-01, /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */ 1173 7.85398163397448278999e-01, /* atan(1.0)hi 0x3FE921FB, 0x54442D18 */ 1174 9.82793723247329054082e-01, /* atan(1.5)hi 0x3FEF730B, 0xD281F69B */ 1175 1.57079632679489655800e+00, /* atan(inf)hi 0x3FF921FB, 0x54442D18 */ 1176 }; 1177 1178 static const double atanlo[] = { 1179 2.26987774529616870924e-17, /* atan(0.5)lo 0x3C7A2B7F, 0x222F65E2 */ 1180 3.06161699786838301793e-17, /* atan(1.0)lo 0x3C81A626, 0x33145C07 */ 1181 1.39033110312309984516e-17, /* atan(1.5)lo 0x3C700788, 0x7AF0CBBD */ 1182 6.12323399573676603587e-17, /* atan(inf)lo 0x3C91A626, 0x33145C07 */ 1183 }; 1184 1185 static const double aT[] = { 1186 3.33333333333329318027e-01, /* 0x3FD55555, 0x5555550D */ 1187 -1.99999999998764832476e-01, /* 0xBFC99999, 0x9998EBC4 */ 1188 1.42857142725034663711e-01, /* 0x3FC24924, 0x920083FF */ 1189 -1.11111104054623557880e-01, /* 0xBFBC71C6, 0xFE231671 */ 1190 9.09088713343650656196e-02, /* 0x3FB745CD, 0xC54C206E */ 1191 -7.69187620504482999495e-02, /* 0xBFB3B0F2, 0xAF749A6D */ 1192 6.66107313738753120669e-02, /* 0x3FB10D66, 0xA0D03D51 */ 1193 -5.83357013379057348645e-02, /* 0xBFADDE2D, 0x52DEFD9A */ 1194 4.97687799461593236017e-02, /* 0x3FA97B4B, 0x24760DEB */ 1195 -3.65315727442169155270e-02, /* 0xBFA2B444, 0x2C6A6C2F */ 1196 1.62858201153657823623e-02, /* 0x3F90AD3A, 0xE322DA11 */ 1197 }; 1198 1199 static const double one = 1.0, huge = 1.0e300; 1200 1201 double w, s1, s2, z; 1202 int32_t ix, hx, id; 1203 1204 GET_HIGH_WORD(hx, x); 1205 ix = hx & 0x7fffffff; 1206 if (ix >= 0x44100000) { /* if |x| >= 2^66 */ 1207 uint32_t low; 1208 GET_LOW_WORD(low, x); 1209 if (ix > 0x7ff00000 || (ix == 0x7ff00000 && (low != 0))) 1210 return x + x; /* NaN */ 1211 if (hx > 0) 1212 return atanhi[3] + *(volatile double *)&atanlo[3]; 1213 else 1214 return -atanhi[3] - *(volatile double *)&atanlo[3]; 1215 } 1216 if (ix < 0x3fdc0000) { /* |x| < 0.4375 */ 1217 if (ix < 0x3e400000) { /* |x| < 2^-27 */ 1218 if (huge + x > one) return x; /* raise inexact */ 1219 } 1220 id = -1; 1221 } else { 1222 x = fabs(x); 1223 if (ix < 0x3ff30000) { /* |x| < 1.1875 */ 1224 if (ix < 0x3fe60000) { /* 7/16 <=|x|<11/16 */ 1225 id = 0; 1226 x = (2.0 * x - one) / (2.0 + x); 1227 } else { /* 11/16<=|x|< 19/16 */ 1228 id = 1; 1229 x = (x - one) / (x + one); 1230 } 1231 } else { 1232 if (ix < 0x40038000) { /* |x| < 2.4375 */ 1233 id = 2; 1234 x = (x - 1.5) / (one + 1.5 * x); 1235 } else { /* 2.4375 <= |x| < 2^66 */ 1236 id = 3; 1237 x = -1.0 / x; 1238 } 1239 } 1240 } 1241 /* end of argument reduction */ 1242 z = x * x; 1243 w = z * z; 1244 /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */ 1245 s1 = z * (aT[0] + 1246 w * (aT[2] + w * (aT[4] + w * (aT[6] + w * (aT[8] + w * aT[10]))))); 1247 s2 = w * (aT[1] + w * (aT[3] + w * (aT[5] + w * (aT[7] + w * aT[9])))); 1248 if (id < 0) { 1249 return x - x * (s1 + s2); 1250 } else { 1251 z = atanhi[id] - ((x * (s1 + s2) - atanlo[id]) - x); 1252 return (hx < 0) ? -z : z; 1253 } 1254} 1255 1256/* atan2(y,x) 1257 * Method : 1258 * 1. Reduce y to positive by atan2(y,x)=-atan2(-y,x). 1259 * 2. Reduce x to positive by (if x and y are unexceptional): 1260 * ARG (x+iy) = arctan(y/x) ... if x > 0, 1261 * ARG (x+iy) = pi - arctan[y/(-x)] ... if x < 0, 1262 * 1263 * Special cases: 1264 * 1265 * ATAN2((anything), NaN ) is NaN; 1266 * ATAN2(NAN , (anything) ) is NaN; 1267 * ATAN2(+-0, +(anything but NaN)) is +-0 ; 1268 * ATAN2(+-0, -(anything but NaN)) is +-pi ; 1269 * ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2; 1270 * ATAN2(+-(anything but INF and NaN), +INF) is +-0 ; 1271 * ATAN2(+-(anything but INF and NaN), -INF) is +-pi; 1272 * ATAN2(+-INF,+INF ) is +-pi/4 ; 1273 * ATAN2(+-INF,-INF ) is +-3pi/4; 1274 * ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2; 1275 * 1276 * Constants: 1277 * The hexadecimal values are the intended ones for the following 1278 * constants. The decimal values may be used, provided that the 1279 * compiler will convert from decimal to binary accurately enough 1280 * to produce the hexadecimal values shown. 1281 */ 1282double atan2(double y, double x) { 1283 static volatile double tiny = 1.0e-300; 1284 static const double 1285 zero = 0.0, 1286 pi_o_4 = 7.8539816339744827900E-01, /* 0x3FE921FB, 0x54442D18 */ 1287 pi_o_2 = 1.5707963267948965580E+00, /* 0x3FF921FB, 0x54442D18 */ 1288 pi = 3.1415926535897931160E+00; /* 0x400921FB, 0x54442D18 */ 1289 static volatile double pi_lo = 1290 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */ 1291 1292 double z; 1293 int32_t k, m, hx, hy, ix, iy; 1294 uint32_t lx, ly; 1295 1296 EXTRACT_WORDS(hx, lx, x); 1297 ix = hx & 0x7fffffff; 1298 EXTRACT_WORDS(hy, ly, y); 1299 iy = hy & 0x7fffffff; 1300 if (((ix | ((lx | -static_cast<int32_t>(lx)) >> 31)) > 0x7ff00000) || 1301 ((iy | ((ly | -static_cast<int32_t>(ly)) >> 31)) > 0x7ff00000)) { 1302 return x + y; /* x or y is NaN */ 1303 } 1304 if (((hx - 0x3ff00000) | lx) == 0) return atan(y); /* x=1.0 */ 1305 m = ((hy >> 31) & 1) | ((hx >> 30) & 2); /* 2*sign(x)+sign(y) */ 1306 1307 /* when y = 0 */ 1308 if ((iy | ly) == 0) { 1309 switch (m) { 1310 case 0: 1311 case 1: 1312 return y; /* atan(+-0,+anything)=+-0 */ 1313 case 2: 1314 return pi + tiny; /* atan(+0,-anything) = pi */ 1315 case 3: 1316 return -pi - tiny; /* atan(-0,-anything) =-pi */ 1317 } 1318 } 1319 /* when x = 0 */ 1320 if ((ix | lx) == 0) return (hy < 0) ? -pi_o_2 - tiny : pi_o_2 + tiny; 1321 1322 /* when x is INF */ 1323 if (ix == 0x7ff00000) { 1324 if (iy == 0x7ff00000) { 1325 switch (m) { 1326 case 0: 1327 return pi_o_4 + tiny; /* atan(+INF,+INF) */ 1328 case 1: 1329 return -pi_o_4 - tiny; /* atan(-INF,+INF) */ 1330 case 2: 1331 return 3.0 * pi_o_4 + tiny; /*atan(+INF,-INF)*/ 1332 case 3: 1333 return -3.0 * pi_o_4 - tiny; /*atan(-INF,-INF)*/ 1334 } 1335 } else { 1336 switch (m) { 1337 case 0: 1338 return zero; /* atan(+...,+INF) */ 1339 case 1: 1340 return -zero; /* atan(-...,+INF) */ 1341 case 2: 1342 return pi + tiny; /* atan(+...,-INF) */ 1343 case 3: 1344 return -pi - tiny; /* atan(-...,-INF) */ 1345 } 1346 } 1347 } 1348 /* when y is INF */ 1349 if (iy == 0x7ff00000) return (hy < 0) ? -pi_o_2 - tiny : pi_o_2 + tiny; 1350 1351 /* compute y/x */ 1352 k = (iy - ix) >> 20; 1353 if (k > 60) { /* |y/x| > 2**60 */ 1354 z = pi_o_2 + 0.5 * pi_lo; 1355 m &= 1; 1356 } else if (hx < 0 && k < -60) { 1357 z = 0.0; /* 0 > |y|/x > -2**-60 */ 1358 } else { 1359 z = atan(fabs(y / x)); /* safe to do y/x */ 1360 } 1361 switch (m) { 1362 case 0: 1363 return z; /* atan(+,+) */ 1364 case 1: 1365 return -z; /* atan(-,+) */ 1366 case 2: 1367 return pi - (z - pi_lo); /* atan(+,-) */ 1368 default: /* case 3 */ 1369 return (z - pi_lo) - pi; /* atan(-,-) */ 1370 } 1371} 1372 1373/* cos(x) 1374 * Return cosine function of x. 1375 * 1376 * kernel function: 1377 * __kernel_sin ... sine function on [-pi/4,pi/4] 1378 * __kernel_cos ... cosine function on [-pi/4,pi/4] 1379 * __ieee754_rem_pio2 ... argument reduction routine 1380 * 1381 * Method. 1382 * Let S,C and T denote the sin, cos and tan respectively on 1383 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 1384 * in [-pi/4 , +pi/4], and let n = k mod 4. 1385 * We have 1386 * 1387 * n sin(x) cos(x) tan(x) 1388 * ---------------------------------------------------------- 1389 * 0 S C T 1390 * 1 C -S -1/T 1391 * 2 -S -C T 1392 * 3 -C S -1/T 1393 * ---------------------------------------------------------- 1394 * 1395 * Special cases: 1396 * Let trig be any of sin, cos, or tan. 1397 * trig(+-INF) is NaN, with signals; 1398 * trig(NaN) is that NaN; 1399 * 1400 * Accuracy: 1401 * TRIG(x) returns trig(x) nearly rounded 1402 */ 1403double cos(double x) { 1404 double y[2], z = 0.0; 1405 int32_t n, ix; 1406 1407 /* High word of x. */ 1408 GET_HIGH_WORD(ix, x); 1409 1410 /* |x| ~< pi/4 */ 1411 ix &= 0x7fffffff; 1412 if (ix <= 0x3fe921fb) { 1413 return __kernel_cos(x, z); 1414 } else if (ix >= 0x7ff00000) { 1415 /* cos(Inf or NaN) is NaN */ 1416 return x - x; 1417 } else { 1418 /* argument reduction needed */ 1419 n = __ieee754_rem_pio2(x, y); 1420 switch (n & 3) { 1421 case 0: 1422 return __kernel_cos(y[0], y[1]); 1423 case 1: 1424 return -__kernel_sin(y[0], y[1], 1); 1425 case 2: 1426 return -__kernel_cos(y[0], y[1]); 1427 default: 1428 return __kernel_sin(y[0], y[1], 1); 1429 } 1430 } 1431} 1432 1433/* exp(x) 1434 * Returns the exponential of x. 1435 * 1436 * Method 1437 * 1. Argument reduction: 1438 * Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658. 1439 * Given x, find r and integer k such that 1440 * 1441 * x = k*ln2 + r, |r| <= 0.5*ln2. 1442 * 1443 * Here r will be represented as r = hi-lo for better 1444 * accuracy. 1445 * 1446 * 2. Approximation of exp(r) by a special rational function on 1447 * the interval [0,0.34658]: 1448 * Write 1449 * R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ... 1450 * We use a special Remes algorithm on [0,0.34658] to generate 1451 * a polynomial of degree 5 to approximate R. The maximum error 1452 * of this polynomial approximation is bounded by 2**-59. In 1453 * other words, 1454 * R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5 1455 * (where z=r*r, and the values of P1 to P5 are listed below) 1456 * and 1457 * | 5 | -59 1458 * | 2.0+P1*z+...+P5*z - R(z) | <= 2 1459 * | | 1460 * The computation of exp(r) thus becomes 1461 * 2*r 1462 * exp(r) = 1 + ------- 1463 * R - r 1464 * r*R1(r) 1465 * = 1 + r + ----------- (for better accuracy) 1466 * 2 - R1(r) 1467 * where 1468 * 2 4 10 1469 * R1(r) = r - (P1*r + P2*r + ... + P5*r ). 1470 * 1471 * 3. Scale back to obtain exp(x): 1472 * From step 1, we have 1473 * exp(x) = 2^k * exp(r) 1474 * 1475 * Special cases: 1476 * exp(INF) is INF, exp(NaN) is NaN; 1477 * exp(-INF) is 0, and 1478 * for finite argument, only exp(0)=1 is exact. 1479 * 1480 * Accuracy: 1481 * according to an error analysis, the error is always less than 1482 * 1 ulp (unit in the last place). 1483 * 1484 * Misc. info. 1485 * For IEEE double 1486 * if x > 7.09782712893383973096e+02 then exp(x) overflow 1487 * if x < -7.45133219101941108420e+02 then exp(x) underflow 1488 * 1489 * Constants: 1490 * The hexadecimal values are the intended ones for the following 1491 * constants. The decimal values may be used, provided that the 1492 * compiler will convert from decimal to binary accurately enough 1493 * to produce the hexadecimal values shown. 1494 */ 1495double exp(double x) { 1496 static const double 1497 one = 1.0, 1498 halF[2] = {0.5, -0.5}, 1499 o_threshold = 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */ 1500 u_threshold = -7.45133219101941108420e+02, /* 0xc0874910, 0xD52D3051 */ 1501 ln2HI[2] = {6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */ 1502 -6.93147180369123816490e-01}, /* 0xbfe62e42, 0xfee00000 */ 1503 ln2LO[2] = {1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */ 1504 -1.90821492927058770002e-10}, /* 0xbdea39ef, 0x35793c76 */ 1505 invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */ 1506 P1 = 1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */ 1507 P2 = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */ 1508 P3 = 6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */ 1509 P4 = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */ 1510 P5 = 4.13813679705723846039e-08, /* 0x3E663769, 0x72BEA4D0 */ 1511 E = 2.718281828459045; /* 0x4005bf0a, 0x8b145769 */ 1512 1513 static volatile double 1514 huge = 1.0e+300, 1515 twom1000 = 9.33263618503218878990e-302, /* 2**-1000=0x01700000,0*/ 1516 two1023 = 8.988465674311579539e307; /* 0x1p1023 */ 1517 1518 double y, hi = 0.0, lo = 0.0, c, t, twopk; 1519 int32_t k = 0, xsb; 1520 uint32_t hx; 1521 1522 GET_HIGH_WORD(hx, x); 1523 xsb = (hx >> 31) & 1; /* sign bit of x */ 1524 hx &= 0x7fffffff; /* high word of |x| */ 1525 1526 /* filter out non-finite argument */ 1527 if (hx >= 0x40862E42) { /* if |x|>=709.78... */ 1528 if (hx >= 0x7ff00000) { 1529 uint32_t lx; 1530 GET_LOW_WORD(lx, x); 1531 if (((hx & 0xfffff) | lx) != 0) 1532 return x + x; /* NaN */ 1533 else 1534 return (xsb == 0) ? x : 0.0; /* exp(+-inf)={inf,0} */ 1535 } 1536 if (x > o_threshold) return huge * huge; /* overflow */ 1537 if (x < u_threshold) return twom1000 * twom1000; /* underflow */ 1538 } 1539 1540 /* argument reduction */ 1541 if (hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */ 1542 if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */ 1543 /* TODO(rtoy): We special case exp(1) here to return the correct 1544 * value of E, as the computation below would get the last bit 1545 * wrong. We should probably fix the algorithm instead. 1546 */ 1547 if (x == 1.0) return E; 1548 hi = x - ln2HI[xsb]; 1549 lo = ln2LO[xsb]; 1550 k = 1 - xsb - xsb; 1551 } else { 1552 k = static_cast<int>(invln2 * x + halF[xsb]); 1553 t = k; 1554 hi = x - t * ln2HI[0]; /* t*ln2HI is exact here */ 1555 lo = t * ln2LO[0]; 1556 } 1557 STRICT_ASSIGN(double, x, hi - lo); 1558 } else if (hx < 0x3e300000) { /* when |x|<2**-28 */ 1559 if (huge + x > one) return one + x; /* trigger inexact */ 1560 } else { 1561 k = 0; 1562 } 1563 1564 /* x is now in primary range */ 1565 t = x * x; 1566 if (k >= -1021) { 1567 INSERT_WORDS(twopk, 0x3ff00000 + (k << 20), 0); 1568 } else { 1569 INSERT_WORDS(twopk, 0x3ff00000 + ((k + 1000) << 20), 0); 1570 } 1571 c = x - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5)))); 1572 if (k == 0) { 1573 return one - ((x * c) / (c - 2.0) - x); 1574 } else { 1575 y = one - ((lo - (x * c) / (2.0 - c)) - hi); 1576 } 1577 if (k >= -1021) { 1578 if (k == 1024) return y * 2.0 * two1023; 1579 return y * twopk; 1580 } else { 1581 return y * twopk * twom1000; 1582 } 1583} 1584 1585/* 1586 * Method : 1587 * 1.Reduced x to positive by atanh(-x) = -atanh(x) 1588 * 2.For x>=0.5 1589 * 1 2x x 1590 * atanh(x) = --- * log(1 + -------) = 0.5 * log1p(2 * --------) 1591 * 2 1 - x 1 - x 1592 * 1593 * For x<0.5 1594 * atanh(x) = 0.5*log1p(2x+2x*x/(1-x)) 1595 * 1596 * Special cases: 1597 * atanh(x) is NaN if |x| > 1 with signal; 1598 * atanh(NaN) is that NaN with no signal; 1599 * atanh(+-1) is +-INF with signal. 1600 * 1601 */ 1602double atanh(double x) { 1603 static const double one = 1.0, huge = 1e300; 1604 static const double zero = 0.0; 1605 1606 double t; 1607 int32_t hx, ix; 1608 uint32_t lx; 1609 EXTRACT_WORDS(hx, lx, x); 1610 ix = hx & 0x7fffffff; 1611 if ((ix | ((lx | -static_cast<int32_t>(lx)) >> 31)) > 0x3ff00000) /* |x|>1 */ 1612 return (x - x) / (x - x); 1613 if (ix == 0x3ff00000) return x / zero; 1614 if (ix < 0x3e300000 && (huge + x) > zero) return x; /* x<2**-28 */ 1615 SET_HIGH_WORD(x, ix); 1616 if (ix < 0x3fe00000) { /* x < 0.5 */ 1617 t = x + x; 1618 t = 0.5 * log1p(t + t * x / (one - x)); 1619 } else { 1620 t = 0.5 * log1p((x + x) / (one - x)); 1621 } 1622 if (hx >= 0) 1623 return t; 1624 else 1625 return -t; 1626} 1627 1628/* log(x) 1629 * Return the logrithm of x 1630 * 1631 * Method : 1632 * 1. Argument Reduction: find k and f such that 1633 * x = 2^k * (1+f), 1634 * where sqrt(2)/2 < 1+f < sqrt(2) . 1635 * 1636 * 2. Approximation of log(1+f). 1637 * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) 1638 * = 2s + 2/3 s**3 + 2/5 s**5 + ....., 1639 * = 2s + s*R 1640 * We use a special Reme algorithm on [0,0.1716] to generate 1641 * a polynomial of degree 14 to approximate R The maximum error 1642 * of this polynomial approximation is bounded by 2**-58.45. In 1643 * other words, 1644 * 2 4 6 8 10 12 14 1645 * R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s 1646 * (the values of Lg1 to Lg7 are listed in the program) 1647 * and 1648 * | 2 14 | -58.45 1649 * | Lg1*s +...+Lg7*s - R(z) | <= 2 1650 * | | 1651 * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. 1652 * In order to guarantee error in log below 1ulp, we compute log 1653 * by 1654 * log(1+f) = f - s*(f - R) (if f is not too large) 1655 * log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy) 1656 * 1657 * 3. Finally, log(x) = k*ln2 + log(1+f). 1658 * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo))) 1659 * Here ln2 is split into two floating point number: 1660 * ln2_hi + ln2_lo, 1661 * where n*ln2_hi is always exact for |n| < 2000. 1662 * 1663 * Special cases: 1664 * log(x) is NaN with signal if x < 0 (including -INF) ; 1665 * log(+INF) is +INF; log(0) is -INF with signal; 1666 * log(NaN) is that NaN with no signal. 1667 * 1668 * Accuracy: 1669 * according to an error analysis, the error is always less than 1670 * 1 ulp (unit in the last place). 1671 * 1672 * Constants: 1673 * The hexadecimal values are the intended ones for the following 1674 * constants. The decimal values may be used, provided that the 1675 * compiler will convert from decimal to binary accurately enough 1676 * to produce the hexadecimal values shown. 1677 */ 1678double log(double x) { 1679 static const double /* -- */ 1680 ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */ 1681 ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */ 1682 two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */ 1683 Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */ 1684 Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ 1685 Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */ 1686 Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */ 1687 Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */ 1688 Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ 1689 Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ 1690 1691 static const double zero = 0.0; 1692 static volatile double vzero = 0.0; 1693 1694 double hfsq, f, s, z, R, w, t1, t2, dk; 1695 int32_t k, hx, i, j; 1696 uint32_t lx; 1697 1698 EXTRACT_WORDS(hx, lx, x); 1699 1700 k = 0; 1701 if (hx < 0x00100000) { /* x < 2**-1022 */ 1702 if (((hx & 0x7fffffff) | lx) == 0) 1703 return -two54 / vzero; /* log(+-0)=-inf */ 1704 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */ 1705 k -= 54; 1706 x *= two54; /* subnormal number, scale up x */ 1707 GET_HIGH_WORD(hx, x); 1708 } 1709 if (hx >= 0x7ff00000) return x + x; 1710 k += (hx >> 20) - 1023; 1711 hx &= 0x000fffff; 1712 i = (hx + 0x95f64) & 0x100000; 1713 SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */ 1714 k += (i >> 20); 1715 f = x - 1.0; 1716 if ((0x000fffff & (2 + hx)) < 3) { /* -2**-20 <= f < 2**-20 */ 1717 if (f == zero) { 1718 if (k == 0) { 1719 return zero; 1720 } else { 1721 dk = static_cast<double>(k); 1722 return dk * ln2_hi + dk * ln2_lo; 1723 } 1724 } 1725 R = f * f * (0.5 - 0.33333333333333333 * f); 1726 if (k == 0) { 1727 return f - R; 1728 } else { 1729 dk = static_cast<double>(k); 1730 return dk * ln2_hi - ((R - dk * ln2_lo) - f); 1731 } 1732 } 1733 s = f / (2.0 + f); 1734 dk = static_cast<double>(k); 1735 z = s * s; 1736 i = hx - 0x6147a; 1737 w = z * z; 1738 j = 0x6b851 - hx; 1739 t1 = w * (Lg2 + w * (Lg4 + w * Lg6)); 1740 t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7))); 1741 i |= j; 1742 R = t2 + t1; 1743 if (i > 0) { 1744 hfsq = 0.5 * f * f; 1745 if (k == 0) 1746 return f - (hfsq - s * (hfsq + R)); 1747 else 1748 return dk * ln2_hi - ((hfsq - (s * (hfsq + R) + dk * ln2_lo)) - f); 1749 } else { 1750 if (k == 0) 1751 return f - s * (f - R); 1752 else 1753 return dk * ln2_hi - ((s * (f - R) - dk * ln2_lo) - f); 1754 } 1755} 1756 1757/* double log1p(double x) 1758 * 1759 * Method : 1760 * 1. Argument Reduction: find k and f such that 1761 * 1+x = 2^k * (1+f), 1762 * where sqrt(2)/2 < 1+f < sqrt(2) . 1763 * 1764 * Note. If k=0, then f=x is exact. However, if k!=0, then f 1765 * may not be representable exactly. In that case, a correction 1766 * term is need. Let u=1+x rounded. Let c = (1+x)-u, then 1767 * log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u), 1768 * and add back the correction term c/u. 1769 * (Note: when x > 2**53, one can simply return log(x)) 1770 * 1771 * 2. Approximation of log1p(f). 1772 * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) 1773 * = 2s + 2/3 s**3 + 2/5 s**5 + ....., 1774 * = 2s + s*R 1775 * We use a special Reme algorithm on [0,0.1716] to generate 1776 * a polynomial of degree 14 to approximate R The maximum error 1777 * of this polynomial approximation is bounded by 2**-58.45. In 1778 * other words, 1779 * 2 4 6 8 10 12 14 1780 * R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s +Lp6*s +Lp7*s 1781 * (the values of Lp1 to Lp7 are listed in the program) 1782 * and 1783 * | 2 14 | -58.45 1784 * | Lp1*s +...+Lp7*s - R(z) | <= 2 1785 * | | 1786 * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. 1787 * In order to guarantee error in log below 1ulp, we compute log 1788 * by 1789 * log1p(f) = f - (hfsq - s*(hfsq+R)). 1790 * 1791 * 3. Finally, log1p(x) = k*ln2 + log1p(f). 1792 * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo))) 1793 * Here ln2 is split into two floating point number: 1794 * ln2_hi + ln2_lo, 1795 * where n*ln2_hi is always exact for |n| < 2000. 1796 * 1797 * Special cases: 1798 * log1p(x) is NaN with signal if x < -1 (including -INF) ; 1799 * log1p(+INF) is +INF; log1p(-1) is -INF with signal; 1800 * log1p(NaN) is that NaN with no signal. 1801 * 1802 * Accuracy: 1803 * according to an error analysis, the error is always less than 1804 * 1 ulp (unit in the last place). 1805 * 1806 * Constants: 1807 * The hexadecimal values are the intended ones for the following 1808 * constants. The decimal values may be used, provided that the 1809 * compiler will convert from decimal to binary accurately enough 1810 * to produce the hexadecimal values shown. 1811 * 1812 * Note: Assuming log() return accurate answer, the following 1813 * algorithm can be used to compute log1p(x) to within a few ULP: 1814 * 1815 * u = 1+x; 1816 * if(u==1.0) return x ; else 1817 * return log(u)*(x/(u-1.0)); 1818 * 1819 * See HP-15C Advanced Functions Handbook, p.193. 1820 */ 1821double log1p(double x) { 1822 static const double /* -- */ 1823 ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */ 1824 ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */ 1825 two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */ 1826 Lp1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */ 1827 Lp2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ 1828 Lp3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */ 1829 Lp4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */ 1830 Lp5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */ 1831 Lp6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ 1832 Lp7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ 1833 1834 static const double zero = 0.0; 1835 static volatile double vzero = 0.0; 1836 1837 double hfsq, f, c, s, z, R, u; 1838 int32_t k, hx, hu, ax; 1839 1840 GET_HIGH_WORD(hx, x); 1841 ax = hx & 0x7fffffff; 1842 1843 k = 1; 1844 if (hx < 0x3FDA827A) { /* 1+x < sqrt(2)+ */ 1845 if (ax >= 0x3ff00000) { /* x <= -1.0 */ 1846 if (x == -1.0) 1847 return -two54 / vzero; /* log1p(-1)=+inf */ 1848 else 1849 return (x - x) / (x - x); /* log1p(x<-1)=NaN */ 1850 } 1851 if (ax < 0x3e200000) { /* |x| < 2**-29 */ 1852 if (two54 + x > zero /* raise inexact */ 1853 && ax < 0x3c900000) /* |x| < 2**-54 */ 1854 return x; 1855 else 1856 return x - x * x * 0.5; 1857 } 1858 if (hx > 0 || hx <= static_cast<int32_t>(0xbfd2bec4)) { 1859 k = 0; 1860 f = x; 1861 hu = 1; 1862 } /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ 1863 } 1864 if (hx >= 0x7ff00000) return x + x; 1865 if (k != 0) { 1866 if (hx < 0x43400000) { 1867 STRICT_ASSIGN(double, u, 1.0 + x); 1868 GET_HIGH_WORD(hu, u); 1869 k = (hu >> 20) - 1023; 1870 c = (k > 0) ? 1.0 - (u - x) : x - (u - 1.0); /* correction term */ 1871 c /= u; 1872 } else { 1873 u = x; 1874 GET_HIGH_WORD(hu, u); 1875 k = (hu >> 20) - 1023; 1876 c = 0; 1877 } 1878 hu &= 0x000fffff; 1879 /* 1880 * The approximation to sqrt(2) used in thresholds is not 1881 * critical. However, the ones used above must give less 1882 * strict bounds than the one here so that the k==0 case is 1883 * never reached from here, since here we have committed to 1884 * using the correction term but don't use it if k==0. 1885 */ 1886 if (hu < 0x6a09e) { /* u ~< sqrt(2) */ 1887 SET_HIGH_WORD(u, hu | 0x3ff00000); /* normalize u */ 1888 } else { 1889 k += 1; 1890 SET_HIGH_WORD(u, hu | 0x3fe00000); /* normalize u/2 */ 1891 hu = (0x00100000 - hu) >> 2; 1892 } 1893 f = u - 1.0; 1894 } 1895 hfsq = 0.5 * f * f; 1896 if (hu == 0) { /* |f| < 2**-20 */ 1897 if (f == zero) { 1898 if (k == 0) { 1899 return zero; 1900 } else { 1901 c += k * ln2_lo; 1902 return k * ln2_hi + c; 1903 } 1904 } 1905 R = hfsq * (1.0 - 0.66666666666666666 * f); 1906 if (k == 0) 1907 return f - R; 1908 else 1909 return k * ln2_hi - ((R - (k * ln2_lo + c)) - f); 1910 } 1911 s = f / (2.0 + f); 1912 z = s * s; 1913 R = z * (Lp1 + 1914 z * (Lp2 + z * (Lp3 + z * (Lp4 + z * (Lp5 + z * (Lp6 + z * Lp7)))))); 1915 if (k == 0) 1916 return f - (hfsq - s * (hfsq + R)); 1917 else 1918 return k * ln2_hi - ((hfsq - (s * (hfsq + R) + (k * ln2_lo + c))) - f); 1919} 1920 1921/* 1922 * k_log1p(f): 1923 * Return log(1+f) - f for 1+f in ~[sqrt(2)/2, sqrt(2)]. 1924 * 1925 * The following describes the overall strategy for computing 1926 * logarithms in base e. The argument reduction and adding the final 1927 * term of the polynomial are done by the caller for increased accuracy 1928 * when different bases are used. 1929 * 1930 * Method : 1931 * 1. Argument Reduction: find k and f such that 1932 * x = 2^k * (1+f), 1933 * where sqrt(2)/2 < 1+f < sqrt(2) . 1934 * 1935 * 2. Approximation of log(1+f). 1936 * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) 1937 * = 2s + 2/3 s**3 + 2/5 s**5 + ....., 1938 * = 2s + s*R 1939 * We use a special Reme algorithm on [0,0.1716] to generate 1940 * a polynomial of degree 14 to approximate R The maximum error 1941 * of this polynomial approximation is bounded by 2**-58.45. In 1942 * other words, 1943 * 2 4 6 8 10 12 14 1944 * R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s 1945 * (the values of Lg1 to Lg7 are listed in the program) 1946 * and 1947 * | 2 14 | -58.45 1948 * | Lg1*s +...+Lg7*s - R(z) | <= 2 1949 * | | 1950 * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. 1951 * In order to guarantee error in log below 1ulp, we compute log 1952 * by 1953 * log(1+f) = f - s*(f - R) (if f is not too large) 1954 * log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy) 1955 * 1956 * 3. Finally, log(x) = k*ln2 + log(1+f). 1957 * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo))) 1958 * Here ln2 is split into two floating point number: 1959 * ln2_hi + ln2_lo, 1960 * where n*ln2_hi is always exact for |n| < 2000. 1961 * 1962 * Special cases: 1963 * log(x) is NaN with signal if x < 0 (including -INF) ; 1964 * log(+INF) is +INF; log(0) is -INF with signal; 1965 * log(NaN) is that NaN with no signal. 1966 * 1967 * Accuracy: 1968 * according to an error analysis, the error is always less than 1969 * 1 ulp (unit in the last place). 1970 * 1971 * Constants: 1972 * The hexadecimal values are the intended ones for the following 1973 * constants. The decimal values may be used, provided that the 1974 * compiler will convert from decimal to binary accurately enough 1975 * to produce the hexadecimal values shown. 1976 */ 1977 1978static const double Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */ 1979 Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */ 1980 Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */ 1981 Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */ 1982 Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */ 1983 Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */ 1984 Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ 1985 1986/* 1987 * We always inline k_log1p(), since doing so produces a 1988 * substantial performance improvement (~40% on amd64). 1989 */ 1990static inline double k_log1p(double f) { 1991 double hfsq, s, z, R, w, t1, t2; 1992 1993 s = f / (2.0 + f); 1994 z = s * s; 1995 w = z * z; 1996 t1 = w * (Lg2 + w * (Lg4 + w * Lg6)); 1997 t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7))); 1998 R = t2 + t1; 1999 hfsq = 0.5 * f * f; 2000 return s * (hfsq + R); 2001} 2002 2003/* 2004 * Return the base 2 logarithm of x. See e_log.c and k_log.h for most 2005 * comments. 2006 * 2007 * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel, 2008 * then does the combining and scaling steps 2009 * log2(x) = (f - 0.5*f*f + k_log1p(f)) / ln2 + k 2010 * in not-quite-routine extra precision. 2011 */ 2012double log2(double x) { 2013 static const double 2014 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ 2015 ivln2hi = 1.44269504072144627571e+00, /* 0x3ff71547, 0x65200000 */ 2016 ivln2lo = 1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */ 2017 2018 static const double zero = 0.0; 2019 static volatile double vzero = 0.0; 2020 2021 double f, hfsq, hi, lo, r, val_hi, val_lo, w, y; 2022 int32_t i, k, hx; 2023 uint32_t lx; 2024 2025 EXTRACT_WORDS(hx, lx, x); 2026 2027 k = 0; 2028 if (hx < 0x00100000) { /* x < 2**-1022 */ 2029 if (((hx & 0x7fffffff) | lx) == 0) 2030 return -two54 / vzero; /* log(+-0)=-inf */ 2031 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */ 2032 k -= 54; 2033 x *= two54; /* subnormal number, scale up x */ 2034 GET_HIGH_WORD(hx, x); 2035 } 2036 if (hx >= 0x7ff00000) return x + x; 2037 if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */ 2038 k += (hx >> 20) - 1023; 2039 hx &= 0x000fffff; 2040 i = (hx + 0x95f64) & 0x100000; 2041 SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */ 2042 k += (i >> 20); 2043 y = static_cast<double>(k); 2044 f = x - 1.0; 2045 hfsq = 0.5 * f * f; 2046 r = k_log1p(f); 2047 2048 /* 2049 * f-hfsq must (for args near 1) be evaluated in extra precision 2050 * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2). 2051 * This is fairly efficient since f-hfsq only depends on f, so can 2052 * be evaluated in parallel with R. Not combining hfsq with R also 2053 * keeps R small (though not as small as a true `lo' term would be), 2054 * so that extra precision is not needed for terms involving R. 2055 * 2056 * Compiler bugs involving extra precision used to break Dekker's 2057 * theorem for spitting f-hfsq as hi+lo, unless double_t was used 2058 * or the multi-precision calculations were avoided when double_t 2059 * has extra precision. These problems are now automatically 2060 * avoided as a side effect of the optimization of combining the 2061 * Dekker splitting step with the clear-low-bits step. 2062 * 2063 * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra 2064 * precision to avoid a very large cancellation when x is very near 2065 * these values. Unlike the above cancellations, this problem is 2066 * specific to base 2. It is strange that adding +-1 is so much 2067 * harder than adding +-ln2 or +-log10_2. 2068 * 2069 * This uses Dekker's theorem to normalize y+val_hi, so the 2070 * compiler bugs are back in some configurations, sigh. And I 2071 * don't want to used double_t to avoid them, since that gives a 2072 * pessimization and the support for avoiding the pessimization 2073 * is not yet available. 2074 * 2075 * The multi-precision calculations for the multiplications are 2076 * routine. 2077 */ 2078 hi = f - hfsq; 2079 SET_LOW_WORD(hi, 0); 2080 lo = (f - hi) - hfsq + r; 2081 val_hi = hi * ivln2hi; 2082 val_lo = (lo + hi) * ivln2lo + lo * ivln2hi; 2083 2084 /* spadd(val_hi, val_lo, y), except for not using double_t: */ 2085 w = y + val_hi; 2086 val_lo += (y - w) + val_hi; 2087 val_hi = w; 2088 2089 return val_lo + val_hi; 2090} 2091 2092/* 2093 * Return the base 10 logarithm of x 2094 * 2095 * Method : 2096 * Let log10_2hi = leading 40 bits of log10(2) and 2097 * log10_2lo = log10(2) - log10_2hi, 2098 * ivln10 = 1/log(10) rounded. 2099 * Then 2100 * n = ilogb(x), 2101 * if(n<0) n = n+1; 2102 * x = scalbn(x,-n); 2103 * log10(x) := n*log10_2hi + (n*log10_2lo + ivln10*log(x)) 2104 * 2105 * Note 1: 2106 * To guarantee log10(10**n)=n, where 10**n is normal, the rounding 2107 * mode must set to Round-to-Nearest. 2108 * Note 2: 2109 * [1/log(10)] rounded to 53 bits has error .198 ulps; 2110 * log10 is monotonic at all binary break points. 2111 * 2112 * Special cases: 2113 * log10(x) is NaN if x < 0; 2114 * log10(+INF) is +INF; log10(0) is -INF; 2115 * log10(NaN) is that NaN; 2116 * log10(10**N) = N for N=0,1,...,22. 2117 */ 2118double log10(double x) { 2119 static const double 2120 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ 2121 ivln10 = 4.34294481903251816668e-01, 2122 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */ 2123 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */ 2124 2125 static const double zero = 0.0; 2126 static volatile double vzero = 0.0; 2127 2128 double y; 2129 int32_t i, k, hx; 2130 uint32_t lx; 2131 2132 EXTRACT_WORDS(hx, lx, x); 2133 2134 k = 0; 2135 if (hx < 0x00100000) { /* x < 2**-1022 */ 2136 if (((hx & 0x7fffffff) | lx) == 0) 2137 return -two54 / vzero; /* log(+-0)=-inf */ 2138 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */ 2139 k -= 54; 2140 x *= two54; /* subnormal number, scale up x */ 2141 GET_HIGH_WORD(hx, x); 2142 GET_LOW_WORD(lx, x); 2143 } 2144 if (hx >= 0x7ff00000) return x + x; 2145 if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */ 2146 k += (hx >> 20) - 1023; 2147 2148 i = (k & 0x80000000) >> 31; 2149 hx = (hx & 0x000fffff) | ((0x3ff - i) << 20); 2150 y = k + i; 2151 SET_HIGH_WORD(x, hx); 2152 SET_LOW_WORD(x, lx); 2153 2154 double z = y * log10_2lo + ivln10 * log(x); 2155 return z + y * log10_2hi; 2156} 2157 2158/* expm1(x) 2159 * Returns exp(x)-1, the exponential of x minus 1. 2160 * 2161 * Method 2162 * 1. Argument reduction: 2163 * Given x, find r and integer k such that 2164 * 2165 * x = k*ln2 + r, |r| <= 0.5*ln2 ~ 0.34658 2166 * 2167 * Here a correction term c will be computed to compensate 2168 * the error in r when rounded to a floating-point number. 2169 * 2170 * 2. Approximating expm1(r) by a special rational function on 2171 * the interval [0,0.34658]: 2172 * Since 2173 * r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 - r^4/360 + ... 2174 * we define R1(r*r) by 2175 * r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 * R1(r*r) 2176 * That is, 2177 * R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r) 2178 * = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r)) 2179 * = 1 - r^2/60 + r^4/2520 - r^6/100800 + ... 2180 * We use a special Reme algorithm on [0,0.347] to generate 2181 * a polynomial of degree 5 in r*r to approximate R1. The 2182 * maximum error of this polynomial approximation is bounded 2183 * by 2**-61. In other words, 2184 * R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5 2185 * where Q1 = -1.6666666666666567384E-2, 2186 * Q2 = 3.9682539681370365873E-4, 2187 * Q3 = -9.9206344733435987357E-6, 2188 * Q4 = 2.5051361420808517002E-7, 2189 * Q5 = -6.2843505682382617102E-9; 2190 * z = r*r, 2191 * with error bounded by 2192 * | 5 | -61 2193 * | 1.0+Q1*z+...+Q5*z - R1(z) | <= 2 2194 * | | 2195 * 2196 * expm1(r) = exp(r)-1 is then computed by the following 2197 * specific way which minimize the accumulation rounding error: 2198 * 2 3 2199 * r r [ 3 - (R1 + R1*r/2) ] 2200 * expm1(r) = r + --- + --- * [--------------------] 2201 * 2 2 [ 6 - r*(3 - R1*r/2) ] 2202 * 2203 * To compensate the error in the argument reduction, we use 2204 * expm1(r+c) = expm1(r) + c + expm1(r)*c 2205 * ~ expm1(r) + c + r*c 2206 * Thus c+r*c will be added in as the correction terms for 2207 * expm1(r+c). Now rearrange the term to avoid optimization 2208 * screw up: 2209 * ( 2 2 ) 2210 * ({ ( r [ R1 - (3 - R1*r/2) ] ) } r ) 2211 * expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- ) 2212 * ({ ( 2 [ 6 - r*(3 - R1*r/2) ] ) } 2 ) 2213 * ( ) 2214 * 2215 * = r - E 2216 * 3. Scale back to obtain expm1(x): 2217 * From step 1, we have 2218 * expm1(x) = either 2^k*[expm1(r)+1] - 1 2219 * = or 2^k*[expm1(r) + (1-2^-k)] 2220 * 4. Implementation notes: 2221 * (A). To save one multiplication, we scale the coefficient Qi 2222 * to Qi*2^i, and replace z by (x^2)/2. 2223 * (B). To achieve maximum accuracy, we compute expm1(x) by 2224 * (i) if x < -56*ln2, return -1.0, (raise inexact if x!=inf) 2225 * (ii) if k=0, return r-E 2226 * (iii) if k=-1, return 0.5*(r-E)-0.5 2227 * (iv) if k=1 if r < -0.25, return 2*((r+0.5)- E) 2228 * else return 1.0+2.0*(r-E); 2229 * (v) if (k<-2||k>56) return 2^k(1-(E-r)) - 1 (or exp(x)-1) 2230 * (vi) if k <= 20, return 2^k((1-2^-k)-(E-r)), else 2231 * (vii) return 2^k(1-((E+2^-k)-r)) 2232 * 2233 * Special cases: 2234 * expm1(INF) is INF, expm1(NaN) is NaN; 2235 * expm1(-INF) is -1, and 2236 * for finite argument, only expm1(0)=0 is exact. 2237 * 2238 * Accuracy: 2239 * according to an error analysis, the error is always less than 2240 * 1 ulp (unit in the last place). 2241 * 2242 * Misc. info. 2243 * For IEEE double 2244 * if x > 7.09782712893383973096e+02 then expm1(x) overflow 2245 * 2246 * Constants: 2247 * The hexadecimal values are the intended ones for the following 2248 * constants. The decimal values may be used, provided that the 2249 * compiler will convert from decimal to binary accurately enough 2250 * to produce the hexadecimal values shown. 2251 */ 2252double expm1(double x) { 2253 static const double 2254 one = 1.0, 2255 tiny = 1.0e-300, 2256 o_threshold = 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */ 2257 ln2_hi = 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */ 2258 ln2_lo = 1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */ 2259 invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */ 2260 /* Scaled Q's: Qn_here = 2**n * Qn_above, for R(2*z) where z = hxs = 2261 x*x/2: */ 2262 Q1 = -3.33333333333331316428e-02, /* BFA11111 111110F4 */ 2263 Q2 = 1.58730158725481460165e-03, /* 3F5A01A0 19FE5585 */ 2264 Q3 = -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */ 2265 Q4 = 4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */ 2266 Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */ 2267 2268 static volatile double huge = 1.0e+300; 2269 2270 double y, hi, lo, c, t, e, hxs, hfx, r1, twopk; 2271 int32_t k, xsb; 2272 uint32_t hx; 2273 2274 GET_HIGH_WORD(hx, x); 2275 xsb = hx & 0x80000000; /* sign bit of x */ 2276 hx &= 0x7fffffff; /* high word of |x| */ 2277 2278 /* filter out huge and non-finite argument */ 2279 if (hx >= 0x4043687A) { /* if |x|>=56*ln2 */ 2280 if (hx >= 0x40862E42) { /* if |x|>=709.78... */ 2281 if (hx >= 0x7ff00000) { 2282 uint32_t low; 2283 GET_LOW_WORD(low, x); 2284 if (((hx & 0xfffff) | low) != 0) 2285 return x + x; /* NaN */ 2286 else 2287 return (xsb == 0) ? x : -1.0; /* exp(+-inf)={inf,-1} */ 2288 } 2289 if (x > o_threshold) return huge * huge; /* overflow */ 2290 } 2291 if (xsb != 0) { /* x < -56*ln2, return -1.0 with inexact */ 2292 if (x + tiny < 0.0) /* raise inexact */ 2293 return tiny - one; /* return -1 */ 2294 } 2295 } 2296 2297 /* argument reduction */ 2298 if (hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */ 2299 if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */ 2300 if (xsb == 0) { 2301 hi = x - ln2_hi; 2302 lo = ln2_lo; 2303 k = 1; 2304 } else { 2305 hi = x + ln2_hi; 2306 lo = -ln2_lo; 2307 k = -1; 2308 } 2309 } else { 2310 k = invln2 * x + ((xsb == 0) ? 0.5 : -0.5); 2311 t = k; 2312 hi = x - t * ln2_hi; /* t*ln2_hi is exact here */ 2313 lo = t * ln2_lo; 2314 } 2315 STRICT_ASSIGN(double, x, hi - lo); 2316 c = (hi - x) - lo; 2317 } else if (hx < 0x3c900000) { /* when |x|<2**-54, return x */ 2318 t = huge + x; /* return x with inexact flags when x!=0 */ 2319 return x - (t - (huge + x)); 2320 } else { 2321 k = 0; 2322 } 2323 2324 /* x is now in primary range */ 2325 hfx = 0.5 * x; 2326 hxs = x * hfx; 2327 r1 = one + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5)))); 2328 t = 3.0 - r1 * hfx; 2329 e = hxs * ((r1 - t) / (6.0 - x * t)); 2330 if (k == 0) { 2331 return x - (x * e - hxs); /* c is 0 */ 2332 } else { 2333 INSERT_WORDS(twopk, 0x3ff00000 + (k << 20), 0); /* 2^k */ 2334 e = (x * (e - c) - c); 2335 e -= hxs; 2336 if (k == -1) return 0.5 * (x - e) - 0.5; 2337 if (k == 1) { 2338 if (x < -0.25) 2339 return -2.0 * (e - (x + 0.5)); 2340 else 2341 return one + 2.0 * (x - e); 2342 } 2343 if (k <= -2 || k > 56) { /* suffice to return exp(x)-1 */ 2344 y = one - (e - x); 2345 // TODO(mvstanton): is this replacement for the hex float 2346 // sufficient? 2347 // if (k == 1024) y = y*2.0*0x1p1023; 2348 if (k == 1024) 2349 y = y * 2.0 * 8.98846567431158e+307; 2350 else 2351 y = y * twopk; 2352 return y - one; 2353 } 2354 t = one; 2355 if (k < 20) { 2356 SET_HIGH_WORD(t, 0x3ff00000 - (0x200000 >> k)); /* t=1-2^-k */ 2357 y = t - (e - x); 2358 y = y * twopk; 2359 } else { 2360 SET_HIGH_WORD(t, ((0x3ff - k) << 20)); /* 2^-k */ 2361 y = x - (e + t); 2362 y += one; 2363 y = y * twopk; 2364 } 2365 } 2366 return y; 2367} 2368 2369double cbrt(double x) { 2370 static const uint32_t 2371 B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */ 2372 B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */ 2373 2374 /* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */ 2375 static const double P0 = 1.87595182427177009643, /* 0x3ffe03e6, 0x0f61e692 */ 2376 P1 = -1.88497979543377169875, /* 0xbffe28e0, 0x92f02420 */ 2377 P2 = 1.621429720105354466140, /* 0x3ff9f160, 0x4a49d6c2 */ 2378 P3 = -0.758397934778766047437, /* 0xbfe844cb, 0xbee751d9 */ 2379 P4 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */ 2380 2381 int32_t hx; 2382 union { 2383 double value; 2384 uint64_t bits; 2385 } u; 2386 double r, s, t = 0.0, w; 2387 uint32_t sign; 2388 uint32_t high, low; 2389 2390 EXTRACT_WORDS(hx, low, x); 2391 sign = hx & 0x80000000; /* sign= sign(x) */ 2392 hx ^= sign; 2393 if (hx >= 0x7ff00000) return (x + x); /* cbrt(NaN,INF) is itself */ 2394 2395 /* 2396 * Rough cbrt to 5 bits: 2397 * cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3) 2398 * where e is integral and >= 0, m is real and in [0, 1), and "/" and 2399 * "%" are integer division and modulus with rounding towards minus 2400 * infinity. The RHS is always >= the LHS and has a maximum relative 2401 * error of about 1 in 16. Adding a bias of -0.03306235651 to the 2402 * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE 2403 * floating point representation, for finite positive normal values, 2404 * ordinary integer divison of the value in bits magically gives 2405 * almost exactly the RHS of the above provided we first subtract the 2406 * exponent bias (1023 for doubles) and later add it back. We do the 2407 * subtraction virtually to keep e >= 0 so that ordinary integer 2408 * division rounds towards minus infinity; this is also efficient. 2409 */ 2410 if (hx < 0x00100000) { /* zero or subnormal? */ 2411 if ((hx | low) == 0) return (x); /* cbrt(0) is itself */ 2412 SET_HIGH_WORD(t, 0x43500000); /* set t= 2**54 */ 2413 t *= x; 2414 GET_HIGH_WORD(high, t); 2415 INSERT_WORDS(t, sign | ((high & 0x7fffffff) / 3 + B2), 0); 2416 } else { 2417 INSERT_WORDS(t, sign | (hx / 3 + B1), 0); 2418 } 2419 2420 /* 2421 * New cbrt to 23 bits: 2422 * cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x) 2423 * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r) 2424 * to within 2**-23.5 when |r - 1| < 1/10. The rough approximation 2425 * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this 2426 * gives us bounds for r = t**3/x. 2427 * 2428 * Try to optimize for parallel evaluation as in k_tanf.c. 2429 */ 2430 r = (t * t) * (t / x); 2431 t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4)); 2432 2433 /* 2434 * Round t away from zero to 23 bits (sloppily except for ensuring that 2435 * the result is larger in magnitude than cbrt(x) but not much more than 2436 * 2 23-bit ulps larger). With rounding towards zero, the error bound 2437 * would be ~5/6 instead of ~4/6. With a maximum error of 2 23-bit ulps 2438 * in the rounded t, the infinite-precision error in the Newton 2439 * approximation barely affects third digit in the final error 2440 * 0.667; the error in the rounded t can be up to about 3 23-bit ulps 2441 * before the final error is larger than 0.667 ulps. 2442 */ 2443 u.value = t; 2444 u.bits = (u.bits + 0x80000000) & 0xffffffffc0000000ULL; 2445 t = u.value; 2446 2447 /* one step Newton iteration to 53 bits with error < 0.667 ulps */ 2448 s = t * t; /* t*t is exact */ 2449 r = x / s; /* error <= 0.5 ulps; |r| < |t| */ 2450 w = t + t; /* t+t is exact */ 2451 r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */ 2452 t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */ 2453 2454 return (t); 2455} 2456 2457/* sin(x) 2458 * Return sine function of x. 2459 * 2460 * kernel function: 2461 * __kernel_sin ... sine function on [-pi/4,pi/4] 2462 * __kernel_cos ... cose function on [-pi/4,pi/4] 2463 * __ieee754_rem_pio2 ... argument reduction routine 2464 * 2465 * Method. 2466 * Let S,C and T denote the sin, cos and tan respectively on 2467 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 2468 * in [-pi/4 , +pi/4], and let n = k mod 4. 2469 * We have 2470 * 2471 * n sin(x) cos(x) tan(x) 2472 * ---------------------------------------------------------- 2473 * 0 S C T 2474 * 1 C -S -1/T 2475 * 2 -S -C T 2476 * 3 -C S -1/T 2477 * ---------------------------------------------------------- 2478 * 2479 * Special cases: 2480 * Let trig be any of sin, cos, or tan. 2481 * trig(+-INF) is NaN, with signals; 2482 * trig(NaN) is that NaN; 2483 * 2484 * Accuracy: 2485 * TRIG(x) returns trig(x) nearly rounded 2486 */ 2487double sin(double x) { 2488 double y[2], z = 0.0; 2489 int32_t n, ix; 2490 2491 /* High word of x. */ 2492 GET_HIGH_WORD(ix, x); 2493 2494 /* |x| ~< pi/4 */ 2495 ix &= 0x7fffffff; 2496 if (ix <= 0x3fe921fb) { 2497 return __kernel_sin(x, z, 0); 2498 } else if (ix >= 0x7ff00000) { 2499 /* sin(Inf or NaN) is NaN */ 2500 return x - x; 2501 } else { 2502 /* argument reduction needed */ 2503 n = __ieee754_rem_pio2(x, y); 2504 switch (n & 3) { 2505 case 0: 2506 return __kernel_sin(y[0], y[1], 1); 2507 case 1: 2508 return __kernel_cos(y[0], y[1]); 2509 case 2: 2510 return -__kernel_sin(y[0], y[1], 1); 2511 default: 2512 return -__kernel_cos(y[0], y[1]); 2513 } 2514 } 2515} 2516 2517/* tan(x) 2518 * Return tangent function of x. 2519 * 2520 * kernel function: 2521 * __kernel_tan ... tangent function on [-pi/4,pi/4] 2522 * __ieee754_rem_pio2 ... argument reduction routine 2523 * 2524 * Method. 2525 * Let S,C and T denote the sin, cos and tan respectively on 2526 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 2527 * in [-pi/4 , +pi/4], and let n = k mod 4. 2528 * We have 2529 * 2530 * n sin(x) cos(x) tan(x) 2531 * ---------------------------------------------------------- 2532 * 0 S C T 2533 * 1 C -S -1/T 2534 * 2 -S -C T 2535 * 3 -C S -1/T 2536 * ---------------------------------------------------------- 2537 * 2538 * Special cases: 2539 * Let trig be any of sin, cos, or tan. 2540 * trig(+-INF) is NaN, with signals; 2541 * trig(NaN) is that NaN; 2542 * 2543 * Accuracy: 2544 * TRIG(x) returns trig(x) nearly rounded 2545 */ 2546double tan(double x) { 2547 double y[2], z = 0.0; 2548 int32_t n, ix; 2549 2550 /* High word of x. */ 2551 GET_HIGH_WORD(ix, x); 2552 2553 /* |x| ~< pi/4 */ 2554 ix &= 0x7fffffff; 2555 if (ix <= 0x3fe921fb) { 2556 return __kernel_tan(x, z, 1); 2557 } else if (ix >= 0x7ff00000) { 2558 /* tan(Inf or NaN) is NaN */ 2559 return x - x; /* NaN */ 2560 } else { 2561 /* argument reduction needed */ 2562 n = __ieee754_rem_pio2(x, y); 2563 /* 1 -> n even, -1 -> n odd */ 2564 return __kernel_tan(y[0], y[1], 1 - ((n & 1) << 1)); 2565 } 2566} 2567 2568/* 2569 * ES6 draft 09-27-13, section 20.2.2.12. 2570 * Math.cosh 2571 * Method : 2572 * mathematically cosh(x) if defined to be (exp(x)+exp(-x))/2 2573 * 1. Replace x by |x| (cosh(x) = cosh(-x)). 2574 * 2. 2575 * [ exp(x) - 1 ]^2 2576 * 0 <= x <= ln2/2 : cosh(x) := 1 + ------------------- 2577 * 2*exp(x) 2578 * 2579 * exp(x) + 1/exp(x) 2580 * ln2/2 <= x <= 22 : cosh(x) := ------------------- 2581 * 2 2582 * 22 <= x <= lnovft : cosh(x) := exp(x)/2 2583 * lnovft <= x <= ln2ovft: cosh(x) := exp(x/2)/2 * exp(x/2) 2584 * ln2ovft < x : cosh(x) := huge*huge (overflow) 2585 * 2586 * Special cases: 2587 * cosh(x) is |x| if x is +INF, -INF, or NaN. 2588 * only cosh(0)=1 is exact for finite x. 2589 */ 2590double cosh(double x) { 2591 static const double KCOSH_OVERFLOW = 710.4758600739439; 2592 static const double one = 1.0, half = 0.5; 2593 static volatile double huge = 1.0e+300; 2594 2595 int32_t ix; 2596 2597 /* High word of |x|. */ 2598 GET_HIGH_WORD(ix, x); 2599 ix &= 0x7fffffff; 2600 2601 // |x| in [0,0.5*log2], return 1+expm1(|x|)^2/(2*exp(|x|)) 2602 if (ix < 0x3fd62e43) { 2603 double t = expm1(fabs(x)); 2604 double w = one + t; 2605 // For |x| < 2^-55, cosh(x) = 1 2606 if (ix < 0x3c800000) return w; 2607 return one + (t * t) / (w + w); 2608 } 2609 2610 // |x| in [0.5*log2, 22], return (exp(|x|)+1/exp(|x|)/2 2611 if (ix < 0x40360000) { 2612 double t = exp(fabs(x)); 2613 return half * t + half / t; 2614 } 2615 2616 // |x| in [22, log(maxdouble)], return half*exp(|x|) 2617 if (ix < 0x40862e42) return half * exp(fabs(x)); 2618 2619 // |x| in [log(maxdouble), overflowthreshold] 2620 if (fabs(x) <= KCOSH_OVERFLOW) { 2621 double w = exp(half * fabs(x)); 2622 double t = half * w; 2623 return t * w; 2624 } 2625 2626 /* x is INF or NaN */ 2627 if (ix >= 0x7ff00000) return x * x; 2628 2629 // |x| > overflowthreshold. 2630 return huge * huge; 2631} 2632 2633/* 2634 * ES6 draft 09-27-13, section 20.2.2.30. 2635 * Math.sinh 2636 * Method : 2637 * mathematically sinh(x) if defined to be (exp(x)-exp(-x))/2 2638 * 1. Replace x by |x| (sinh(-x) = -sinh(x)). 2639 * 2. 2640 * E + E/(E+1) 2641 * 0 <= x <= 22 : sinh(x) := --------------, E=expm1(x) 2642 * 2 2643 * 2644 * 22 <= x <= lnovft : sinh(x) := exp(x)/2 2645 * lnovft <= x <= ln2ovft: sinh(x) := exp(x/2)/2 * exp(x/2) 2646 * ln2ovft < x : sinh(x) := x*shuge (overflow) 2647 * 2648 * Special cases: 2649 * sinh(x) is |x| if x is +Infinity, -Infinity, or NaN. 2650 * only sinh(0)=0 is exact for finite x. 2651 */ 2652double sinh(double x) { 2653 static const double KSINH_OVERFLOW = 710.4758600739439, 2654 TWO_M28 = 2655 3.725290298461914e-9, // 2^-28, empty lower half 2656 LOG_MAXD = 709.7822265625; // 0x40862e42 00000000, empty lower half 2657 static const double shuge = 1.0e307; 2658 2659 double h = (x < 0) ? -0.5 : 0.5; 2660 // |x| in [0, 22]. return sign(x)*0.5*(E+E/(E+1)) 2661 double ax = fabs(x); 2662 if (ax < 22) { 2663 // For |x| < 2^-28, sinh(x) = x 2664 if (ax < TWO_M28) return x; 2665 double t = expm1(ax); 2666 if (ax < 1) { 2667 return h * (2 * t - t * t / (t + 1)); 2668 } 2669 return h * (t + t / (t + 1)); 2670 } 2671 // |x| in [22, log(maxdouble)], return 0.5 * exp(|x|) 2672 if (ax < LOG_MAXD) return h * exp(ax); 2673 // |x| in [log(maxdouble), overflowthreshold] 2674 // overflowthreshold = 710.4758600739426 2675 if (ax <= KSINH_OVERFLOW) { 2676 double w = exp(0.5 * ax); 2677 double t = h * w; 2678 return t * w; 2679 } 2680 // |x| > overflowthreshold or is NaN. 2681 // Return Infinity of the appropriate sign or NaN. 2682 return x * shuge; 2683} 2684 2685/* Tanh(x) 2686 * Return the Hyperbolic Tangent of x 2687 * 2688 * Method : 2689 * x -x 2690 * e - e 2691 * 0. tanh(x) is defined to be ----------- 2692 * x -x 2693 * e + e 2694 * 1. reduce x to non-negative by tanh(-x) = -tanh(x). 2695 * 2. 0 <= x < 2**-28 : tanh(x) := x with inexact if x != 0 2696 * -t 2697 * 2**-28 <= x < 1 : tanh(x) := -----; t = expm1(-2x) 2698 * t + 2 2699 * 2 2700 * 1 <= x < 22 : tanh(x) := 1 - -----; t = expm1(2x) 2701 * t + 2 2702 * 22 <= x <= INF : tanh(x) := 1. 2703 * 2704 * Special cases: 2705 * tanh(NaN) is NaN; 2706 * only tanh(0)=0 is exact for finite argument. 2707 */ 2708double tanh(double x) { 2709 static const volatile double tiny = 1.0e-300; 2710 static const double one = 1.0, two = 2.0, huge = 1.0e300; 2711 double t, z; 2712 int32_t jx, ix; 2713 2714 GET_HIGH_WORD(jx, x); 2715 ix = jx & 0x7fffffff; 2716 2717 /* x is INF or NaN */ 2718 if (ix >= 0x7ff00000) { 2719 if (jx >= 0) 2720 return one / x + one; /* tanh(+-inf)=+-1 */ 2721 else 2722 return one / x - one; /* tanh(NaN) = NaN */ 2723 } 2724 2725 /* |x| < 22 */ 2726 if (ix < 0x40360000) { /* |x|<22 */ 2727 if (ix < 0x3e300000) { /* |x|<2**-28 */ 2728 if (huge + x > one) return x; /* tanh(tiny) = tiny with inexact */ 2729 } 2730 if (ix >= 0x3ff00000) { /* |x|>=1 */ 2731 t = expm1(two * fabs(x)); 2732 z = one - two / (t + two); 2733 } else { 2734 t = expm1(-two * fabs(x)); 2735 z = -t / (t + two); 2736 } 2737 /* |x| >= 22, return +-1 */ 2738 } else { 2739 z = one - tiny; /* raise inexact flag */ 2740 } 2741 return (jx >= 0) ? z : -z; 2742} 2743 2744} // namespace ieee754 2745} // namespace base 2746} // namespace v8 2747