146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner/* @(#)e_sqrt.c 5.1 93/09/24 */ 246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner/* 346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * ==================================================== 446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * 646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * Developed at SunPro, a Sun Microsystems, Inc. business. 746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * Permission to use, copy, modify, and distribute this 846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * software is freely granted, provided that this notice 946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * is preserved. 1046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * ==================================================== 1146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner */ 1246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 1346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#if defined(LIBM_SCCS) && !defined(lint) 1446be48730333120a7b939116cef075e61c12c703David 'Digit' Turnerstatic char rcsid[] = "$NetBSD: e_sqrt.c,v 1.8 1995/05/10 20:46:17 jtc Exp $"; 1546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#endif 1646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 1746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner/* __ieee754_sqrt(x) 1846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * Return correctly rounded sqrt. 1946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * ------------------------------------------ 2046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * | Use the hardware sqrt if you have one | 2146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * ------------------------------------------ 2246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * Method: 2346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * Bit by bit method using integer arithmetic. (Slow, but portable) 2446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * 1. Normalization 2546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * Scale x to y in [1,4) with even powers of 2: 2646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then 2746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * sqrt(x) = 2^k * sqrt(y) 2846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * 2. Bit by bit computation 2946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * Let q = sqrt(y) truncated to i bit after binary point (q = 1), 3046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * i 0 3146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * i+1 2 3246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * s = 2*q , and y = 2 * ( y - q ). (1) 3346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * i i i i 3446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * 3546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * To compute q from q , one checks whether 3646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * i+1 i 3746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * 3846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * -(i+1) 2 3946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * (q + 2 ) <= y. (2) 4046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * i 4146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * -(i+1) 4246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * If (2) is false, then q = q ; otherwise q = q + 2 . 4346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * i+1 i i+1 i 4446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * 4546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * With some algebric manipulation, it is not difficult to see 4646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * that (2) is equivalent to 4746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * -(i+1) 4846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * s + 2 <= y (3) 4946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * i i 5046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * 5146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * The advantage of (3) is that s and y can be computed by 5246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * i i 5346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * the following recurrence formula: 5446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * if (3) is false 5546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * 5646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * s = s , y = y ; (4) 5746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * i+1 i i+1 i 5846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * 5946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * otherwise, 6046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * -i -(i+1) 6146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * s = s + 2 , y = y - s - 2 (5) 6246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * i+1 i i+1 i i 6346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * 6446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * One may easily use induction to prove (4) and (5). 6546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * Note. Since the left hand side of (3) contain only i+2 bits, 6646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * it does not necessary to do a full (53-bit) comparison 6746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * in (3). 6846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * 3. Final rounding 6946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * After generating the 53 bits result, we compute one more bit. 7046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * Together with the remainder, we can decide whether the 7146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * result is exact, bigger than 1/2ulp, or less than 1/2ulp 7246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * (it will never equal to 1/2ulp). 7346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * The rounding mode can be detected by checking whether 7446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * huge + tiny is equal to huge, and whether huge - tiny is 7546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * equal to huge for some floating point number "huge" and "tiny". 7646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * 7746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * Special cases: 7846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * sqrt(+-0) = +-0 ... exact 7946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * sqrt(inf) = inf 8046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * sqrt(-ve) = NaN ... with invalid signal 8146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * sqrt(NaN) = NaN ... with invalid signal for signaling NaN 8246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * 8346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * Other methods : see the appended file at the end of the program below. 8446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *--------------- 8546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner */ 8646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 8746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner/*#include "math.h"*/ 8846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#include "math_private.h" 8946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 9046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#ifdef __STDC__ 9146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner double SDL_NAME(copysign)(double x, double y) 9246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#else 9346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner double SDL_NAME(copysign)(x,y) 9446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner double x,y; 9546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#endif 9646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner{ 9746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner u_int32_t hx,hy; 9846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner GET_HIGH_WORD(hx,x); 9946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner GET_HIGH_WORD(hy,y); 10046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner SET_HIGH_WORD(x,(hx&0x7fffffff)|(hy&0x80000000)); 10146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner return x; 10246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner} 10346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 10446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#ifdef __STDC__ 10546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner double SDL_NAME(scalbn) (double x, int n) 10646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#else 10746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner double SDL_NAME(scalbn) (x,n) 10846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner double x; int n; 10946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#endif 11046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner{ 11146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner int32_t k,hx,lx; 11246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner EXTRACT_WORDS(hx,lx,x); 11346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner k = (hx&0x7ff00000)>>20; /* extract exponent */ 11446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if (k==0) { /* 0 or subnormal x */ 11546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */ 11646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner x *= two54; 11746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner GET_HIGH_WORD(hx,x); 11846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner k = ((hx&0x7ff00000)>>20) - 54; 11946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if (n< -50000) return tiny*x; /*underflow*/ 12046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 12146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if (k==0x7ff) return x+x; /* NaN or Inf */ 12246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner k = k+n; 12346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if (k > 0x7fe) return huge*SDL_NAME(copysign)(huge,x); /* overflow */ 12446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if (k > 0) /* normal result */ 12546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner {SET_HIGH_WORD(x,(hx&0x800fffff)|(k<<20)); return x;} 12646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if (k <= -54) { 12746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if (n > 50000) /* in case integer overflow in n+k */ 12846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner return huge*SDL_NAME(copysign)(huge,x); /*overflow*/ 12946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner else return tiny*SDL_NAME(copysign)(tiny,x); /*underflow*/ 13046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 13146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner k += 54; /* subnormal result */ 13246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner SET_HIGH_WORD(x,(hx&0x800fffff)|(k<<20)); 13346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner return x*twom54; 13446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner} 13546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 13646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#ifdef __STDC__ 13746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner double __ieee754_sqrt(double x) 13846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#else 13946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner double __ieee754_sqrt(x) 14046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner double x; 14146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#endif 14246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner{ 14346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner double z; 14446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner int32_t sign = (int)0x80000000; 14546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner int32_t ix0,s0,q,m,t,i; 14646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner u_int32_t r,t1,s1,ix1,q1; 14746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 14846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner EXTRACT_WORDS(ix0,ix1,x); 14946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 15046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner /* take care of Inf and NaN */ 15146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if((ix0&0x7ff00000)==0x7ff00000) { 15246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner return x*x+x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf 15346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner sqrt(-inf)=sNaN */ 15446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 15546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner /* take care of zero */ 15646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if(ix0<=0) { 15746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */ 15846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner else if(ix0<0) 15946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner return (x-x)/(x-x); /* sqrt(-ve) = sNaN */ 16046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 16146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner /* normalize x */ 16246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner m = (ix0>>20); 16346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if(m==0) { /* subnormal x */ 16446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner while(ix0==0) { 16546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner m -= 21; 16646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ix0 |= (ix1>>11); ix1 <<= 21; 16746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 16846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1; 16946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner m -= i-1; 17046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ix0 |= (ix1>>(32-i)); 17146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ix1 <<= i; 17246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 17346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner m -= 1023; /* unbias exponent */ 17446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ix0 = (ix0&0x000fffff)|0x00100000; 17546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if(m&1){ /* odd m, double x to make it even */ 17646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ix0 += ix0 + ((ix1&sign)>>31); 17746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ix1 += ix1; 17846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 17946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner m >>= 1; /* m = [m/2] */ 18046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 18146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner /* generate sqrt(x) bit by bit */ 18246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ix0 += ix0 + ((ix1&sign)>>31); 18346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ix1 += ix1; 18446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */ 18546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner r = 0x00200000; /* r = moving bit from right to left */ 18646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 18746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner while(r!=0) { 18846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner t = s0+r; 18946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if(t<=ix0) { 19046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner s0 = t+r; 19146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ix0 -= t; 19246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner q += r; 19346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 19446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ix0 += ix0 + ((ix1&sign)>>31); 19546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ix1 += ix1; 19646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner r>>=1; 19746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 19846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 19946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner r = sign; 20046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner while(r!=0) { 20146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner t1 = s1+r; 20246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner t = s0; 20346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if((t<ix0)||((t==ix0)&&(t1<=ix1))) { 20446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner s1 = t1+r; 20546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if(((int32_t)(t1&sign)==sign)&&(s1&sign)==0) s0 += 1; 20646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ix0 -= t; 20746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if (ix1 < t1) ix0 -= 1; 20846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ix1 -= t1; 20946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner q1 += r; 21046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 21146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ix0 += ix0 + ((ix1&sign)>>31); 21246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ix1 += ix1; 21346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner r>>=1; 21446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 21546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 21646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner /* use floating add to find out rounding direction */ 21746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if((ix0|ix1)!=0) { 21846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner z = one-tiny; /* trigger inexact flag */ 21946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if (z>=one) { 22046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner z = one+tiny; 22146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if (q1==(u_int32_t)0xffffffff) { q1=0; q += 1;} 22246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner else if (z>one) { 22346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if (q1==(u_int32_t)0xfffffffe) q+=1; 22446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner q1+=2; 22546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } else 22646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner q1 += (q1&1); 22746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 22846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 22946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ix0 = (q>>1)+0x3fe00000; 23046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ix1 = q1>>1; 23146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if ((q&1)==1) ix1 |= sign; 23246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ix0 += (m <<20); 23346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner INSERT_WORDS(z,ix0,ix1); 23446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner return z; 23546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner} 23646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 23746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner/* 23846be48730333120a7b939116cef075e61c12c703David 'Digit' TurnerOther methods (use floating-point arithmetic) 23946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner------------- 24046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner(This is a copy of a drafted paper by Prof W. Kahan 24146be48730333120a7b939116cef075e61c12c703David 'Digit' Turnerand K.C. Ng, written in May, 1986) 24246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 24346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Two algorithms are given here to implement sqrt(x) 24446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner (IEEE double precision arithmetic) in software. 24546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Both supply sqrt(x) correctly rounded. The first algorithm (in 24646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Section A) uses newton iterations and involves four divisions. 24746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner The second one uses reciproot iterations to avoid division, but 24846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner requires more multiplications. Both algorithms need the ability 24946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner to chop results of arithmetic operations instead of round them, 25046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner and the INEXACT flag to indicate when an arithmetic operation 25146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner is executed exactly with no roundoff error, all part of the 25246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner standard (IEEE 754-1985). The ability to perform shift, add, 25346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner subtract and logical AND operations upon 32-bit words is needed 25446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner too, though not part of the standard. 25546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 25646be48730333120a7b939116cef075e61c12c703David 'Digit' TurnerA. sqrt(x) by Newton Iteration 25746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 25846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner (1) Initial approximation 25946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 26046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Let x0 and x1 be the leading and the trailing 32-bit words of 26146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner a floating point number x (in IEEE double format) respectively 26246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 26346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 1 11 52 ...widths 26446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ------------------------------------------------------ 26546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner x: |s| e | f | 26646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ------------------------------------------------------ 26746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner msb lsb msb lsb ...order 26846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 26946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 27046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ------------------------ ------------------------ 27146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner x0: |s| e | f1 | x1: | f2 | 27246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ------------------------ ------------------------ 27346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 27446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner By performing shifts and subtracts on x0 and x1 (both regarded 27546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner as integers), we obtain an 8-bit approximation of sqrt(x) as 27646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner follows. 27746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 27846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner k := (x0>>1) + 0x1ff80000; 27946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner y0 := k - T1[31&(k>>15)]. ... y ~ sqrt(x) to 8 bits 28046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Here k is a 32-bit integer and T1[] is an integer array containing 28146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner correction terms. Now magically the floating value of y (y's 28246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner leading 32-bit word is y0, the value of its trailing word is 0) 28346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner approximates sqrt(x) to almost 8-bit. 28446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 28546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Value of T1: 28646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner static int T1[32]= { 28746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 0, 1024, 3062, 5746, 9193, 13348, 18162, 23592, 28846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 29598, 36145, 43202, 50740, 58733, 67158, 75992, 85215, 28946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 83599, 71378, 60428, 50647, 41945, 34246, 27478, 21581, 29046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 16499, 12183, 8588, 5674, 3403, 1742, 661, 130,}; 29146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 29246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner (2) Iterative refinement 29346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 29446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Apply Heron's rule three times to y, we have y approximates 29546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner sqrt(x) to within 1 ulp (Unit in the Last Place): 29646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 29746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner y := (y+x/y)/2 ... almost 17 sig. bits 29846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner y := (y+x/y)/2 ... almost 35 sig. bits 29946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner y := y-(y-x/y)/2 ... within 1 ulp 30046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 30146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 30246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Remark 1. 30346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Another way to improve y to within 1 ulp is: 30446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 30546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner y := (y+x/y) ... almost 17 sig. bits to 2*sqrt(x) 30646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner y := y - 0x00100006 ... almost 18 sig. bits to sqrt(x) 30746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 30846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 2 30946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner (x-y )*y 31046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner y := y + 2* ---------- ...within 1 ulp 31146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 2 31246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 3y + x 31346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 31446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 31546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner This formula has one division fewer than the one above; however, 31646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner it requires more multiplications and additions. Also x must be 31746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner scaled in advance to avoid spurious overflow in evaluating the 31846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner expression 3y*y+x. Hence it is not recommended uless division 31946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner is slow. If division is very slow, then one should use the 32046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner reciproot algorithm given in section B. 32146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 32246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner (3) Final adjustment 32346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 32446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner By twiddling y's last bit it is possible to force y to be 32546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner correctly rounded according to the prevailing rounding mode 32646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner as follows. Let r and i be copies of the rounding mode and 32746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner inexact flag before entering the square root program. Also we 32846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner use the expression y+-ulp for the next representable floating 32946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner numbers (up and down) of y. Note that y+-ulp = either fixed 33046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner point y+-1, or multiply y by nextafter(1,+-inf) in chopped 33146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner mode. 33246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 33346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner I := FALSE; ... reset INEXACT flag I 33446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner R := RZ; ... set rounding mode to round-toward-zero 33546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner z := x/y; ... chopped quotient, possibly inexact 33646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner If(not I) then { ... if the quotient is exact 33746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if(z=y) { 33846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner I := i; ... restore inexact flag 33946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner R := r; ... restore rounded mode 34046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner return sqrt(x):=y. 34146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } else { 34246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner z := z - ulp; ... special rounding 34346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 34446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 34546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner i := TRUE; ... sqrt(x) is inexact 34646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner If (r=RN) then z=z+ulp ... rounded-to-nearest 34746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner If (r=RP) then { ... round-toward-+inf 34846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner y = y+ulp; z=z+ulp; 34946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 35046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner y := y+z; ... chopped sum 35146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner y0:=y0-0x00100000; ... y := y/2 is correctly rounded. 35246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner I := i; ... restore inexact flag 35346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner R := r; ... restore rounded mode 35446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner return sqrt(x):=y. 35546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 35646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner (4) Special cases 35746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 35846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Square root of +inf, +-0, or NaN is itself; 35946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Square root of a negative number is NaN with invalid signal. 36046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 36146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 36246be48730333120a7b939116cef075e61c12c703David 'Digit' TurnerB. sqrt(x) by Reciproot Iteration 36346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 36446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner (1) Initial approximation 36546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 36646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Let x0 and x1 be the leading and the trailing 32-bit words of 36746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner a floating point number x (in IEEE double format) respectively 36846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner (see section A). By performing shifs and subtracts on x0 and y0, 36946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner we obtain a 7.8-bit approximation of 1/sqrt(x) as follows. 37046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 37146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner k := 0x5fe80000 - (x0>>1); 37246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner y0:= k - T2[63&(k>>14)]. ... y ~ 1/sqrt(x) to 7.8 bits 37346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 37446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Here k is a 32-bit integer and T2[] is an integer array 37546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner containing correction terms. Now magically the floating 37646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner value of y (y's leading 32-bit word is y0, the value of 37746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner its trailing word y1 is set to zero) approximates 1/sqrt(x) 37846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner to almost 7.8-bit. 37946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 38046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Value of T2: 38146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner static int T2[64]= { 38246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866, 38346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f, 38446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d, 38546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0, 38646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989, 38746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd, 38846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e, 38946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,}; 39046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 39146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner (2) Iterative refinement 39246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 39346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Apply Reciproot iteration three times to y and multiply the 39446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner result by x to get an approximation z that matches sqrt(x) 39546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner to about 1 ulp. To be exact, we will have 39646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner -1ulp < sqrt(x)-z<1.0625ulp. 39746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 39846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ... set rounding mode to Round-to-nearest 39946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner y := y*(1.5-0.5*x*y*y) ... almost 15 sig. bits to 1/sqrt(x) 40046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x) 40146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ... special arrangement for better accuracy 40246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner z := x*y ... 29 bits to sqrt(x), with z*y<1 40346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner z := z + 0.5*z*(1-z*y) ... about 1 ulp to sqrt(x) 40446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 40546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that 40646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner (a) the term z*y in the final iteration is always less than 1; 40746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner (b) the error in the final result is biased upward so that 40846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner -1 ulp < sqrt(x) - z < 1.0625 ulp 40946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner instead of |sqrt(x)-z|<1.03125ulp. 41046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 41146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner (3) Final adjustment 41246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 41346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner By twiddling y's last bit it is possible to force y to be 41446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner correctly rounded according to the prevailing rounding mode 41546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner as follows. Let r and i be copies of the rounding mode and 41646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner inexact flag before entering the square root program. Also we 41746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner use the expression y+-ulp for the next representable floating 41846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner numbers (up and down) of y. Note that y+-ulp = either fixed 41946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner point y+-1, or multiply y by nextafter(1,+-inf) in chopped 42046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner mode. 42146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 42246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner R := RZ; ... set rounding mode to round-toward-zero 42346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner switch(r) { 42446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner case RN: ... round-to-nearest 42546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if(x<= z*(z-ulp)...chopped) z = z - ulp; else 42646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp; 42746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner break; 42846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner case RZ:case RM: ... round-to-zero or round-to--inf 42946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner R:=RP; ... reset rounding mod to round-to-+inf 43046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if(x<z*z ... rounded up) z = z - ulp; else 43146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp; 43246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner break; 43346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner case RP: ... round-to-+inf 43446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else 43546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if(x>z*z ...chopped) z = z+ulp; 43646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner break; 43746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 43846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 43946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Remark 3. The above comparisons can be done in fixed point. For 44046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner example, to compare x and w=z*z chopped, it suffices to compare 44146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner x1 and w1 (the trailing parts of x and w), regarding them as 44246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner two's complement integers. 44346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 44446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ...Is z an exact square root? 44546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner To determine whether z is an exact square root of x, let z1 be the 44646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner trailing part of z, and also let x0 and x1 be the leading and 44746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner trailing parts of x. 44846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 44946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0 45046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner I := 1; ... Raise Inexact flag: z is not exact 45146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner else { 45246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner j := 1 - [(x0>>20)&1] ... j = logb(x) mod 2 45346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner k := z1 >> 26; ... get z's 25-th and 26-th 45446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner fraction bits 45546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner I := i or (k&j) or ((k&(j+j+1))!=(x1&3)); 45646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 45746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner R:= r ... restore rounded mode 45846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner return sqrt(x):=z. 45946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 46046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner If multiplication is cheaper then the foregoing red tape, the 46146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Inexact flag can be evaluated by 46246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 46346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner I := i; 46446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner I := (z*z!=x) or I. 46546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 46646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Note that z*z can overwrite I; this value must be sensed if it is 46746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner True. 46846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 46946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be 47046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner zero. 47146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 47246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner -------------------- 47346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner z1: | f2 | 47446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner -------------------- 47546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner bit 31 bit 0 47646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 47746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd 47846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner or even of logb(x) have the following relations: 47946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 48046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ------------------------------------------------- 48146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner bit 27,26 of z1 bit 1,0 of x1 logb(x) 48246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ------------------------------------------------- 48346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 00 00 odd and even 48446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 01 01 even 48546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 10 10 odd 48646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 10 00 even 48746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 11 01 even 48846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner ------------------------------------------------- 48946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 49046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner (4) Special cases (see (4) of Section A). 49146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 49246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner */ 49346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 494