146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner/* @(#)e_sqrt.c 5.1 93/09/24 */
246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner/*
346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * ====================================================
446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *
646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * Developed at SunPro, a Sun Microsystems, Inc. business.
746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * Permission to use, copy, modify, and distribute this
846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * software is freely granted, provided that this notice
946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * is preserved.
1046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * ====================================================
1146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner */
1246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
1346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#if defined(LIBM_SCCS) && !defined(lint)
1446be48730333120a7b939116cef075e61c12c703David 'Digit' Turnerstatic char rcsid[] = "$NetBSD: e_sqrt.c,v 1.8 1995/05/10 20:46:17 jtc Exp $";
1546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#endif
1646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
1746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner/* __ieee754_sqrt(x)
1846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * Return correctly rounded sqrt.
1946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *           ------------------------------------------
2046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	     |  Use the hardware sqrt if you have one |
2146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *           ------------------------------------------
2246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * Method:
2346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *   Bit by bit method using integer arithmetic. (Slow, but portable)
2446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *   1. Normalization
2546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	Scale x to y in [1,4) with even powers of 2:
2646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
2746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *		sqrt(x) = 2^k * sqrt(y)
2846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *   2. Bit by bit computation
2946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
3046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	     i							 0
3146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *                                     i+1         2
3246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	    s  = 2*q , and	y  =  2   * ( y - q  ).		(1)
3346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	     i      i            i                 i
3446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *
3546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	To compute q    from q , one checks whether
3646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *		    i+1       i
3746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *
3846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *			      -(i+1) 2
3946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *			(q + 2      ) <= y.			(2)
4046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *     			  i
4146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *							      -(i+1)
4246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	If (2) is false, then q   = q ; otherwise q   = q  + 2      .
4346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *		 	       i+1   i             i+1   i
4446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *
4546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	With some algebric manipulation, it is not difficult to see
4646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	that (2) is equivalent to
4746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *                             -(i+1)
4846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *			s  +  2       <= y			(3)
4946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *			 i                i
5046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *
5146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	The advantage of (3) is that s  and y  can be computed by
5246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *				      i      i
5346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	the following recurrence formula:
5446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	    if (3) is false
5546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *
5646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	    s     =  s  ,	y    = y   ;			(4)
5746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	     i+1      i		 i+1    i
5846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *
5946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	    otherwise,
6046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *                         -i                     -(i+1)
6146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	    s	  =  s  + 2  ,  y    = y  -  s  - 2  		(5)
6246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *           i+1      i          i+1    i     i
6346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *
6446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	One may easily use induction to prove (4) and (5).
6546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	Note. Since the left hand side of (3) contain only i+2 bits,
6646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	      it does not necessary to do a full (53-bit) comparison
6746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	      in (3).
6846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *   3. Final rounding
6946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	After generating the 53 bits result, we compute one more bit.
7046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	Together with the remainder, we can decide whether the
7146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	result is exact, bigger than 1/2ulp, or less than 1/2ulp
7246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	(it will never equal to 1/2ulp).
7346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	The rounding mode can be detected by checking whether
7446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	huge + tiny is equal to huge, and whether huge - tiny is
7546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	equal to huge for some floating point number "huge" and "tiny".
7646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *
7746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * Special cases:
7846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	sqrt(+-0) = +-0 	... exact
7946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	sqrt(inf) = inf
8046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	sqrt(-ve) = NaN		... with invalid signal
8146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *	sqrt(NaN) = NaN		... with invalid signal for signaling NaN
8246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *
8346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner * Other methods : see the appended file at the end of the program below.
8446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner *---------------
8546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner */
8646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
8746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner/*#include "math.h"*/
8846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#include "math_private.h"
8946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
9046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#ifdef __STDC__
9146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	double SDL_NAME(copysign)(double x, double y)
9246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#else
9346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	double SDL_NAME(copysign)(x,y)
9446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	double x,y;
9546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#endif
9646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner{
9746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	u_int32_t hx,hy;
9846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	GET_HIGH_WORD(hx,x);
9946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	GET_HIGH_WORD(hy,y);
10046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	SET_HIGH_WORD(x,(hx&0x7fffffff)|(hy&0x80000000));
10146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner        return x;
10246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner}
10346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
10446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#ifdef __STDC__
10546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	double SDL_NAME(scalbn) (double x, int n)
10646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#else
10746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	double SDL_NAME(scalbn) (x,n)
10846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	double x; int n;
10946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#endif
11046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner{
11146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	int32_t k,hx,lx;
11246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	EXTRACT_WORDS(hx,lx,x);
11346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner        k = (hx&0x7ff00000)>>20;		/* extract exponent */
11446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner        if (k==0) {				/* 0 or subnormal x */
11546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner            if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */
11646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    x *= two54;
11746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    GET_HIGH_WORD(hx,x);
11846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    k = ((hx&0x7ff00000)>>20) - 54;
11946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner            if (n< -50000) return tiny*x; 	/*underflow*/
12046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    }
12146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner        if (k==0x7ff) return x+x;		/* NaN or Inf */
12246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner        k = k+n;
12346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner        if (k >  0x7fe) return huge*SDL_NAME(copysign)(huge,x); /* overflow  */
12446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner        if (k > 0) 				/* normal result */
12546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    {SET_HIGH_WORD(x,(hx&0x800fffff)|(k<<20)); return x;}
12646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner        if (k <= -54) {
12746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner            if (n > 50000) 	/* in case integer overflow in n+k */
12846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		return huge*SDL_NAME(copysign)(huge,x);	/*overflow*/
12946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    else return tiny*SDL_NAME(copysign)(tiny,x); 	/*underflow*/
13046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	}
13146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner        k += 54;				/* subnormal result */
13246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	SET_HIGH_WORD(x,(hx&0x800fffff)|(k<<20));
13346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner        return x*twom54;
13446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner}
13546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
13646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#ifdef __STDC__
13746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	double __ieee754_sqrt(double x)
13846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#else
13946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	double __ieee754_sqrt(x)
14046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	double x;
14146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#endif
14246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner{
14346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	double z;
14446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	int32_t sign = (int)0x80000000;
14546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	int32_t ix0,s0,q,m,t,i;
14646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	u_int32_t r,t1,s1,ix1,q1;
14746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
14846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	EXTRACT_WORDS(ix0,ix1,x);
14946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
15046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner    /* take care of Inf and NaN */
15146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	if((ix0&0x7ff00000)==0x7ff00000) {
15246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    return x*x+x;		/* sqrt(NaN)=NaN, sqrt(+inf)=+inf
15346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner					   sqrt(-inf)=sNaN */
15446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	}
15546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner    /* take care of zero */
15646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	if(ix0<=0) {
15746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */
15846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    else if(ix0<0)
15946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		return (x-x)/(x-x);		/* sqrt(-ve) = sNaN */
16046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	}
16146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner    /* normalize x */
16246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	m = (ix0>>20);
16346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	if(m==0) {				/* subnormal x */
16446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    while(ix0==0) {
16546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		m -= 21;
16646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		ix0 |= (ix1>>11); ix1 <<= 21;
16746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    }
16846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1;
16946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    m -= i-1;
17046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    ix0 |= (ix1>>(32-i));
17146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    ix1 <<= i;
17246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	}
17346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	m -= 1023;	/* unbias exponent */
17446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	ix0 = (ix0&0x000fffff)|0x00100000;
17546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	if(m&1){	/* odd m, double x to make it even */
17646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    ix0 += ix0 + ((ix1&sign)>>31);
17746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    ix1 += ix1;
17846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	}
17946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	m >>= 1;	/* m = [m/2] */
18046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
18146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner    /* generate sqrt(x) bit by bit */
18246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	ix0 += ix0 + ((ix1&sign)>>31);
18346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	ix1 += ix1;
18446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	q = q1 = s0 = s1 = 0;	/* [q,q1] = sqrt(x) */
18546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	r = 0x00200000;		/* r = moving bit from right to left */
18646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
18746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	while(r!=0) {
18846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    t = s0+r;
18946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    if(t<=ix0) {
19046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		s0   = t+r;
19146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		ix0 -= t;
19246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		q   += r;
19346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    }
19446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    ix0 += ix0 + ((ix1&sign)>>31);
19546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    ix1 += ix1;
19646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    r>>=1;
19746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	}
19846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
19946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	r = sign;
20046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	while(r!=0) {
20146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    t1 = s1+r;
20246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    t  = s0;
20346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    if((t<ix0)||((t==ix0)&&(t1<=ix1))) {
20446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		s1  = t1+r;
20546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		if(((int32_t)(t1&sign)==sign)&&(s1&sign)==0) s0 += 1;
20646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		ix0 -= t;
20746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		if (ix1 < t1) ix0 -= 1;
20846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		ix1 -= t1;
20946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		q1  += r;
21046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    }
21146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    ix0 += ix0 + ((ix1&sign)>>31);
21246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    ix1 += ix1;
21346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    r>>=1;
21446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	}
21546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
21646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner    /* use floating add to find out rounding direction */
21746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	if((ix0|ix1)!=0) {
21846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    z = one-tiny; /* trigger inexact flag */
21946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    if (z>=one) {
22046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	        z = one+tiny;
22146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	        if (q1==(u_int32_t)0xffffffff) { q1=0; q += 1;}
22246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		else if (z>one) {
22346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		    if (q1==(u_int32_t)0xfffffffe) q+=1;
22446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		    q1+=2;
22546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		} else
22646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	            q1 += (q1&1);
22746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    }
22846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	}
22946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	ix0 = (q>>1)+0x3fe00000;
23046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	ix1 =  q1>>1;
23146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	if ((q&1)==1) ix1 |= sign;
23246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	ix0 += (m <<20);
23346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	INSERT_WORDS(z,ix0,ix1);
23446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	return z;
23546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner}
23646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
23746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner/*
23846be48730333120a7b939116cef075e61c12c703David 'Digit' TurnerOther methods  (use floating-point arithmetic)
23946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner-------------
24046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner(This is a copy of a drafted paper by Prof W. Kahan
24146be48730333120a7b939116cef075e61c12c703David 'Digit' Turnerand K.C. Ng, written in May, 1986)
24246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
24346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	Two algorithms are given here to implement sqrt(x)
24446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	(IEEE double precision arithmetic) in software.
24546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	Both supply sqrt(x) correctly rounded. The first algorithm (in
24646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	Section A) uses newton iterations and involves four divisions.
24746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	The second one uses reciproot iterations to avoid division, but
24846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	requires more multiplications. Both algorithms need the ability
24946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	to chop results of arithmetic operations instead of round them,
25046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	and the INEXACT flag to indicate when an arithmetic operation
25146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	is executed exactly with no roundoff error, all part of the
25246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	standard (IEEE 754-1985). The ability to perform shift, add,
25346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	subtract and logical AND operations upon 32-bit words is needed
25446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	too, though not part of the standard.
25546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
25646be48730333120a7b939116cef075e61c12c703David 'Digit' TurnerA.  sqrt(x) by Newton Iteration
25746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
25846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner   (1)	Initial approximation
25946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
26046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	Let x0 and x1 be the leading and the trailing 32-bit words of
26146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	a floating point number x (in IEEE double format) respectively
26246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
26346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    1    11		     52				  ...widths
26446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	   ------------------------------------------------------
26546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	x: |s|	  e     |	      f				|
26646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	   ------------------------------------------------------
26746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	      msb    lsb  msb				      lsb ...order
26846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
26946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
27046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	     ------------------------  	     ------------------------
27146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	x0:  |s|   e    |    f1     |	 x1: |          f2           |
27246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	     ------------------------  	     ------------------------
27346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
27446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	By performing shifts and subtracts on x0 and x1 (both regarded
27546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	as integers), we obtain an 8-bit approximation of sqrt(x) as
27646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	follows.
27746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
27846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		k  := (x0>>1) + 0x1ff80000;
27946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		y0 := k - T1[31&(k>>15)].	... y ~ sqrt(x) to 8 bits
28046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	Here k is a 32-bit integer and T1[] is an integer array containing
28146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	correction terms. Now magically the floating value of y (y's
28246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	leading 32-bit word is y0, the value of its trailing word is 0)
28346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	approximates sqrt(x) to almost 8-bit.
28446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
28546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	Value of T1:
28646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	static int T1[32]= {
28746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	0,	1024,	3062,	5746,	9193,	13348,	18162,	23592,
28846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	29598,	36145,	43202,	50740,	58733,	67158,	75992,	85215,
28946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	83599,	71378,	60428,	50647,	41945,	34246,	27478,	21581,
29046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	16499,	12183,	8588,	5674,	3403,	1742,	661,	130,};
29146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
29246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner    (2)	Iterative refinement
29346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
29446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	Apply Heron's rule three times to y, we have y approximates
29546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	sqrt(x) to within 1 ulp (Unit in the Last Place):
29646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
29746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		y := (y+x/y)/2		... almost 17 sig. bits
29846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		y := (y+x/y)/2		... almost 35 sig. bits
29946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		y := y-(y-x/y)/2	... within 1 ulp
30046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
30146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
30246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	Remark 1.
30346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    Another way to improve y to within 1 ulp is:
30446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
30546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		y := (y+x/y)		... almost 17 sig. bits to 2*sqrt(x)
30646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		y := y - 0x00100006	... almost 18 sig. bits to sqrt(x)
30746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
30846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner				2
30946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner			    (x-y )*y
31046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		y := y + 2* ----------	...within 1 ulp
31146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner			       2
31246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner			     3y  + x
31346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
31446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
31546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	This formula has one division fewer than the one above; however,
31646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	it requires more multiplications and additions. Also x must be
31746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	scaled in advance to avoid spurious overflow in evaluating the
31846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	expression 3y*y+x. Hence it is not recommended uless division
31946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	is slow. If division is very slow, then one should use the
32046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	reciproot algorithm given in section B.
32146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
32246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner    (3) Final adjustment
32346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
32446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	By twiddling y's last bit it is possible to force y to be
32546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	correctly rounded according to the prevailing rounding mode
32646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	as follows. Let r and i be copies of the rounding mode and
32746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	inexact flag before entering the square root program. Also we
32846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	use the expression y+-ulp for the next representable floating
32946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	numbers (up and down) of y. Note that y+-ulp = either fixed
33046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
33146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	mode.
33246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
33346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		I := FALSE;	... reset INEXACT flag I
33446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		R := RZ;	... set rounding mode to round-toward-zero
33546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		z := x/y;	... chopped quotient, possibly inexact
33646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		If(not I) then {	... if the quotient is exact
33746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		    if(z=y) {
33846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		        I := i;	 ... restore inexact flag
33946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		        R := r;  ... restore rounded mode
34046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		        return sqrt(x):=y.
34146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		    } else {
34246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner			z := z - ulp;	... special rounding
34346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		    }
34446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		}
34546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		i := TRUE;		... sqrt(x) is inexact
34646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		If (r=RN) then z=z+ulp	... rounded-to-nearest
34746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		If (r=RP) then {	... round-toward-+inf
34846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		    y = y+ulp; z=z+ulp;
34946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		}
35046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		y := y+z;		... chopped sum
35146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		y0:=y0-0x00100000;	... y := y/2 is correctly rounded.
35246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	        I := i;	 		... restore inexact flag
35346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	        R := r;  		... restore rounded mode
35446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	        return sqrt(x):=y.
35546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
35646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner    (4)	Special cases
35746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
35846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	Square root of +inf, +-0, or NaN is itself;
35946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	Square root of a negative number is NaN with invalid signal.
36046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
36146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
36246be48730333120a7b939116cef075e61c12c703David 'Digit' TurnerB.  sqrt(x) by Reciproot Iteration
36346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
36446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner   (1)	Initial approximation
36546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
36646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	Let x0 and x1 be the leading and the trailing 32-bit words of
36746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	a floating point number x (in IEEE double format) respectively
36846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	(see section A). By performing shifs and subtracts on x0 and y0,
36946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
37046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
37146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    k := 0x5fe80000 - (x0>>1);
37246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    y0:= k - T2[63&(k>>14)].	... y ~ 1/sqrt(x) to 7.8 bits
37346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
37446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	Here k is a 32-bit integer and T2[] is an integer array
37546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	containing correction terms. Now magically the floating
37646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	value of y (y's leading 32-bit word is y0, the value of
37746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	its trailing word y1 is set to zero) approximates 1/sqrt(x)
37846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	to almost 7.8-bit.
37946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
38046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	Value of T2:
38146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	static int T2[64]= {
38246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	0x1500,	0x2ef8,	0x4d67,	0x6b02,	0x87be,	0xa395,	0xbe7a,	0xd866,
38346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	0xf14a,	0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
38446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
38546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
38646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
38746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
38846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
38946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	0x1527f,0x1334a,0x11051,0xe951,	0xbe01,	0x8e0d,	0x5924,	0x1edd,};
39046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
39146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner    (2)	Iterative refinement
39246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
39346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	Apply Reciproot iteration three times to y and multiply the
39446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	result by x to get an approximation z that matches sqrt(x)
39546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	to about 1 ulp. To be exact, we will have
39646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		-1ulp < sqrt(x)-z<1.0625ulp.
39746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
39846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	... set rounding mode to Round-to-nearest
39946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	   y := y*(1.5-0.5*x*y*y)	... almost 15 sig. bits to 1/sqrt(x)
40046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	   y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
40146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	... special arrangement for better accuracy
40246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	   z := x*y			... 29 bits to sqrt(x), with z*y<1
40346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	   z := z + 0.5*z*(1-z*y)	... about 1 ulp to sqrt(x)
40446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
40546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
40646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	(a) the term z*y in the final iteration is always less than 1;
40746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	(b) the error in the final result is biased upward so that
40846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		-1 ulp < sqrt(x) - z < 1.0625 ulp
40946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    instead of |sqrt(x)-z|<1.03125ulp.
41046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
41146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner    (3)	Final adjustment
41246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
41346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	By twiddling y's last bit it is possible to force y to be
41446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	correctly rounded according to the prevailing rounding mode
41546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	as follows. Let r and i be copies of the rounding mode and
41646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	inexact flag before entering the square root program. Also we
41746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	use the expression y+-ulp for the next representable floating
41846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	numbers (up and down) of y. Note that y+-ulp = either fixed
41946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	point y+-1, or multiply y by nextafter(1,+-inf) in chopped
42046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	mode.
42146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
42246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	R := RZ;		... set rounding mode to round-toward-zero
42346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	switch(r) {
42446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    case RN:		... round-to-nearest
42546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	       if(x<= z*(z-ulp)...chopped) z = z - ulp; else
42646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	       if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
42746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	       break;
42846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    case RZ:case RM:	... round-to-zero or round-to--inf
42946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	       R:=RP;		... reset rounding mod to round-to-+inf
43046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	       if(x<z*z ... rounded up) z = z - ulp; else
43146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	       if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
43246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	       break;
43346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    case RP:		... round-to-+inf
43446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	       if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
43546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	       if(x>z*z ...chopped) z = z+ulp;
43646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	       break;
43746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	}
43846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
43946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	Remark 3. The above comparisons can be done in fixed point. For
44046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	example, to compare x and w=z*z chopped, it suffices to compare
44146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	x1 and w1 (the trailing parts of x and w), regarding them as
44246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	two's complement integers.
44346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
44446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	...Is z an exact square root?
44546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	To determine whether z is an exact square root of x, let z1 be the
44646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	trailing part of z, and also let x0 and x1 be the leading and
44746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	trailing parts of x.
44846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
44946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	If ((z1&0x03ffffff)!=0)	... not exact if trailing 26 bits of z!=0
45046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    I := 1;		... Raise Inexact flag: z is not exact
45146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	else {
45246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    j := 1 - [(x0>>20)&1]	... j = logb(x) mod 2
45346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    k := z1 >> 26;		... get z's 25-th and 26-th
45446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner					    fraction bits
45546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
45646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	}
45746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	R:= r		... restore rounded mode
45846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	return sqrt(x):=z.
45946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
46046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	If multiplication is cheaper then the foregoing red tape, the
46146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	Inexact flag can be evaluated by
46246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
46346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    I := i;
46446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	    I := (z*z!=x) or I.
46546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
46646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	Note that z*z can overwrite I; this value must be sensed if it is
46746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	True.
46846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
46946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
47046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	zero.
47146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
47246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		    --------------------
47346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		z1: |        f2        |
47446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		    --------------------
47546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner		bit 31		   bit 0
47646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
47746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
47846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	or even of logb(x) have the following relations:
47946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
48046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	-------------------------------------------------
48146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	bit 27,26 of z1		bit 1,0 of x1	logb(x)
48246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	-------------------------------------------------
48346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	00			00		odd and even
48446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	01			01		even
48546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	10			10		odd
48646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	10			00		even
48746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	11			01		even
48846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner	-------------------------------------------------
48946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
49046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner    (4)	Special cases (see (4) of Section A).
49146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
49246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner */
49346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner
494