1#include <tommath.h> 2#ifdef BN_FAST_S_MP_SQR_C 3/* LibTomMath, multiple-precision integer library -- Tom St Denis 4 * 5 * LibTomMath is a library that provides multiple-precision 6 * integer arithmetic as well as number theoretic functionality. 7 * 8 * The library was designed directly after the MPI library by 9 * Michael Fromberger but has been written from scratch with 10 * additional optimizations in place. 11 * 12 * The library is free for all purposes without any express 13 * guarantee it works. 14 * 15 * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com 16 */ 17 18/* the jist of squaring... 19 * you do like mult except the offset of the tmpx [one that 20 * starts closer to zero] can't equal the offset of tmpy. 21 * So basically you set up iy like before then you min it with 22 * (ty-tx) so that it never happens. You double all those 23 * you add in the inner loop 24 25After that loop you do the squares and add them in. 26*/ 27 28int fast_s_mp_sqr (mp_int * a, mp_int * b) 29{ 30 int olduse, res, pa, ix, iz; 31 mp_digit W[MP_WARRAY], *tmpx; 32 mp_word W1; 33 34 /* grow the destination as required */ 35 pa = a->used + a->used; 36 if (b->alloc < pa) { 37 if ((res = mp_grow (b, pa)) != MP_OKAY) { 38 return res; 39 } 40 } 41 42 /* number of output digits to produce */ 43 W1 = 0; 44 for (ix = 0; ix < pa; ix++) { 45 int tx, ty, iy; 46 mp_word _W; 47 mp_digit *tmpy; 48 49 /* clear counter */ 50 _W = 0; 51 52 /* get offsets into the two bignums */ 53 ty = MIN(a->used-1, ix); 54 tx = ix - ty; 55 56 /* setup temp aliases */ 57 tmpx = a->dp + tx; 58 tmpy = a->dp + ty; 59 60 /* this is the number of times the loop will iterrate, essentially 61 while (tx++ < a->used && ty-- >= 0) { ... } 62 */ 63 iy = MIN(a->used-tx, ty+1); 64 65 /* now for squaring tx can never equal ty 66 * we halve the distance since they approach at a rate of 2x 67 * and we have to round because odd cases need to be executed 68 */ 69 iy = MIN(iy, (ty-tx+1)>>1); 70 71 /* execute loop */ 72 for (iz = 0; iz < iy; iz++) { 73 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 74 } 75 76 /* double the inner product and add carry */ 77 _W = _W + _W + W1; 78 79 /* even columns have the square term in them */ 80 if ((ix&1) == 0) { 81 _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]); 82 } 83 84 /* store it */ 85 W[ix] = (mp_digit)(_W & MP_MASK); 86 87 /* make next carry */ 88 W1 = _W >> ((mp_word)DIGIT_BIT); 89 } 90 91 /* setup dest */ 92 olduse = b->used; 93 b->used = a->used+a->used; 94 95 { 96 mp_digit *tmpb; 97 tmpb = b->dp; 98 for (ix = 0; ix < pa; ix++) { 99 *tmpb++ = W[ix] & MP_MASK; 100 } 101 102 /* clear unused digits [that existed in the old copy of c] */ 103 for (; ix < olduse; ix++) { 104 *tmpb++ = 0; 105 } 106 } 107 mp_clamp (b); 108 return MP_OKAY; 109} 110#endif 111 112/* $Source: /cvs/libtom/libtommath/bn_fast_s_mp_sqr.c,v $ */ 113/* $Revision: 1.3 $ */ 114/* $Date: 2006/03/31 14:18:44 $ */ 115