1/*
2 * Minimal code for RSA support from LibTomMath 0.3.9
3 * http://math.libtomcrypt.com/
4 * http://math.libtomcrypt.com/files/ltm-0.39.tar.bz2
5 * This library was released in public domain by Tom St Denis.
6 *
7 * The combination in this file is not using many of the optimized algorithms
8 * (e.g., Montgomery reduction) and is considerable slower than the LibTomMath
9 * with its default of SC_RSA_1 settins. The main purpose of having this
10 * version here is to make it easier to build bignum.c wrapper without having
11 * to install and build an external library. However, it is likely worth the
12 * effort to use the full library with SC_RSA_1 instead of this minimized copy.
13 * Including the optimized algorithms may increase the size requirements by
14 * 15 kB or so (measured with x86 build).
15 *
16 * If CONFIG_INTERNAL_LIBTOMMATH is defined, bignum.c includes this
17 * libtommath.c file instead of using the external LibTomMath library.
18 */
19
20#ifndef CHAR_BIT
21#define CHAR_BIT 8
22#endif
23
24#define BN_MP_INVMOD_C
25#define BN_S_MP_EXPTMOD_C /* Note: #undef in tommath_superclass.h; this would
26			   * require BN_MP_EXPTMOD_FAST_C instead */
27#define BN_S_MP_MUL_DIGS_C
28#define BN_MP_INVMOD_SLOW_C
29#define BN_S_MP_SQR_C
30#define BN_S_MP_MUL_HIGH_DIGS_C /* Note: #undef in tommath_superclass.h; this
31				 * would require other than mp_reduce */
32
33
34/* from tommath.h */
35
36#ifndef MIN
37   #define MIN(x,y) ((x)<(y)?(x):(y))
38#endif
39
40#ifndef MAX
41   #define MAX(x,y) ((x)>(y)?(x):(y))
42#endif
43
44#define  OPT_CAST(x)
45
46typedef unsigned long mp_digit;
47typedef u64 mp_word;
48
49#define DIGIT_BIT          28
50#define MP_28BIT
51
52
53#define XMALLOC  os_malloc
54#define XFREE    os_free
55#define XREALLOC os_realloc
56
57
58#define MP_MASK          ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1))
59
60#define MP_LT        -1   /* less than */
61#define MP_EQ         0   /* equal to */
62#define MP_GT         1   /* greater than */
63
64#define MP_ZPOS       0   /* positive integer */
65#define MP_NEG        1   /* negative */
66
67#define MP_OKAY       0   /* ok result */
68#define MP_MEM        -2  /* out of mem */
69#define MP_VAL        -3  /* invalid input */
70
71#define MP_YES        1   /* yes response */
72#define MP_NO         0   /* no response */
73
74typedef int           mp_err;
75
76/* define this to use lower memory usage routines (exptmods mostly) */
77#define MP_LOW_MEM
78
79/* default precision */
80#ifndef MP_PREC
81   #ifndef MP_LOW_MEM
82      #define MP_PREC                 32     /* default digits of precision */
83   #else
84      #define MP_PREC                 8      /* default digits of precision */
85   #endif
86#endif
87
88/* size of comba arrays, should be at least 2 * 2**(BITS_PER_WORD - BITS_PER_DIGIT*2) */
89#define MP_WARRAY               (1 << (sizeof(mp_word) * CHAR_BIT - 2 * DIGIT_BIT + 1))
90
91/* the infamous mp_int structure */
92typedef struct  {
93    int used, alloc, sign;
94    mp_digit *dp;
95} mp_int;
96
97
98/* ---> Basic Manipulations <--- */
99#define mp_iszero(a) (((a)->used == 0) ? MP_YES : MP_NO)
100#define mp_iseven(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 0)) ? MP_YES : MP_NO)
101#define mp_isodd(a)  (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? MP_YES : MP_NO)
102
103
104/* prototypes for copied functions */
105#define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1)
106static int s_mp_exptmod(mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode);
107static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs);
108static int s_mp_sqr(mp_int * a, mp_int * b);
109static int s_mp_mul_high_digs(mp_int * a, mp_int * b, mp_int * c, int digs);
110
111static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs);
112
113static int mp_init_multi(mp_int *mp, ...);
114static void mp_clear_multi(mp_int *mp, ...);
115static int mp_lshd(mp_int * a, int b);
116static void mp_set(mp_int * a, mp_digit b);
117static void mp_clamp(mp_int * a);
118static void mp_exch(mp_int * a, mp_int * b);
119static void mp_rshd(mp_int * a, int b);
120static void mp_zero(mp_int * a);
121static int mp_mod_2d(mp_int * a, int b, mp_int * c);
122static int mp_div_2d(mp_int * a, int b, mp_int * c, mp_int * d);
123static int mp_init_copy(mp_int * a, mp_int * b);
124static int mp_mul_2d(mp_int * a, int b, mp_int * c);
125static int mp_div_2(mp_int * a, mp_int * b);
126static int mp_copy(mp_int * a, mp_int * b);
127static int mp_count_bits(mp_int * a);
128static int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d);
129static int mp_mod(mp_int * a, mp_int * b, mp_int * c);
130static int mp_grow(mp_int * a, int size);
131static int mp_cmp_mag(mp_int * a, mp_int * b);
132static int mp_invmod(mp_int * a, mp_int * b, mp_int * c);
133static int mp_abs(mp_int * a, mp_int * b);
134static int mp_invmod_slow(mp_int * a, mp_int * b, mp_int * c);
135static int mp_sqr(mp_int * a, mp_int * b);
136static int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d);
137static int mp_reduce_2k_setup_l(mp_int *a, mp_int *d);
138static int mp_2expt(mp_int * a, int b);
139static int mp_reduce_setup(mp_int * a, mp_int * b);
140static int mp_reduce(mp_int * x, mp_int * m, mp_int * mu);
141static int mp_init_size(mp_int * a, int size);
142
143
144
145/* functions from bn_<func name>.c */
146
147
148/* reverse an array, used for radix code */
149static void bn_reverse (unsigned char *s, int len)
150{
151  int     ix, iy;
152  unsigned char t;
153
154  ix = 0;
155  iy = len - 1;
156  while (ix < iy) {
157    t     = s[ix];
158    s[ix] = s[iy];
159    s[iy] = t;
160    ++ix;
161    --iy;
162  }
163}
164
165
166/* low level addition, based on HAC pp.594, Algorithm 14.7 */
167static int s_mp_add (mp_int * a, mp_int * b, mp_int * c)
168{
169  mp_int *x;
170  int     olduse, res, min, max;
171
172  /* find sizes, we let |a| <= |b| which means we have to sort
173   * them.  "x" will point to the input with the most digits
174   */
175  if (a->used > b->used) {
176    min = b->used;
177    max = a->used;
178    x = a;
179  } else {
180    min = a->used;
181    max = b->used;
182    x = b;
183  }
184
185  /* init result */
186  if (c->alloc < max + 1) {
187    if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
188      return res;
189    }
190  }
191
192  /* get old used digit count and set new one */
193  olduse = c->used;
194  c->used = max + 1;
195
196  {
197    register mp_digit u, *tmpa, *tmpb, *tmpc;
198    register int i;
199
200    /* alias for digit pointers */
201
202    /* first input */
203    tmpa = a->dp;
204
205    /* second input */
206    tmpb = b->dp;
207
208    /* destination */
209    tmpc = c->dp;
210
211    /* zero the carry */
212    u = 0;
213    for (i = 0; i < min; i++) {
214      /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
215      *tmpc = *tmpa++ + *tmpb++ + u;
216
217      /* U = carry bit of T[i] */
218      u = *tmpc >> ((mp_digit)DIGIT_BIT);
219
220      /* take away carry bit from T[i] */
221      *tmpc++ &= MP_MASK;
222    }
223
224    /* now copy higher words if any, that is in A+B
225     * if A or B has more digits add those in
226     */
227    if (min != max) {
228      for (; i < max; i++) {
229        /* T[i] = X[i] + U */
230        *tmpc = x->dp[i] + u;
231
232        /* U = carry bit of T[i] */
233        u = *tmpc >> ((mp_digit)DIGIT_BIT);
234
235        /* take away carry bit from T[i] */
236        *tmpc++ &= MP_MASK;
237      }
238    }
239
240    /* add carry */
241    *tmpc++ = u;
242
243    /* clear digits above oldused */
244    for (i = c->used; i < olduse; i++) {
245      *tmpc++ = 0;
246    }
247  }
248
249  mp_clamp (c);
250  return MP_OKAY;
251}
252
253
254/* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
255static int s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
256{
257  int     olduse, res, min, max;
258
259  /* find sizes */
260  min = b->used;
261  max = a->used;
262
263  /* init result */
264  if (c->alloc < max) {
265    if ((res = mp_grow (c, max)) != MP_OKAY) {
266      return res;
267    }
268  }
269  olduse = c->used;
270  c->used = max;
271
272  {
273    register mp_digit u, *tmpa, *tmpb, *tmpc;
274    register int i;
275
276    /* alias for digit pointers */
277    tmpa = a->dp;
278    tmpb = b->dp;
279    tmpc = c->dp;
280
281    /* set carry to zero */
282    u = 0;
283    for (i = 0; i < min; i++) {
284      /* T[i] = A[i] - B[i] - U */
285      *tmpc = *tmpa++ - *tmpb++ - u;
286
287      /* U = carry bit of T[i]
288       * Note this saves performing an AND operation since
289       * if a carry does occur it will propagate all the way to the
290       * MSB.  As a result a single shift is enough to get the carry
291       */
292      u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
293
294      /* Clear carry from T[i] */
295      *tmpc++ &= MP_MASK;
296    }
297
298    /* now copy higher words if any, e.g. if A has more digits than B  */
299    for (; i < max; i++) {
300      /* T[i] = A[i] - U */
301      *tmpc = *tmpa++ - u;
302
303      /* U = carry bit of T[i] */
304      u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
305
306      /* Clear carry from T[i] */
307      *tmpc++ &= MP_MASK;
308    }
309
310    /* clear digits above used (since we may not have grown result above) */
311    for (i = c->used; i < olduse; i++) {
312      *tmpc++ = 0;
313    }
314  }
315
316  mp_clamp (c);
317  return MP_OKAY;
318}
319
320
321/* init a new mp_int */
322static int mp_init (mp_int * a)
323{
324  int i;
325
326  /* allocate memory required and clear it */
327  a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC);
328  if (a->dp == NULL) {
329    return MP_MEM;
330  }
331
332  /* set the digits to zero */
333  for (i = 0; i < MP_PREC; i++) {
334      a->dp[i] = 0;
335  }
336
337  /* set the used to zero, allocated digits to the default precision
338   * and sign to positive */
339  a->used  = 0;
340  a->alloc = MP_PREC;
341  a->sign  = MP_ZPOS;
342
343  return MP_OKAY;
344}
345
346
347/* clear one (frees)  */
348static void mp_clear (mp_int * a)
349{
350  int i;
351
352  /* only do anything if a hasn't been freed previously */
353  if (a->dp != NULL) {
354    /* first zero the digits */
355    for (i = 0; i < a->used; i++) {
356        a->dp[i] = 0;
357    }
358
359    /* free ram */
360    XFREE(a->dp);
361
362    /* reset members to make debugging easier */
363    a->dp    = NULL;
364    a->alloc = a->used = 0;
365    a->sign  = MP_ZPOS;
366  }
367}
368
369
370/* high level addition (handles signs) */
371static int mp_add (mp_int * a, mp_int * b, mp_int * c)
372{
373  int     sa, sb, res;
374
375  /* get sign of both inputs */
376  sa = a->sign;
377  sb = b->sign;
378
379  /* handle two cases, not four */
380  if (sa == sb) {
381    /* both positive or both negative */
382    /* add their magnitudes, copy the sign */
383    c->sign = sa;
384    res = s_mp_add (a, b, c);
385  } else {
386    /* one positive, the other negative */
387    /* subtract the one with the greater magnitude from */
388    /* the one of the lesser magnitude.  The result gets */
389    /* the sign of the one with the greater magnitude. */
390    if (mp_cmp_mag (a, b) == MP_LT) {
391      c->sign = sb;
392      res = s_mp_sub (b, a, c);
393    } else {
394      c->sign = sa;
395      res = s_mp_sub (a, b, c);
396    }
397  }
398  return res;
399}
400
401
402/* high level subtraction (handles signs) */
403static int mp_sub (mp_int * a, mp_int * b, mp_int * c)
404{
405  int     sa, sb, res;
406
407  sa = a->sign;
408  sb = b->sign;
409
410  if (sa != sb) {
411    /* subtract a negative from a positive, OR */
412    /* subtract a positive from a negative. */
413    /* In either case, ADD their magnitudes, */
414    /* and use the sign of the first number. */
415    c->sign = sa;
416    res = s_mp_add (a, b, c);
417  } else {
418    /* subtract a positive from a positive, OR */
419    /* subtract a negative from a negative. */
420    /* First, take the difference between their */
421    /* magnitudes, then... */
422    if (mp_cmp_mag (a, b) != MP_LT) {
423      /* Copy the sign from the first */
424      c->sign = sa;
425      /* The first has a larger or equal magnitude */
426      res = s_mp_sub (a, b, c);
427    } else {
428      /* The result has the *opposite* sign from */
429      /* the first number. */
430      c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
431      /* The second has a larger magnitude */
432      res = s_mp_sub (b, a, c);
433    }
434  }
435  return res;
436}
437
438
439/* high level multiplication (handles sign) */
440static int mp_mul (mp_int * a, mp_int * b, mp_int * c)
441{
442  int     res, neg;
443  neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
444
445  /* use Toom-Cook? */
446#ifdef BN_MP_TOOM_MUL_C
447  if (MIN (a->used, b->used) >= TOOM_MUL_CUTOFF) {
448    res = mp_toom_mul(a, b, c);
449  } else
450#endif
451#ifdef BN_MP_KARATSUBA_MUL_C
452  /* use Karatsuba? */
453  if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
454    res = mp_karatsuba_mul (a, b, c);
455  } else
456#endif
457  {
458    /* can we use the fast multiplier?
459     *
460     * The fast multiplier can be used if the output will
461     * have less than MP_WARRAY digits and the number of
462     * digits won't affect carry propagation
463     */
464#ifdef BN_FAST_S_MP_MUL_DIGS_C
465    int     digs = a->used + b->used + 1;
466
467    if ((digs < MP_WARRAY) &&
468        MIN(a->used, b->used) <=
469        (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
470      res = fast_s_mp_mul_digs (a, b, c, digs);
471    } else
472#endif
473#ifdef BN_S_MP_MUL_DIGS_C
474      res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
475#else
476#error mp_mul could fail
477      res = MP_VAL;
478#endif
479
480  }
481  c->sign = (c->used > 0) ? neg : MP_ZPOS;
482  return res;
483}
484
485
486/* d = a * b (mod c) */
487static int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
488{
489  int     res;
490  mp_int  t;
491
492  if ((res = mp_init (&t)) != MP_OKAY) {
493    return res;
494  }
495
496  if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
497    mp_clear (&t);
498    return res;
499  }
500  res = mp_mod (&t, c, d);
501  mp_clear (&t);
502  return res;
503}
504
505
506/* c = a mod b, 0 <= c < b */
507static int mp_mod (mp_int * a, mp_int * b, mp_int * c)
508{
509  mp_int  t;
510  int     res;
511
512  if ((res = mp_init (&t)) != MP_OKAY) {
513    return res;
514  }
515
516  if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
517    mp_clear (&t);
518    return res;
519  }
520
521  if (t.sign != b->sign) {
522    res = mp_add (b, &t, c);
523  } else {
524    res = MP_OKAY;
525    mp_exch (&t, c);
526  }
527
528  mp_clear (&t);
529  return res;
530}
531
532
533/* this is a shell function that calls either the normal or Montgomery
534 * exptmod functions.  Originally the call to the montgomery code was
535 * embedded in the normal function but that wasted alot of stack space
536 * for nothing (since 99% of the time the Montgomery code would be called)
537 */
538static int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
539{
540  int dr;
541
542  /* modulus P must be positive */
543  if (P->sign == MP_NEG) {
544     return MP_VAL;
545  }
546
547  /* if exponent X is negative we have to recurse */
548  if (X->sign == MP_NEG) {
549#ifdef BN_MP_INVMOD_C
550     mp_int tmpG, tmpX;
551     int err;
552
553     /* first compute 1/G mod P */
554     if ((err = mp_init(&tmpG)) != MP_OKAY) {
555        return err;
556     }
557     if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
558        mp_clear(&tmpG);
559        return err;
560     }
561
562     /* now get |X| */
563     if ((err = mp_init(&tmpX)) != MP_OKAY) {
564        mp_clear(&tmpG);
565        return err;
566     }
567     if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
568        mp_clear_multi(&tmpG, &tmpX, NULL);
569        return err;
570     }
571
572     /* and now compute (1/G)**|X| instead of G**X [X < 0] */
573     err = mp_exptmod(&tmpG, &tmpX, P, Y);
574     mp_clear_multi(&tmpG, &tmpX, NULL);
575     return err;
576#else
577#error mp_exptmod would always fail
578     /* no invmod */
579     return MP_VAL;
580#endif
581  }
582
583/* modified diminished radix reduction */
584#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && defined(BN_S_MP_EXPTMOD_C)
585  if (mp_reduce_is_2k_l(P) == MP_YES) {
586     return s_mp_exptmod(G, X, P, Y, 1);
587  }
588#endif
589
590#ifdef BN_MP_DR_IS_MODULUS_C
591  /* is it a DR modulus? */
592  dr = mp_dr_is_modulus(P);
593#else
594  /* default to no */
595  dr = 0;
596#endif
597
598#ifdef BN_MP_REDUCE_IS_2K_C
599  /* if not, is it a unrestricted DR modulus? */
600  if (dr == 0) {
601     dr = mp_reduce_is_2k(P) << 1;
602  }
603#endif
604
605  /* if the modulus is odd or dr != 0 use the montgomery method */
606#ifdef BN_MP_EXPTMOD_FAST_C
607  if (mp_isodd (P) == 1 || dr !=  0) {
608    return mp_exptmod_fast (G, X, P, Y, dr);
609  } else {
610#endif
611#ifdef BN_S_MP_EXPTMOD_C
612    /* otherwise use the generic Barrett reduction technique */
613    return s_mp_exptmod (G, X, P, Y, 0);
614#else
615#error mp_exptmod could fail
616    /* no exptmod for evens */
617    return MP_VAL;
618#endif
619#ifdef BN_MP_EXPTMOD_FAST_C
620  }
621#endif
622}
623
624
625/* compare two ints (signed)*/
626static int mp_cmp (mp_int * a, mp_int * b)
627{
628  /* compare based on sign */
629  if (a->sign != b->sign) {
630     if (a->sign == MP_NEG) {
631        return MP_LT;
632     } else {
633        return MP_GT;
634     }
635  }
636
637  /* compare digits */
638  if (a->sign == MP_NEG) {
639     /* if negative compare opposite direction */
640     return mp_cmp_mag(b, a);
641  } else {
642     return mp_cmp_mag(a, b);
643  }
644}
645
646
647/* compare a digit */
648static int mp_cmp_d(mp_int * a, mp_digit b)
649{
650  /* compare based on sign */
651  if (a->sign == MP_NEG) {
652    return MP_LT;
653  }
654
655  /* compare based on magnitude */
656  if (a->used > 1) {
657    return MP_GT;
658  }
659
660  /* compare the only digit of a to b */
661  if (a->dp[0] > b) {
662    return MP_GT;
663  } else if (a->dp[0] < b) {
664    return MP_LT;
665  } else {
666    return MP_EQ;
667  }
668}
669
670
671/* hac 14.61, pp608 */
672static int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
673{
674  /* b cannot be negative */
675  if (b->sign == MP_NEG || mp_iszero(b) == 1) {
676    return MP_VAL;
677  }
678
679#ifdef BN_FAST_MP_INVMOD_C
680  /* if the modulus is odd we can use a faster routine instead */
681  if (mp_isodd (b) == 1) {
682    return fast_mp_invmod (a, b, c);
683  }
684#endif
685
686#ifdef BN_MP_INVMOD_SLOW_C
687  return mp_invmod_slow(a, b, c);
688#endif
689
690#ifndef BN_FAST_MP_INVMOD_C
691#ifndef BN_MP_INVMOD_SLOW_C
692#error mp_invmod would always fail
693#endif
694#endif
695  return MP_VAL;
696}
697
698
699/* get the size for an unsigned equivalent */
700static int mp_unsigned_bin_size (mp_int * a)
701{
702  int     size = mp_count_bits (a);
703  return (size / 8 + ((size & 7) != 0 ? 1 : 0));
704}
705
706
707/* hac 14.61, pp608 */
708static int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
709{
710  mp_int  x, y, u, v, A, B, C, D;
711  int     res;
712
713  /* b cannot be negative */
714  if (b->sign == MP_NEG || mp_iszero(b) == 1) {
715    return MP_VAL;
716  }
717
718  /* init temps */
719  if ((res = mp_init_multi(&x, &y, &u, &v,
720                           &A, &B, &C, &D, NULL)) != MP_OKAY) {
721     return res;
722  }
723
724  /* x = a, y = b */
725  if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
726      goto LBL_ERR;
727  }
728  if ((res = mp_copy (b, &y)) != MP_OKAY) {
729    goto LBL_ERR;
730  }
731
732  /* 2. [modified] if x,y are both even then return an error! */
733  if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
734    res = MP_VAL;
735    goto LBL_ERR;
736  }
737
738  /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
739  if ((res = mp_copy (&x, &u)) != MP_OKAY) {
740    goto LBL_ERR;
741  }
742  if ((res = mp_copy (&y, &v)) != MP_OKAY) {
743    goto LBL_ERR;
744  }
745  mp_set (&A, 1);
746  mp_set (&D, 1);
747
748top:
749  /* 4.  while u is even do */
750  while (mp_iseven (&u) == 1) {
751    /* 4.1 u = u/2 */
752    if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
753      goto LBL_ERR;
754    }
755    /* 4.2 if A or B is odd then */
756    if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
757      /* A = (A+y)/2, B = (B-x)/2 */
758      if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
759         goto LBL_ERR;
760      }
761      if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
762         goto LBL_ERR;
763      }
764    }
765    /* A = A/2, B = B/2 */
766    if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
767      goto LBL_ERR;
768    }
769    if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
770      goto LBL_ERR;
771    }
772  }
773
774  /* 5.  while v is even do */
775  while (mp_iseven (&v) == 1) {
776    /* 5.1 v = v/2 */
777    if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
778      goto LBL_ERR;
779    }
780    /* 5.2 if C or D is odd then */
781    if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
782      /* C = (C+y)/2, D = (D-x)/2 */
783      if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
784         goto LBL_ERR;
785      }
786      if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
787         goto LBL_ERR;
788      }
789    }
790    /* C = C/2, D = D/2 */
791    if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
792      goto LBL_ERR;
793    }
794    if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
795      goto LBL_ERR;
796    }
797  }
798
799  /* 6.  if u >= v then */
800  if (mp_cmp (&u, &v) != MP_LT) {
801    /* u = u - v, A = A - C, B = B - D */
802    if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
803      goto LBL_ERR;
804    }
805
806    if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
807      goto LBL_ERR;
808    }
809
810    if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
811      goto LBL_ERR;
812    }
813  } else {
814    /* v - v - u, C = C - A, D = D - B */
815    if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
816      goto LBL_ERR;
817    }
818
819    if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
820      goto LBL_ERR;
821    }
822
823    if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
824      goto LBL_ERR;
825    }
826  }
827
828  /* if not zero goto step 4 */
829  if (mp_iszero (&u) == 0)
830    goto top;
831
832  /* now a = C, b = D, gcd == g*v */
833
834  /* if v != 1 then there is no inverse */
835  if (mp_cmp_d (&v, 1) != MP_EQ) {
836    res = MP_VAL;
837    goto LBL_ERR;
838  }
839
840  /* if its too low */
841  while (mp_cmp_d(&C, 0) == MP_LT) {
842      if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
843         goto LBL_ERR;
844      }
845  }
846
847  /* too big */
848  while (mp_cmp_mag(&C, b) != MP_LT) {
849      if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
850         goto LBL_ERR;
851      }
852  }
853
854  /* C is now the inverse */
855  mp_exch (&C, c);
856  res = MP_OKAY;
857LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
858  return res;
859}
860
861
862/* compare maginitude of two ints (unsigned) */
863static int mp_cmp_mag (mp_int * a, mp_int * b)
864{
865  int     n;
866  mp_digit *tmpa, *tmpb;
867
868  /* compare based on # of non-zero digits */
869  if (a->used > b->used) {
870    return MP_GT;
871  }
872
873  if (a->used < b->used) {
874    return MP_LT;
875  }
876
877  /* alias for a */
878  tmpa = a->dp + (a->used - 1);
879
880  /* alias for b */
881  tmpb = b->dp + (a->used - 1);
882
883  /* compare based on digits  */
884  for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
885    if (*tmpa > *tmpb) {
886      return MP_GT;
887    }
888
889    if (*tmpa < *tmpb) {
890      return MP_LT;
891    }
892  }
893  return MP_EQ;
894}
895
896
897/* reads a unsigned char array, assumes the msb is stored first [big endian] */
898static int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
899{
900  int     res;
901
902  /* make sure there are at least two digits */
903  if (a->alloc < 2) {
904     if ((res = mp_grow(a, 2)) != MP_OKAY) {
905        return res;
906     }
907  }
908
909  /* zero the int */
910  mp_zero (a);
911
912  /* read the bytes in */
913  while (c-- > 0) {
914    if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
915      return res;
916    }
917
918#ifndef MP_8BIT
919      a->dp[0] |= *b++;
920      a->used += 1;
921#else
922      a->dp[0] = (*b & MP_MASK);
923      a->dp[1] |= ((*b++ >> 7U) & 1);
924      a->used += 2;
925#endif
926  }
927  mp_clamp (a);
928  return MP_OKAY;
929}
930
931
932/* store in unsigned [big endian] format */
933static int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
934{
935  int     x, res;
936  mp_int  t;
937
938  if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
939    return res;
940  }
941
942  x = 0;
943  while (mp_iszero (&t) == 0) {
944#ifndef MP_8BIT
945      b[x++] = (unsigned char) (t.dp[0] & 255);
946#else
947      b[x++] = (unsigned char) (t.dp[0] | ((t.dp[1] & 0x01) << 7));
948#endif
949    if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
950      mp_clear (&t);
951      return res;
952    }
953  }
954  bn_reverse (b, x);
955  mp_clear (&t);
956  return MP_OKAY;
957}
958
959
960/* shift right by a certain bit count (store quotient in c, optional remainder in d) */
961static int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
962{
963  mp_digit D, r, rr;
964  int     x, res;
965  mp_int  t;
966
967
968  /* if the shift count is <= 0 then we do no work */
969  if (b <= 0) {
970    res = mp_copy (a, c);
971    if (d != NULL) {
972      mp_zero (d);
973    }
974    return res;
975  }
976
977  if ((res = mp_init (&t)) != MP_OKAY) {
978    return res;
979  }
980
981  /* get the remainder */
982  if (d != NULL) {
983    if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
984      mp_clear (&t);
985      return res;
986    }
987  }
988
989  /* copy */
990  if ((res = mp_copy (a, c)) != MP_OKAY) {
991    mp_clear (&t);
992    return res;
993  }
994
995  /* shift by as many digits in the bit count */
996  if (b >= (int)DIGIT_BIT) {
997    mp_rshd (c, b / DIGIT_BIT);
998  }
999
1000  /* shift any bit count < DIGIT_BIT */
1001  D = (mp_digit) (b % DIGIT_BIT);
1002  if (D != 0) {
1003    register mp_digit *tmpc, mask, shift;
1004
1005    /* mask */
1006    mask = (((mp_digit)1) << D) - 1;
1007
1008    /* shift for lsb */
1009    shift = DIGIT_BIT - D;
1010
1011    /* alias */
1012    tmpc = c->dp + (c->used - 1);
1013
1014    /* carry */
1015    r = 0;
1016    for (x = c->used - 1; x >= 0; x--) {
1017      /* get the lower  bits of this word in a temp */
1018      rr = *tmpc & mask;
1019
1020      /* shift the current word and mix in the carry bits from the previous word */
1021      *tmpc = (*tmpc >> D) | (r << shift);
1022      --tmpc;
1023
1024      /* set the carry to the carry bits of the current word found above */
1025      r = rr;
1026    }
1027  }
1028  mp_clamp (c);
1029  if (d != NULL) {
1030    mp_exch (&t, d);
1031  }
1032  mp_clear (&t);
1033  return MP_OKAY;
1034}
1035
1036
1037static int mp_init_copy (mp_int * a, mp_int * b)
1038{
1039  int     res;
1040
1041  if ((res = mp_init (a)) != MP_OKAY) {
1042    return res;
1043  }
1044  return mp_copy (b, a);
1045}
1046
1047
1048/* set to zero */
1049static void mp_zero (mp_int * a)
1050{
1051  int       n;
1052  mp_digit *tmp;
1053
1054  a->sign = MP_ZPOS;
1055  a->used = 0;
1056
1057  tmp = a->dp;
1058  for (n = 0; n < a->alloc; n++) {
1059     *tmp++ = 0;
1060  }
1061}
1062
1063
1064/* copy, b = a */
1065static int mp_copy (mp_int * a, mp_int * b)
1066{
1067  int     res, n;
1068
1069  /* if dst == src do nothing */
1070  if (a == b) {
1071    return MP_OKAY;
1072  }
1073
1074  /* grow dest */
1075  if (b->alloc < a->used) {
1076     if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1077        return res;
1078     }
1079  }
1080
1081  /* zero b and copy the parameters over */
1082  {
1083    register mp_digit *tmpa, *tmpb;
1084
1085    /* pointer aliases */
1086
1087    /* source */
1088    tmpa = a->dp;
1089
1090    /* destination */
1091    tmpb = b->dp;
1092
1093    /* copy all the digits */
1094    for (n = 0; n < a->used; n++) {
1095      *tmpb++ = *tmpa++;
1096    }
1097
1098    /* clear high digits */
1099    for (; n < b->used; n++) {
1100      *tmpb++ = 0;
1101    }
1102  }
1103
1104  /* copy used count and sign */
1105  b->used = a->used;
1106  b->sign = a->sign;
1107  return MP_OKAY;
1108}
1109
1110
1111/* shift right a certain amount of digits */
1112static void mp_rshd (mp_int * a, int b)
1113{
1114  int     x;
1115
1116  /* if b <= 0 then ignore it */
1117  if (b <= 0) {
1118    return;
1119  }
1120
1121  /* if b > used then simply zero it and return */
1122  if (a->used <= b) {
1123    mp_zero (a);
1124    return;
1125  }
1126
1127  {
1128    register mp_digit *bottom, *top;
1129
1130    /* shift the digits down */
1131
1132    /* bottom */
1133    bottom = a->dp;
1134
1135    /* top [offset into digits] */
1136    top = a->dp + b;
1137
1138    /* this is implemented as a sliding window where
1139     * the window is b-digits long and digits from
1140     * the top of the window are copied to the bottom
1141     *
1142     * e.g.
1143
1144     b-2 | b-1 | b0 | b1 | b2 | ... | bb |   ---->
1145                 /\                   |      ---->
1146                  \-------------------/      ---->
1147     */
1148    for (x = 0; x < (a->used - b); x++) {
1149      *bottom++ = *top++;
1150    }
1151
1152    /* zero the top digits */
1153    for (; x < a->used; x++) {
1154      *bottom++ = 0;
1155    }
1156  }
1157
1158  /* remove excess digits */
1159  a->used -= b;
1160}
1161
1162
1163/* swap the elements of two integers, for cases where you can't simply swap the
1164 * mp_int pointers around
1165 */
1166static void mp_exch (mp_int * a, mp_int * b)
1167{
1168  mp_int  t;
1169
1170  t  = *a;
1171  *a = *b;
1172  *b = t;
1173}
1174
1175
1176/* trim unused digits
1177 *
1178 * This is used to ensure that leading zero digits are
1179 * trimed and the leading "used" digit will be non-zero
1180 * Typically very fast.  Also fixes the sign if there
1181 * are no more leading digits
1182 */
1183static void mp_clamp (mp_int * a)
1184{
1185  /* decrease used while the most significant digit is
1186   * zero.
1187   */
1188  while (a->used > 0 && a->dp[a->used - 1] == 0) {
1189    --(a->used);
1190  }
1191
1192  /* reset the sign flag if used == 0 */
1193  if (a->used == 0) {
1194    a->sign = MP_ZPOS;
1195  }
1196}
1197
1198
1199/* grow as required */
1200static int mp_grow (mp_int * a, int size)
1201{
1202  int     i;
1203  mp_digit *tmp;
1204
1205  /* if the alloc size is smaller alloc more ram */
1206  if (a->alloc < size) {
1207    /* ensure there are always at least MP_PREC digits extra on top */
1208    size += (MP_PREC * 2) - (size % MP_PREC);
1209
1210    /* reallocate the array a->dp
1211     *
1212     * We store the return in a temporary variable
1213     * in case the operation failed we don't want
1214     * to overwrite the dp member of a.
1215     */
1216    tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size);
1217    if (tmp == NULL) {
1218      /* reallocation failed but "a" is still valid [can be freed] */
1219      return MP_MEM;
1220    }
1221
1222    /* reallocation succeeded so set a->dp */
1223    a->dp = tmp;
1224
1225    /* zero excess digits */
1226    i        = a->alloc;
1227    a->alloc = size;
1228    for (; i < a->alloc; i++) {
1229      a->dp[i] = 0;
1230    }
1231  }
1232  return MP_OKAY;
1233}
1234
1235
1236/* b = |a|
1237 *
1238 * Simple function copies the input and fixes the sign to positive
1239 */
1240static int mp_abs (mp_int * a, mp_int * b)
1241{
1242  int     res;
1243
1244  /* copy a to b */
1245  if (a != b) {
1246     if ((res = mp_copy (a, b)) != MP_OKAY) {
1247       return res;
1248     }
1249  }
1250
1251  /* force the sign of b to positive */
1252  b->sign = MP_ZPOS;
1253
1254  return MP_OKAY;
1255}
1256
1257
1258/* set to a digit */
1259static void mp_set (mp_int * a, mp_digit b)
1260{
1261  mp_zero (a);
1262  a->dp[0] = b & MP_MASK;
1263  a->used  = (a->dp[0] != 0) ? 1 : 0;
1264}
1265
1266
1267/* b = a/2 */
1268static int mp_div_2(mp_int * a, mp_int * b)
1269{
1270  int     x, res, oldused;
1271
1272  /* copy */
1273  if (b->alloc < a->used) {
1274    if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1275      return res;
1276    }
1277  }
1278
1279  oldused = b->used;
1280  b->used = a->used;
1281  {
1282    register mp_digit r, rr, *tmpa, *tmpb;
1283
1284    /* source alias */
1285    tmpa = a->dp + b->used - 1;
1286
1287    /* dest alias */
1288    tmpb = b->dp + b->used - 1;
1289
1290    /* carry */
1291    r = 0;
1292    for (x = b->used - 1; x >= 0; x--) {
1293      /* get the carry for the next iteration */
1294      rr = *tmpa & 1;
1295
1296      /* shift the current digit, add in carry and store */
1297      *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
1298
1299      /* forward carry to next iteration */
1300      r = rr;
1301    }
1302
1303    /* zero excess digits */
1304    tmpb = b->dp + b->used;
1305    for (x = b->used; x < oldused; x++) {
1306      *tmpb++ = 0;
1307    }
1308  }
1309  b->sign = a->sign;
1310  mp_clamp (b);
1311  return MP_OKAY;
1312}
1313
1314
1315/* shift left by a certain bit count */
1316static int mp_mul_2d (mp_int * a, int b, mp_int * c)
1317{
1318  mp_digit d;
1319  int      res;
1320
1321  /* copy */
1322  if (a != c) {
1323     if ((res = mp_copy (a, c)) != MP_OKAY) {
1324       return res;
1325     }
1326  }
1327
1328  if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
1329     if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
1330       return res;
1331     }
1332  }
1333
1334  /* shift by as many digits in the bit count */
1335  if (b >= (int)DIGIT_BIT) {
1336    if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
1337      return res;
1338    }
1339  }
1340
1341  /* shift any bit count < DIGIT_BIT */
1342  d = (mp_digit) (b % DIGIT_BIT);
1343  if (d != 0) {
1344    register mp_digit *tmpc, shift, mask, r, rr;
1345    register int x;
1346
1347    /* bitmask for carries */
1348    mask = (((mp_digit)1) << d) - 1;
1349
1350    /* shift for msbs */
1351    shift = DIGIT_BIT - d;
1352
1353    /* alias */
1354    tmpc = c->dp;
1355
1356    /* carry */
1357    r    = 0;
1358    for (x = 0; x < c->used; x++) {
1359      /* get the higher bits of the current word */
1360      rr = (*tmpc >> shift) & mask;
1361
1362      /* shift the current word and OR in the carry */
1363      *tmpc = ((*tmpc << d) | r) & MP_MASK;
1364      ++tmpc;
1365
1366      /* set the carry to the carry bits of the current word */
1367      r = rr;
1368    }
1369
1370    /* set final carry */
1371    if (r != 0) {
1372       c->dp[(c->used)++] = r;
1373    }
1374  }
1375  mp_clamp (c);
1376  return MP_OKAY;
1377}
1378
1379
1380static int mp_init_multi(mp_int *mp, ...)
1381{
1382    mp_err res = MP_OKAY;      /* Assume ok until proven otherwise */
1383    int n = 0;                 /* Number of ok inits */
1384    mp_int* cur_arg = mp;
1385    va_list args;
1386
1387    va_start(args, mp);        /* init args to next argument from caller */
1388    while (cur_arg != NULL) {
1389        if (mp_init(cur_arg) != MP_OKAY) {
1390            /* Oops - error! Back-track and mp_clear what we already
1391               succeeded in init-ing, then return error.
1392            */
1393            va_list clean_args;
1394
1395            /* end the current list */
1396            va_end(args);
1397
1398            /* now start cleaning up */
1399            cur_arg = mp;
1400            va_start(clean_args, mp);
1401            while (n--) {
1402                mp_clear(cur_arg);
1403                cur_arg = va_arg(clean_args, mp_int*);
1404            }
1405            va_end(clean_args);
1406            res = MP_MEM;
1407            break;
1408        }
1409        n++;
1410        cur_arg = va_arg(args, mp_int*);
1411    }
1412    va_end(args);
1413    return res;                /* Assumed ok, if error flagged above. */
1414}
1415
1416
1417static void mp_clear_multi(mp_int *mp, ...)
1418{
1419    mp_int* next_mp = mp;
1420    va_list args;
1421    va_start(args, mp);
1422    while (next_mp != NULL) {
1423        mp_clear(next_mp);
1424        next_mp = va_arg(args, mp_int*);
1425    }
1426    va_end(args);
1427}
1428
1429
1430/* shift left a certain amount of digits */
1431static int mp_lshd (mp_int * a, int b)
1432{
1433  int     x, res;
1434
1435  /* if its less than zero return */
1436  if (b <= 0) {
1437    return MP_OKAY;
1438  }
1439
1440  /* grow to fit the new digits */
1441  if (a->alloc < a->used + b) {
1442     if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
1443       return res;
1444     }
1445  }
1446
1447  {
1448    register mp_digit *top, *bottom;
1449
1450    /* increment the used by the shift amount then copy upwards */
1451    a->used += b;
1452
1453    /* top */
1454    top = a->dp + a->used - 1;
1455
1456    /* base */
1457    bottom = a->dp + a->used - 1 - b;
1458
1459    /* much like mp_rshd this is implemented using a sliding window
1460     * except the window goes the otherway around.  Copying from
1461     * the bottom to the top.  see bn_mp_rshd.c for more info.
1462     */
1463    for (x = a->used - 1; x >= b; x--) {
1464      *top-- = *bottom--;
1465    }
1466
1467    /* zero the lower digits */
1468    top = a->dp;
1469    for (x = 0; x < b; x++) {
1470      *top++ = 0;
1471    }
1472  }
1473  return MP_OKAY;
1474}
1475
1476
1477/* returns the number of bits in an int */
1478static int mp_count_bits (mp_int * a)
1479{
1480  int     r;
1481  mp_digit q;
1482
1483  /* shortcut */
1484  if (a->used == 0) {
1485    return 0;
1486  }
1487
1488  /* get number of digits and add that */
1489  r = (a->used - 1) * DIGIT_BIT;
1490
1491  /* take the last digit and count the bits in it */
1492  q = a->dp[a->used - 1];
1493  while (q > ((mp_digit) 0)) {
1494    ++r;
1495    q >>= ((mp_digit) 1);
1496  }
1497  return r;
1498}
1499
1500
1501/* calc a value mod 2**b */
1502static int mp_mod_2d (mp_int * a, int b, mp_int * c)
1503{
1504  int     x, res;
1505
1506  /* if b is <= 0 then zero the int */
1507  if (b <= 0) {
1508    mp_zero (c);
1509    return MP_OKAY;
1510  }
1511
1512  /* if the modulus is larger than the value than return */
1513  if (b >= (int) (a->used * DIGIT_BIT)) {
1514    res = mp_copy (a, c);
1515    return res;
1516  }
1517
1518  /* copy */
1519  if ((res = mp_copy (a, c)) != MP_OKAY) {
1520    return res;
1521  }
1522
1523  /* zero digits above the last digit of the modulus */
1524  for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
1525    c->dp[x] = 0;
1526  }
1527  /* clear the digit that is not completely outside/inside the modulus */
1528  c->dp[b / DIGIT_BIT] &=
1529    (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
1530  mp_clamp (c);
1531  return MP_OKAY;
1532}
1533
1534
1535/* slower bit-bang division... also smaller */
1536static int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
1537{
1538   mp_int ta, tb, tq, q;
1539   int    res, n, n2;
1540
1541  /* is divisor zero ? */
1542  if (mp_iszero (b) == 1) {
1543    return MP_VAL;
1544  }
1545
1546  /* if a < b then q=0, r = a */
1547  if (mp_cmp_mag (a, b) == MP_LT) {
1548    if (d != NULL) {
1549      res = mp_copy (a, d);
1550    } else {
1551      res = MP_OKAY;
1552    }
1553    if (c != NULL) {
1554      mp_zero (c);
1555    }
1556    return res;
1557  }
1558
1559  /* init our temps */
1560  if ((res = mp_init_multi(&ta, &tb, &tq, &q, NULL) != MP_OKAY)) {
1561     return res;
1562  }
1563
1564
1565  mp_set(&tq, 1);
1566  n = mp_count_bits(a) - mp_count_bits(b);
1567  if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
1568      ((res = mp_abs(b, &tb)) != MP_OKAY) ||
1569      ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
1570      ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
1571      goto LBL_ERR;
1572  }
1573
1574  while (n-- >= 0) {
1575     if (mp_cmp(&tb, &ta) != MP_GT) {
1576        if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) ||
1577            ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
1578           goto LBL_ERR;
1579        }
1580     }
1581     if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) ||
1582         ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
1583           goto LBL_ERR;
1584     }
1585  }
1586
1587  /* now q == quotient and ta == remainder */
1588  n  = a->sign;
1589  n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
1590  if (c != NULL) {
1591     mp_exch(c, &q);
1592     c->sign  = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
1593  }
1594  if (d != NULL) {
1595     mp_exch(d, &ta);
1596     d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
1597  }
1598LBL_ERR:
1599   mp_clear_multi(&ta, &tb, &tq, &q, NULL);
1600   return res;
1601}
1602
1603
1604#ifdef MP_LOW_MEM
1605   #define TAB_SIZE 32
1606#else
1607   #define TAB_SIZE 256
1608#endif
1609
1610static int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
1611{
1612  mp_int  M[TAB_SIZE], res, mu;
1613  mp_digit buf;
1614  int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
1615  int (*redux)(mp_int*,mp_int*,mp_int*);
1616
1617  /* find window size */
1618  x = mp_count_bits (X);
1619  if (x <= 7) {
1620    winsize = 2;
1621  } else if (x <= 36) {
1622    winsize = 3;
1623  } else if (x <= 140) {
1624    winsize = 4;
1625  } else if (x <= 450) {
1626    winsize = 5;
1627  } else if (x <= 1303) {
1628    winsize = 6;
1629  } else if (x <= 3529) {
1630    winsize = 7;
1631  } else {
1632    winsize = 8;
1633  }
1634
1635#ifdef MP_LOW_MEM
1636    if (winsize > 5) {
1637       winsize = 5;
1638    }
1639#endif
1640
1641  /* init M array */
1642  /* init first cell */
1643  if ((err = mp_init(&M[1])) != MP_OKAY) {
1644     return err;
1645  }
1646
1647  /* now init the second half of the array */
1648  for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
1649    if ((err = mp_init(&M[x])) != MP_OKAY) {
1650      for (y = 1<<(winsize-1); y < x; y++) {
1651        mp_clear (&M[y]);
1652      }
1653      mp_clear(&M[1]);
1654      return err;
1655    }
1656  }
1657
1658  /* create mu, used for Barrett reduction */
1659  if ((err = mp_init (&mu)) != MP_OKAY) {
1660    goto LBL_M;
1661  }
1662
1663  if (redmode == 0) {
1664     if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
1665        goto LBL_MU;
1666     }
1667     redux = mp_reduce;
1668  } else {
1669     if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
1670        goto LBL_MU;
1671     }
1672     redux = mp_reduce_2k_l;
1673  }
1674
1675  /* create M table
1676   *
1677   * The M table contains powers of the base,
1678   * e.g. M[x] = G**x mod P
1679   *
1680   * The first half of the table is not
1681   * computed though accept for M[0] and M[1]
1682   */
1683  if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
1684    goto LBL_MU;
1685  }
1686
1687  /* compute the value at M[1<<(winsize-1)] by squaring
1688   * M[1] (winsize-1) times
1689   */
1690  if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
1691    goto LBL_MU;
1692  }
1693
1694  for (x = 0; x < (winsize - 1); x++) {
1695    /* square it */
1696    if ((err = mp_sqr (&M[1 << (winsize - 1)],
1697                       &M[1 << (winsize - 1)])) != MP_OKAY) {
1698      goto LBL_MU;
1699    }
1700
1701    /* reduce modulo P */
1702    if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
1703      goto LBL_MU;
1704    }
1705  }
1706
1707  /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
1708   * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
1709   */
1710  for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
1711    if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
1712      goto LBL_MU;
1713    }
1714    if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
1715      goto LBL_MU;
1716    }
1717  }
1718
1719  /* setup result */
1720  if ((err = mp_init (&res)) != MP_OKAY) {
1721    goto LBL_MU;
1722  }
1723  mp_set (&res, 1);
1724
1725  /* set initial mode and bit cnt */
1726  mode   = 0;
1727  bitcnt = 1;
1728  buf    = 0;
1729  digidx = X->used - 1;
1730  bitcpy = 0;
1731  bitbuf = 0;
1732
1733  for (;;) {
1734    /* grab next digit as required */
1735    if (--bitcnt == 0) {
1736      /* if digidx == -1 we are out of digits */
1737      if (digidx == -1) {
1738        break;
1739      }
1740      /* read next digit and reset the bitcnt */
1741      buf    = X->dp[digidx--];
1742      bitcnt = (int) DIGIT_BIT;
1743    }
1744
1745    /* grab the next msb from the exponent */
1746    y     = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
1747    buf <<= (mp_digit)1;
1748
1749    /* if the bit is zero and mode == 0 then we ignore it
1750     * These represent the leading zero bits before the first 1 bit
1751     * in the exponent.  Technically this opt is not required but it
1752     * does lower the # of trivial squaring/reductions used
1753     */
1754    if (mode == 0 && y == 0) {
1755      continue;
1756    }
1757
1758    /* if the bit is zero and mode == 1 then we square */
1759    if (mode == 1 && y == 0) {
1760      if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
1761        goto LBL_RES;
1762      }
1763      if ((err = redux (&res, P, &mu)) != MP_OKAY) {
1764        goto LBL_RES;
1765      }
1766      continue;
1767    }
1768
1769    /* else we add it to the window */
1770    bitbuf |= (y << (winsize - ++bitcpy));
1771    mode    = 2;
1772
1773    if (bitcpy == winsize) {
1774      /* ok window is filled so square as required and multiply  */
1775      /* square first */
1776      for (x = 0; x < winsize; x++) {
1777        if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
1778          goto LBL_RES;
1779        }
1780        if ((err = redux (&res, P, &mu)) != MP_OKAY) {
1781          goto LBL_RES;
1782        }
1783      }
1784
1785      /* then multiply */
1786      if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
1787        goto LBL_RES;
1788      }
1789      if ((err = redux (&res, P, &mu)) != MP_OKAY) {
1790        goto LBL_RES;
1791      }
1792
1793      /* empty window and reset */
1794      bitcpy = 0;
1795      bitbuf = 0;
1796      mode   = 1;
1797    }
1798  }
1799
1800  /* if bits remain then square/multiply */
1801  if (mode == 2 && bitcpy > 0) {
1802    /* square then multiply if the bit is set */
1803    for (x = 0; x < bitcpy; x++) {
1804      if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
1805        goto LBL_RES;
1806      }
1807      if ((err = redux (&res, P, &mu)) != MP_OKAY) {
1808        goto LBL_RES;
1809      }
1810
1811      bitbuf <<= 1;
1812      if ((bitbuf & (1 << winsize)) != 0) {
1813        /* then multiply */
1814        if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
1815          goto LBL_RES;
1816        }
1817        if ((err = redux (&res, P, &mu)) != MP_OKAY) {
1818          goto LBL_RES;
1819        }
1820      }
1821    }
1822  }
1823
1824  mp_exch (&res, Y);
1825  err = MP_OKAY;
1826LBL_RES:mp_clear (&res);
1827LBL_MU:mp_clear (&mu);
1828LBL_M:
1829  mp_clear(&M[1]);
1830  for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
1831    mp_clear (&M[x]);
1832  }
1833  return err;
1834}
1835
1836
1837/* computes b = a*a */
1838static int mp_sqr (mp_int * a, mp_int * b)
1839{
1840  int     res;
1841
1842#ifdef BN_MP_TOOM_SQR_C
1843  /* use Toom-Cook? */
1844  if (a->used >= TOOM_SQR_CUTOFF) {
1845    res = mp_toom_sqr(a, b);
1846  /* Karatsuba? */
1847  } else
1848#endif
1849#ifdef BN_MP_KARATSUBA_SQR_C
1850if (a->used >= KARATSUBA_SQR_CUTOFF) {
1851    res = mp_karatsuba_sqr (a, b);
1852  } else
1853#endif
1854  {
1855#ifdef BN_FAST_S_MP_SQR_C
1856    /* can we use the fast comba multiplier? */
1857    if ((a->used * 2 + 1) < MP_WARRAY &&
1858         a->used <
1859         (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
1860      res = fast_s_mp_sqr (a, b);
1861    } else
1862#endif
1863#ifdef BN_S_MP_SQR_C
1864      res = s_mp_sqr (a, b);
1865#else
1866#error mp_sqr could fail
1867      res = MP_VAL;
1868#endif
1869  }
1870  b->sign = MP_ZPOS;
1871  return res;
1872}
1873
1874
1875/* reduces a modulo n where n is of the form 2**p - d
1876   This differs from reduce_2k since "d" can be larger
1877   than a single digit.
1878*/
1879static int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
1880{
1881   mp_int q;
1882   int    p, res;
1883
1884   if ((res = mp_init(&q)) != MP_OKAY) {
1885      return res;
1886   }
1887
1888   p = mp_count_bits(n);
1889top:
1890   /* q = a/2**p, a = a mod 2**p */
1891   if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
1892      goto ERR;
1893   }
1894
1895   /* q = q * d */
1896   if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
1897      goto ERR;
1898   }
1899
1900   /* a = a + q */
1901   if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
1902      goto ERR;
1903   }
1904
1905   if (mp_cmp_mag(a, n) != MP_LT) {
1906      s_mp_sub(a, n, a);
1907      goto top;
1908   }
1909
1910ERR:
1911   mp_clear(&q);
1912   return res;
1913}
1914
1915
1916/* determines the setup value */
1917static int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
1918{
1919   int    res;
1920   mp_int tmp;
1921
1922   if ((res = mp_init(&tmp)) != MP_OKAY) {
1923      return res;
1924   }
1925
1926   if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
1927      goto ERR;
1928   }
1929
1930   if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
1931      goto ERR;
1932   }
1933
1934ERR:
1935   mp_clear(&tmp);
1936   return res;
1937}
1938
1939
1940/* computes a = 2**b
1941 *
1942 * Simple algorithm which zeroes the int, grows it then just sets one bit
1943 * as required.
1944 */
1945static int mp_2expt (mp_int * a, int b)
1946{
1947  int     res;
1948
1949  /* zero a as per default */
1950  mp_zero (a);
1951
1952  /* grow a to accomodate the single bit */
1953  if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
1954    return res;
1955  }
1956
1957  /* set the used count of where the bit will go */
1958  a->used = b / DIGIT_BIT + 1;
1959
1960  /* put the single bit in its place */
1961  a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
1962
1963  return MP_OKAY;
1964}
1965
1966
1967/* pre-calculate the value required for Barrett reduction
1968 * For a given modulus "b" it calulates the value required in "a"
1969 */
1970static int mp_reduce_setup (mp_int * a, mp_int * b)
1971{
1972  int     res;
1973
1974  if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
1975    return res;
1976  }
1977  return mp_div (a, b, a, NULL);
1978}
1979
1980
1981/* reduces x mod m, assumes 0 < x < m**2, mu is
1982 * precomputed via mp_reduce_setup.
1983 * From HAC pp.604 Algorithm 14.42
1984 */
1985static int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
1986{
1987  mp_int  q;
1988  int     res, um = m->used;
1989
1990  /* q = x */
1991  if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
1992    return res;
1993  }
1994
1995  /* q1 = x / b**(k-1)  */
1996  mp_rshd (&q, um - 1);
1997
1998  /* according to HAC this optimization is ok */
1999  if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
2000    if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
2001      goto CLEANUP;
2002    }
2003  } else {
2004#ifdef BN_S_MP_MUL_HIGH_DIGS_C
2005    if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
2006      goto CLEANUP;
2007    }
2008#elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
2009    if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
2010      goto CLEANUP;
2011    }
2012#else
2013    {
2014#error mp_reduce would always fail
2015      res = MP_VAL;
2016      goto CLEANUP;
2017    }
2018#endif
2019  }
2020
2021  /* q3 = q2 / b**(k+1) */
2022  mp_rshd (&q, um + 1);
2023
2024  /* x = x mod b**(k+1), quick (no division) */
2025  if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
2026    goto CLEANUP;
2027  }
2028
2029  /* q = q * m mod b**(k+1), quick (no division) */
2030  if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
2031    goto CLEANUP;
2032  }
2033
2034  /* x = x - q */
2035  if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
2036    goto CLEANUP;
2037  }
2038
2039  /* If x < 0, add b**(k+1) to it */
2040  if (mp_cmp_d (x, 0) == MP_LT) {
2041    mp_set (&q, 1);
2042    if ((res = mp_lshd (&q, um + 1)) != MP_OKAY) {
2043      goto CLEANUP;
2044    }
2045    if ((res = mp_add (x, &q, x)) != MP_OKAY) {
2046      goto CLEANUP;
2047    }
2048  }
2049
2050  /* Back off if it's too big */
2051  while (mp_cmp (x, m) != MP_LT) {
2052    if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
2053      goto CLEANUP;
2054    }
2055  }
2056
2057CLEANUP:
2058  mp_clear (&q);
2059
2060  return res;
2061}
2062
2063
2064/* multiplies |a| * |b| and only computes upto digs digits of result
2065 * HAC pp. 595, Algorithm 14.12  Modified so you can control how
2066 * many digits of output are created.
2067 */
2068static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
2069{
2070  mp_int  t;
2071  int     res, pa, pb, ix, iy;
2072  mp_digit u;
2073  mp_word r;
2074  mp_digit tmpx, *tmpt, *tmpy;
2075
2076  /* can we use the fast multiplier? */
2077  if (((digs) < MP_WARRAY) &&
2078      MIN (a->used, b->used) <
2079          (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2080    return fast_s_mp_mul_digs (a, b, c, digs);
2081  }
2082
2083  if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
2084    return res;
2085  }
2086  t.used = digs;
2087
2088  /* compute the digits of the product directly */
2089  pa = a->used;
2090  for (ix = 0; ix < pa; ix++) {
2091    /* set the carry to zero */
2092    u = 0;
2093
2094    /* limit ourselves to making digs digits of output */
2095    pb = MIN (b->used, digs - ix);
2096
2097    /* setup some aliases */
2098    /* copy of the digit from a used within the nested loop */
2099    tmpx = a->dp[ix];
2100
2101    /* an alias for the destination shifted ix places */
2102    tmpt = t.dp + ix;
2103
2104    /* an alias for the digits of b */
2105    tmpy = b->dp;
2106
2107    /* compute the columns of the output and propagate the carry */
2108    for (iy = 0; iy < pb; iy++) {
2109      /* compute the column as a mp_word */
2110      r       = ((mp_word)*tmpt) +
2111                ((mp_word)tmpx) * ((mp_word)*tmpy++) +
2112                ((mp_word) u);
2113
2114      /* the new column is the lower part of the result */
2115      *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
2116
2117      /* get the carry word from the result */
2118      u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
2119    }
2120    /* set carry if it is placed below digs */
2121    if (ix + iy < digs) {
2122      *tmpt = u;
2123    }
2124  }
2125
2126  mp_clamp (&t);
2127  mp_exch (&t, c);
2128
2129  mp_clear (&t);
2130  return MP_OKAY;
2131}
2132
2133
2134/* Fast (comba) multiplier
2135 *
2136 * This is the fast column-array [comba] multiplier.  It is
2137 * designed to compute the columns of the product first
2138 * then handle the carries afterwards.  This has the effect
2139 * of making the nested loops that compute the columns very
2140 * simple and schedulable on super-scalar processors.
2141 *
2142 * This has been modified to produce a variable number of
2143 * digits of output so if say only a half-product is required
2144 * you don't have to compute the upper half (a feature
2145 * required for fast Barrett reduction).
2146 *
2147 * Based on Algorithm 14.12 on pp.595 of HAC.
2148 *
2149 */
2150static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
2151{
2152  int     olduse, res, pa, ix, iz;
2153  mp_digit W[MP_WARRAY];
2154  register mp_word  _W;
2155
2156  /* grow the destination as required */
2157  if (c->alloc < digs) {
2158    if ((res = mp_grow (c, digs)) != MP_OKAY) {
2159      return res;
2160    }
2161  }
2162
2163  /* number of output digits to produce */
2164  pa = MIN(digs, a->used + b->used);
2165
2166  /* clear the carry */
2167  _W = 0;
2168  for (ix = 0; ix < pa; ix++) {
2169      int      tx, ty;
2170      int      iy;
2171      mp_digit *tmpx, *tmpy;
2172
2173      /* get offsets into the two bignums */
2174      ty = MIN(b->used-1, ix);
2175      tx = ix - ty;
2176
2177      /* setup temp aliases */
2178      tmpx = a->dp + tx;
2179      tmpy = b->dp + ty;
2180
2181      /* this is the number of times the loop will iterrate, essentially
2182         while (tx++ < a->used && ty-- >= 0) { ... }
2183       */
2184      iy = MIN(a->used-tx, ty+1);
2185
2186      /* execute loop */
2187      for (iz = 0; iz < iy; ++iz) {
2188         _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
2189
2190      }
2191
2192      /* store term */
2193      W[ix] = ((mp_digit)_W) & MP_MASK;
2194
2195      /* make next carry */
2196      _W = _W >> ((mp_word)DIGIT_BIT);
2197 }
2198
2199  /* setup dest */
2200  olduse  = c->used;
2201  c->used = pa;
2202
2203  {
2204    register mp_digit *tmpc;
2205    tmpc = c->dp;
2206    for (ix = 0; ix < pa+1; ix++) {
2207      /* now extract the previous digit [below the carry] */
2208      *tmpc++ = W[ix];
2209    }
2210
2211    /* clear unused digits [that existed in the old copy of c] */
2212    for (; ix < olduse; ix++) {
2213      *tmpc++ = 0;
2214    }
2215  }
2216  mp_clamp (c);
2217  return MP_OKAY;
2218}
2219
2220
2221/* init an mp_init for a given size */
2222static int mp_init_size (mp_int * a, int size)
2223{
2224  int x;
2225
2226  /* pad size so there are always extra digits */
2227  size += (MP_PREC * 2) - (size % MP_PREC);
2228
2229  /* alloc mem */
2230  a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size);
2231  if (a->dp == NULL) {
2232    return MP_MEM;
2233  }
2234
2235  /* set the members */
2236  a->used  = 0;
2237  a->alloc = size;
2238  a->sign  = MP_ZPOS;
2239
2240  /* zero the digits */
2241  for (x = 0; x < size; x++) {
2242      a->dp[x] = 0;
2243  }
2244
2245  return MP_OKAY;
2246}
2247
2248
2249/* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
2250static int s_mp_sqr (mp_int * a, mp_int * b)
2251{
2252  mp_int  t;
2253  int     res, ix, iy, pa;
2254  mp_word r;
2255  mp_digit u, tmpx, *tmpt;
2256
2257  pa = a->used;
2258  if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
2259    return res;
2260  }
2261
2262  /* default used is maximum possible size */
2263  t.used = 2*pa + 1;
2264
2265  for (ix = 0; ix < pa; ix++) {
2266    /* first calculate the digit at 2*ix */
2267    /* calculate double precision result */
2268    r = ((mp_word) t.dp[2*ix]) +
2269        ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
2270
2271    /* store lower part in result */
2272    t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
2273
2274    /* get the carry */
2275    u           = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
2276
2277    /* left hand side of A[ix] * A[iy] */
2278    tmpx        = a->dp[ix];
2279
2280    /* alias for where to store the results */
2281    tmpt        = t.dp + (2*ix + 1);
2282
2283    for (iy = ix + 1; iy < pa; iy++) {
2284      /* first calculate the product */
2285      r       = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
2286
2287      /* now calculate the double precision result, note we use
2288       * addition instead of *2 since it's easier to optimize
2289       */
2290      r       = ((mp_word) *tmpt) + r + r + ((mp_word) u);
2291
2292      /* store lower part */
2293      *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
2294
2295      /* get carry */
2296      u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
2297    }
2298    /* propagate upwards */
2299    while (u != ((mp_digit) 0)) {
2300      r       = ((mp_word) *tmpt) + ((mp_word) u);
2301      *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
2302      u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
2303    }
2304  }
2305
2306  mp_clamp (&t);
2307  mp_exch (&t, b);
2308  mp_clear (&t);
2309  return MP_OKAY;
2310}
2311
2312
2313/* multiplies |a| * |b| and does not compute the lower digs digits
2314 * [meant to get the higher part of the product]
2315 */
2316static int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
2317{
2318  mp_int  t;
2319  int     res, pa, pb, ix, iy;
2320  mp_digit u;
2321  mp_word r;
2322  mp_digit tmpx, *tmpt, *tmpy;
2323
2324  /* can we use the fast multiplier? */
2325#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
2326  if (((a->used + b->used + 1) < MP_WARRAY)
2327      && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2328    return fast_s_mp_mul_high_digs (a, b, c, digs);
2329  }
2330#endif
2331
2332  if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
2333    return res;
2334  }
2335  t.used = a->used + b->used + 1;
2336
2337  pa = a->used;
2338  pb = b->used;
2339  for (ix = 0; ix < pa; ix++) {
2340    /* clear the carry */
2341    u = 0;
2342
2343    /* left hand side of A[ix] * B[iy] */
2344    tmpx = a->dp[ix];
2345
2346    /* alias to the address of where the digits will be stored */
2347    tmpt = &(t.dp[digs]);
2348
2349    /* alias for where to read the right hand side from */
2350    tmpy = b->dp + (digs - ix);
2351
2352    for (iy = digs - ix; iy < pb; iy++) {
2353      /* calculate the double precision result */
2354      r       = ((mp_word)*tmpt) +
2355                ((mp_word)tmpx) * ((mp_word)*tmpy++) +
2356                ((mp_word) u);
2357
2358      /* get the lower part */
2359      *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
2360
2361      /* carry the carry */
2362      u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
2363    }
2364    *tmpt = u;
2365  }
2366  mp_clamp (&t);
2367  mp_exch (&t, c);
2368  mp_clear (&t);
2369  return MP_OKAY;
2370}
2371