1/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
2 * All rights reserved.
3 *
4 * This package is an SSL implementation written
5 * by Eric Young (eay@cryptsoft.com).
6 * The implementation was written so as to conform with Netscapes SSL.
7 *
8 * This library is free for commercial and non-commercial use as long as
9 * the following conditions are aheared to.  The following conditions
10 * apply to all code found in this distribution, be it the RC4, RSA,
11 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
12 * included with this distribution is covered by the same copyright terms
13 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
14 *
15 * Copyright remains Eric Young's, and as such any Copyright notices in
16 * the code are not to be removed.
17 * If this package is used in a product, Eric Young should be given attribution
18 * as the author of the parts of the library used.
19 * This can be in the form of a textual message at program startup or
20 * in documentation (online or textual) provided with the package.
21 *
22 * Redistribution and use in source and binary forms, with or without
23 * modification, are permitted provided that the following conditions
24 * are met:
25 * 1. Redistributions of source code must retain the copyright
26 *    notice, this list of conditions and the following disclaimer.
27 * 2. Redistributions in binary form must reproduce the above copyright
28 *    notice, this list of conditions and the following disclaimer in the
29 *    documentation and/or other materials provided with the distribution.
30 * 3. All advertising materials mentioning features or use of this software
31 *    must display the following acknowledgement:
32 *    "This product includes cryptographic software written by
33 *     Eric Young (eay@cryptsoft.com)"
34 *    The word 'cryptographic' can be left out if the rouines from the library
35 *    being used are not cryptographic related :-).
36 * 4. If you include any Windows specific code (or a derivative thereof) from
37 *    the apps directory (application code) you must include an acknowledgement:
38 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
39 *
40 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50 * SUCH DAMAGE.
51 *
52 * The licence and distribution terms for any publically available version or
53 * derivative of this code cannot be changed.  i.e. this code cannot simply be
54 * copied and put under another distribution licence
55 * [including the GNU Public Licence.] */
56
57#include <openssl/bn.h>
58
59#include <assert.h>
60#include <string.h>
61
62#include <openssl/err.h>
63#include <openssl/mem.h>
64
65#include "internal.h"
66#include "../../internal.h"
67
68
69#define BN_MUL_RECURSIVE_SIZE_NORMAL 16
70#define BN_SQR_RECURSIVE_SIZE_NORMAL BN_MUL_RECURSIVE_SIZE_NORMAL
71
72
73static void bn_mul_normal(BN_ULONG *r, const BN_ULONG *a, size_t na,
74                          const BN_ULONG *b, size_t nb) {
75  if (na < nb) {
76    size_t itmp = na;
77    na = nb;
78    nb = itmp;
79    const BN_ULONG *ltmp = a;
80    a = b;
81    b = ltmp;
82  }
83  BN_ULONG *rr = &(r[na]);
84  if (nb == 0) {
85    OPENSSL_memset(r, 0, na * sizeof(BN_ULONG));
86    return;
87  }
88  rr[0] = bn_mul_words(r, a, na, b[0]);
89
90  for (;;) {
91    if (--nb == 0) {
92      return;
93    }
94    rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]);
95    if (--nb == 0) {
96      return;
97    }
98    rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]);
99    if (--nb == 0) {
100      return;
101    }
102    rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]);
103    if (--nb == 0) {
104      return;
105    }
106    rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]);
107    rr += 4;
108    r += 4;
109    b += 4;
110  }
111}
112
113#if !defined(OPENSSL_X86) || defined(OPENSSL_NO_ASM)
114// Here follows specialised variants of bn_add_words() and bn_sub_words(). They
115// have the property performing operations on arrays of different sizes. The
116// sizes of those arrays is expressed through cl, which is the common length (
117// basicall, min(len(a),len(b)) ), and dl, which is the delta between the two
118// lengths, calculated as len(a)-len(b). All lengths are the number of
119// BN_ULONGs...  For the operations that require a result array as parameter,
120// it must have the length cl+abs(dl). These functions should probably end up
121// in bn_asm.c as soon as there are assembler counterparts for the systems that
122// use assembler files.
123
124static BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a,
125                                  const BN_ULONG *b, int cl, int dl) {
126  BN_ULONG c, t;
127
128  assert(cl >= 0);
129  c = bn_sub_words(r, a, b, cl);
130
131  if (dl == 0) {
132    return c;
133  }
134
135  r += cl;
136  a += cl;
137  b += cl;
138
139  if (dl < 0) {
140    for (;;) {
141      t = b[0];
142      r[0] = 0 - t - c;
143      if (t != 0) {
144        c = 1;
145      }
146      if (++dl >= 0) {
147        break;
148      }
149
150      t = b[1];
151      r[1] = 0 - t - c;
152      if (t != 0) {
153        c = 1;
154      }
155      if (++dl >= 0) {
156        break;
157      }
158
159      t = b[2];
160      r[2] = 0 - t - c;
161      if (t != 0) {
162        c = 1;
163      }
164      if (++dl >= 0) {
165        break;
166      }
167
168      t = b[3];
169      r[3] = 0 - t - c;
170      if (t != 0) {
171        c = 1;
172      }
173      if (++dl >= 0) {
174        break;
175      }
176
177      b += 4;
178      r += 4;
179    }
180  } else {
181    int save_dl = dl;
182    while (c) {
183      t = a[0];
184      r[0] = t - c;
185      if (t != 0) {
186        c = 0;
187      }
188      if (--dl <= 0) {
189        break;
190      }
191
192      t = a[1];
193      r[1] = t - c;
194      if (t != 0) {
195        c = 0;
196      }
197      if (--dl <= 0) {
198        break;
199      }
200
201      t = a[2];
202      r[2] = t - c;
203      if (t != 0) {
204        c = 0;
205      }
206      if (--dl <= 0) {
207        break;
208      }
209
210      t = a[3];
211      r[3] = t - c;
212      if (t != 0) {
213        c = 0;
214      }
215      if (--dl <= 0) {
216        break;
217      }
218
219      save_dl = dl;
220      a += 4;
221      r += 4;
222    }
223    if (dl > 0) {
224      if (save_dl > dl) {
225        switch (save_dl - dl) {
226          case 1:
227            r[1] = a[1];
228            if (--dl <= 0) {
229              break;
230            }
231            OPENSSL_FALLTHROUGH;
232          case 2:
233            r[2] = a[2];
234            if (--dl <= 0) {
235              break;
236            }
237            OPENSSL_FALLTHROUGH;
238          case 3:
239            r[3] = a[3];
240            if (--dl <= 0) {
241              break;
242            }
243        }
244        a += 4;
245        r += 4;
246      }
247    }
248
249    if (dl > 0) {
250      for (;;) {
251        r[0] = a[0];
252        if (--dl <= 0) {
253          break;
254        }
255        r[1] = a[1];
256        if (--dl <= 0) {
257          break;
258        }
259        r[2] = a[2];
260        if (--dl <= 0) {
261          break;
262        }
263        r[3] = a[3];
264        if (--dl <= 0) {
265          break;
266        }
267
268        a += 4;
269        r += 4;
270      }
271    }
272  }
273
274  return c;
275}
276#else
277// On other platforms the function is defined in asm.
278BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
279                           int cl, int dl);
280#endif
281
282// Karatsuba recursive multiplication algorithm
283// (cf. Knuth, The Art of Computer Programming, Vol. 2)
284
285// r is 2*n2 words in size,
286// a and b are both n2 words in size.
287// n2 must be a power of 2.
288// We multiply and return the result.
289// t must be 2*n2 words in size
290// We calculate
291// a[0]*b[0]
292// a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
293// a[1]*b[1]
294// dnX may not be positive, but n2/2+dnX has to be
295static void bn_mul_recursive(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
296                             int n2, int dna, int dnb, BN_ULONG *t) {
297  int n = n2 / 2, c1, c2;
298  int tna = n + dna, tnb = n + dnb;
299  unsigned int neg, zero;
300  BN_ULONG ln, lo, *p;
301
302  // Only call bn_mul_comba 8 if n2 == 8 and the
303  // two arrays are complete [steve]
304  if (n2 == 8 && dna == 0 && dnb == 0) {
305    bn_mul_comba8(r, a, b);
306    return;
307  }
308
309  // Else do normal multiply
310  if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) {
311    bn_mul_normal(r, a, n2 + dna, b, n2 + dnb);
312    if ((dna + dnb) < 0) {
313      OPENSSL_memset(&r[2 * n2 + dna + dnb], 0,
314                     sizeof(BN_ULONG) * -(dna + dnb));
315    }
316    return;
317  }
318
319  // r=(a[0]-a[1])*(b[1]-b[0])
320  c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
321  c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
322  zero = neg = 0;
323  switch (c1 * 3 + c2) {
324    case -4:
325      bn_sub_part_words(t, &(a[n]), a, tna, tna - n);        // -
326      bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb);  // -
327      break;
328    case -3:
329      zero = 1;
330      break;
331    case -2:
332      bn_sub_part_words(t, &(a[n]), a, tna, tna - n);        // -
333      bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);  // +
334      neg = 1;
335      break;
336    case -1:
337    case 0:
338    case 1:
339      zero = 1;
340      break;
341    case 2:
342      bn_sub_part_words(t, a, &(a[n]), tna, n - tna);        // +
343      bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb);  // -
344      neg = 1;
345      break;
346    case 3:
347      zero = 1;
348      break;
349    case 4:
350      bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
351      bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
352      break;
353  }
354
355  if (n == 4 && dna == 0 && dnb == 0) {
356    // XXX: bn_mul_comba4 could take extra args to do this well
357    if (!zero) {
358      bn_mul_comba4(&(t[n2]), t, &(t[n]));
359    } else {
360      OPENSSL_memset(&(t[n2]), 0, 8 * sizeof(BN_ULONG));
361    }
362
363    bn_mul_comba4(r, a, b);
364    bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n]));
365  } else if (n == 8 && dna == 0 && dnb == 0) {
366    // XXX: bn_mul_comba8 could take extra args to do this well
367    if (!zero) {
368      bn_mul_comba8(&(t[n2]), t, &(t[n]));
369    } else {
370      OPENSSL_memset(&(t[n2]), 0, 16 * sizeof(BN_ULONG));
371    }
372
373    bn_mul_comba8(r, a, b);
374    bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n]));
375  } else {
376    p = &(t[n2 * 2]);
377    if (!zero) {
378      bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
379    } else {
380      OPENSSL_memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG));
381    }
382    bn_mul_recursive(r, a, b, n, 0, 0, p);
383    bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p);
384  }
385
386  // t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
387  // r[10] holds (a[0]*b[0])
388  // r[32] holds (b[1]*b[1])
389
390  c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
391
392  if (neg) {
393    // if t[32] is negative
394    c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
395  } else {
396    // Might have a carry
397    c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
398  }
399
400  // t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
401  // r[10] holds (a[0]*b[0])
402  // r[32] holds (b[1]*b[1])
403  // c1 holds the carry bits
404  c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
405  if (c1) {
406    p = &(r[n + n2]);
407    lo = *p;
408    ln = lo + c1;
409    *p = ln;
410
411    // The overflow will stop before we over write
412    // words we should not overwrite
413    if (ln < (BN_ULONG)c1) {
414      do {
415        p++;
416        lo = *p;
417        ln = lo + 1;
418        *p = ln;
419      } while (ln == 0);
420    }
421  }
422}
423
424// n+tn is the word length
425// t needs to be n*4 is size, as does r
426// tnX may not be negative but less than n
427static void bn_mul_part_recursive(BN_ULONG *r, const BN_ULONG *a,
428                                  const BN_ULONG *b, int n, int tna, int tnb,
429                                  BN_ULONG *t) {
430  int i, j, n2 = n * 2;
431  int c1, c2, neg;
432  BN_ULONG ln, lo, *p;
433
434  if (n < 8) {
435    bn_mul_normal(r, a, n + tna, b, n + tnb);
436    return;
437  }
438
439  // r=(a[0]-a[1])*(b[1]-b[0])
440  c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
441  c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
442  neg = 0;
443  switch (c1 * 3 + c2) {
444    case -4:
445      bn_sub_part_words(t, &(a[n]), a, tna, tna - n);        // -
446      bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb);  // -
447      break;
448    case -3:
449      // break;
450    case -2:
451      bn_sub_part_words(t, &(a[n]), a, tna, tna - n);        // -
452      bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);  // +
453      neg = 1;
454      break;
455    case -1:
456    case 0:
457    case 1:
458      // break;
459    case 2:
460      bn_sub_part_words(t, a, &(a[n]), tna, n - tna);        // +
461      bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb);  // -
462      neg = 1;
463      break;
464    case 3:
465      // break;
466    case 4:
467      bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
468      bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
469      break;
470  }
471
472  if (n == 8) {
473    bn_mul_comba8(&(t[n2]), t, &(t[n]));
474    bn_mul_comba8(r, a, b);
475    bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
476    OPENSSL_memset(&(r[n2 + tna + tnb]), 0, sizeof(BN_ULONG) * (n2 - tna - tnb));
477  } else {
478    p = &(t[n2 * 2]);
479    bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
480    bn_mul_recursive(r, a, b, n, 0, 0, p);
481    i = n / 2;
482    // If there is only a bottom half to the number,
483    // just do it
484    if (tna > tnb) {
485      j = tna - i;
486    } else {
487      j = tnb - i;
488    }
489
490    if (j == 0) {
491      bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), i, tna - i, tnb - i, p);
492      OPENSSL_memset(&(r[n2 + i * 2]), 0, sizeof(BN_ULONG) * (n2 - i * 2));
493    } else if (j > 0) {
494      // eg, n == 16, i == 8 and tn == 11
495      bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]), i, tna - i, tnb - i, p);
496      OPENSSL_memset(&(r[n2 + tna + tnb]), 0,
497                     sizeof(BN_ULONG) * (n2 - tna - tnb));
498    } else {
499      // (j < 0) eg, n == 16, i == 8 and tn == 5
500      OPENSSL_memset(&(r[n2]), 0, sizeof(BN_ULONG) * n2);
501      if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL &&
502          tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) {
503        bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
504      } else {
505        for (;;) {
506          i /= 2;
507          // these simplified conditions work
508          // exclusively because difference
509          // between tna and tnb is 1 or 0
510          if (i < tna || i < tnb) {
511            bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]), i, tna - i,
512                                  tnb - i, p);
513            break;
514          } else if (i == tna || i == tnb) {
515            bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), i, tna - i, tnb - i,
516                             p);
517            break;
518          }
519        }
520      }
521    }
522  }
523
524  // t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
525  // r[10] holds (a[0]*b[0])
526  // r[32] holds (b[1]*b[1])
527
528  c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
529
530  if (neg) {
531    // if t[32] is negative
532    c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
533  } else {
534    // Might have a carry
535    c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
536  }
537
538  // t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
539  // r[10] holds (a[0]*b[0])
540  // r[32] holds (b[1]*b[1])
541  // c1 holds the carry bits
542  c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
543  if (c1) {
544    p = &(r[n + n2]);
545    lo = *p;
546    ln = lo + c1;
547    *p = ln;
548
549    // The overflow will stop before we over write
550    // words we should not overwrite
551    if (ln < (BN_ULONG)c1) {
552      do {
553        p++;
554        lo = *p;
555        ln = lo + 1;
556        *p = ln;
557      } while (ln == 0);
558    }
559  }
560}
561
562int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) {
563  int ret = 0;
564  int top, al, bl;
565  BIGNUM *rr;
566  int i;
567  BIGNUM *t = NULL;
568  int j = 0, k;
569
570  al = a->top;
571  bl = b->top;
572
573  if ((al == 0) || (bl == 0)) {
574    BN_zero(r);
575    return 1;
576  }
577  top = al + bl;
578
579  BN_CTX_start(ctx);
580  if ((r == a) || (r == b)) {
581    if ((rr = BN_CTX_get(ctx)) == NULL) {
582      goto err;
583    }
584  } else {
585    rr = r;
586  }
587  rr->neg = a->neg ^ b->neg;
588
589  i = al - bl;
590  if (i == 0) {
591    if (al == 8) {
592      if (!bn_wexpand(rr, 16)) {
593        goto err;
594      }
595      rr->top = 16;
596      bn_mul_comba8(rr->d, a->d, b->d);
597      goto end;
598    }
599  }
600
601  static const int kMulNormalSize = 16;
602  if (al >= kMulNormalSize && bl >= kMulNormalSize) {
603    if (i >= -1 && i <= 1) {
604      /* Find out the power of two lower or equal
605         to the longest of the two numbers */
606      if (i >= 0) {
607        j = BN_num_bits_word((BN_ULONG)al);
608      }
609      if (i == -1) {
610        j = BN_num_bits_word((BN_ULONG)bl);
611      }
612      j = 1 << (j - 1);
613      assert(j <= al || j <= bl);
614      k = j + j;
615      t = BN_CTX_get(ctx);
616      if (t == NULL) {
617        goto err;
618      }
619      if (al > j || bl > j) {
620        if (!bn_wexpand(t, k * 4)) {
621          goto err;
622        }
623        if (!bn_wexpand(rr, k * 4)) {
624          goto err;
625        }
626        bn_mul_part_recursive(rr->d, a->d, b->d, j, al - j, bl - j, t->d);
627      } else {
628        // al <= j || bl <= j
629        if (!bn_wexpand(t, k * 2)) {
630          goto err;
631        }
632        if (!bn_wexpand(rr, k * 2)) {
633          goto err;
634        }
635        bn_mul_recursive(rr->d, a->d, b->d, j, al - j, bl - j, t->d);
636      }
637      rr->top = top;
638      goto end;
639    }
640  }
641
642  if (!bn_wexpand(rr, top)) {
643    goto err;
644  }
645  rr->top = top;
646  bn_mul_normal(rr->d, a->d, al, b->d, bl);
647
648end:
649  bn_correct_top(rr);
650  if (r != rr && !BN_copy(r, rr)) {
651    goto err;
652  }
653  ret = 1;
654
655err:
656  BN_CTX_end(ctx);
657  return ret;
658}
659
660int bn_mul_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a, size_t num_a,
661                 const BN_ULONG *b, size_t num_b) {
662  if (num_r != num_a + num_b) {
663    OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
664    return 0;
665  }
666  // TODO(davidben): Should this call |bn_mul_comba4| too? |BN_mul| does not
667  // hit that code.
668  if (num_a == 8 && num_b == 8) {
669    bn_mul_comba8(r, a, b);
670  } else {
671    bn_mul_normal(r, a, num_a, b, num_b);
672  }
673  return 1;
674}
675
676// tmp must have 2*n words
677static void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, size_t n,
678                          BN_ULONG *tmp) {
679  if (n == 0) {
680    return;
681  }
682
683  size_t max = n * 2;
684  const BN_ULONG *ap = a;
685  BN_ULONG *rp = r;
686  rp[0] = rp[max - 1] = 0;
687  rp++;
688
689  // Compute the contribution of a[i] * a[j] for all i < j.
690  if (n > 1) {
691    ap++;
692    rp[n - 1] = bn_mul_words(rp, ap, n - 1, ap[-1]);
693    rp += 2;
694  }
695  if (n > 2) {
696    for (size_t i = n - 2; i > 0; i--) {
697      ap++;
698      rp[i] = bn_mul_add_words(rp, ap, i, ap[-1]);
699      rp += 2;
700    }
701  }
702
703  // The final result fits in |max| words, so none of the following operations
704  // will overflow.
705
706  // Double |r|, giving the contribution of a[i] * a[j] for all i != j.
707  bn_add_words(r, r, r, max);
708
709  // Add in the contribution of a[i] * a[i] for all i.
710  bn_sqr_words(tmp, a, n);
711  bn_add_words(r, r, tmp, max);
712}
713
714// r is 2*n words in size,
715// a and b are both n words in size.    (There's not actually a 'b' here ...)
716// n must be a power of 2.
717// We multiply and return the result.
718// t must be 2*n words in size
719// We calculate
720// a[0]*b[0]
721// a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
722// a[1]*b[1]
723static void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2,
724                             BN_ULONG *t) {
725  int n = n2 / 2;
726  int zero, c1;
727  BN_ULONG ln, lo, *p;
728
729  if (n2 == 4) {
730    bn_sqr_comba4(r, a);
731    return;
732  } else if (n2 == 8) {
733    bn_sqr_comba8(r, a);
734    return;
735  }
736  if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL) {
737    bn_sqr_normal(r, a, n2, t);
738    return;
739  }
740  // r=(a[0]-a[1])*(a[1]-a[0])
741  c1 = bn_cmp_words(a, &(a[n]), n);
742  zero = 0;
743  if (c1 > 0) {
744    bn_sub_words(t, a, &(a[n]), n);
745  } else if (c1 < 0) {
746    bn_sub_words(t, &(a[n]), a, n);
747  } else {
748    zero = 1;
749  }
750
751  // The result will always be negative unless it is zero
752  p = &(t[n2 * 2]);
753
754  if (!zero) {
755    bn_sqr_recursive(&(t[n2]), t, n, p);
756  } else {
757    OPENSSL_memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG));
758  }
759  bn_sqr_recursive(r, a, n, p);
760  bn_sqr_recursive(&(r[n2]), &(a[n]), n, p);
761
762  // t[32] holds (a[0]-a[1])*(a[1]-a[0]), it is negative or zero
763  // r[10] holds (a[0]*b[0])
764  // r[32] holds (b[1]*b[1])
765
766  c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
767
768  // t[32] is negative
769  c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
770
771  // t[32] holds (a[0]-a[1])*(a[1]-a[0])+(a[0]*a[0])+(a[1]*a[1])
772  // r[10] holds (a[0]*a[0])
773  // r[32] holds (a[1]*a[1])
774  // c1 holds the carry bits
775  c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
776  if (c1) {
777    p = &(r[n + n2]);
778    lo = *p;
779    ln = lo + c1;
780    *p = ln;
781
782    // The overflow will stop before we over write
783    // words we should not overwrite
784    if (ln < (BN_ULONG)c1) {
785      do {
786        p++;
787        lo = *p;
788        ln = lo + 1;
789        *p = ln;
790      } while (ln == 0);
791    }
792  }
793}
794
795int BN_mul_word(BIGNUM *bn, BN_ULONG w) {
796  if (!bn->top) {
797    return 1;
798  }
799
800  if (w == 0) {
801    BN_zero(bn);
802    return 1;
803  }
804
805  BN_ULONG ll = bn_mul_words(bn->d, bn->d, bn->top, w);
806  if (ll) {
807    if (!bn_wexpand(bn, bn->top + 1)) {
808      return 0;
809    }
810    bn->d[bn->top++] = ll;
811  }
812
813  return 1;
814}
815
816int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) {
817  int max, al;
818  int ret = 0;
819  BIGNUM *tmp, *rr;
820
821  al = a->top;
822  if (al <= 0) {
823    r->top = 0;
824    r->neg = 0;
825    return 1;
826  }
827
828  BN_CTX_start(ctx);
829  rr = (a != r) ? r : BN_CTX_get(ctx);
830  tmp = BN_CTX_get(ctx);
831  if (!rr || !tmp) {
832    goto err;
833  }
834
835  max = 2 * al;  // Non-zero (from above)
836  if (!bn_wexpand(rr, max)) {
837    goto err;
838  }
839
840  if (al == 4) {
841    bn_sqr_comba4(rr->d, a->d);
842  } else if (al == 8) {
843    bn_sqr_comba8(rr->d, a->d);
844  } else {
845    if (al < BN_SQR_RECURSIVE_SIZE_NORMAL) {
846      BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL * 2];
847      bn_sqr_normal(rr->d, a->d, al, t);
848    } else {
849      int j, k;
850
851      j = BN_num_bits_word((BN_ULONG)al);
852      j = 1 << (j - 1);
853      k = j + j;
854      if (al == j) {
855        if (!bn_wexpand(tmp, k * 2)) {
856          goto err;
857        }
858        bn_sqr_recursive(rr->d, a->d, al, tmp->d);
859      } else {
860        if (!bn_wexpand(tmp, max)) {
861          goto err;
862        }
863        bn_sqr_normal(rr->d, a->d, al, tmp->d);
864      }
865    }
866  }
867
868  rr->neg = 0;
869  // If the most-significant half of the top word of 'a' is zero, then
870  // the square of 'a' will max-1 words.
871  if (a->d[al - 1] == (a->d[al - 1] & BN_MASK2l)) {
872    rr->top = max - 1;
873  } else {
874    rr->top = max;
875  }
876
877  if (rr != r && !BN_copy(r, rr)) {
878    goto err;
879  }
880  ret = 1;
881
882err:
883  BN_CTX_end(ctx);
884  return ret;
885}
886
887int bn_sqr_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a, size_t num_a) {
888  if (num_r != 2 * num_a || num_a > BN_SMALL_MAX_WORDS) {
889    OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
890    return 0;
891  }
892  if (num_a == 4) {
893    bn_sqr_comba4(r, a);
894  } else if (num_a == 8) {
895    bn_sqr_comba8(r, a);
896  } else {
897    BN_ULONG tmp[2 * BN_SMALL_MAX_WORDS];
898    bn_sqr_normal(r, a, num_a, tmp);
899    OPENSSL_cleanse(tmp, 2 * num_a * sizeof(BN_ULONG));
900  }
901  return 1;
902}
903