1/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2 * All rights reserved. 3 * 4 * This package is an SSL implementation written 5 * by Eric Young (eay@cryptsoft.com). 6 * The implementation was written so as to conform with Netscapes SSL. 7 * 8 * This library is free for commercial and non-commercial use as long as 9 * the following conditions are aheared to. The following conditions 10 * apply to all code found in this distribution, be it the RC4, RSA, 11 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 12 * included with this distribution is covered by the same copyright terms 13 * except that the holder is Tim Hudson (tjh@cryptsoft.com). 14 * 15 * Copyright remains Eric Young's, and as such any Copyright notices in 16 * the code are not to be removed. 17 * If this package is used in a product, Eric Young should be given attribution 18 * as the author of the parts of the library used. 19 * This can be in the form of a textual message at program startup or 20 * in documentation (online or textual) provided with the package. 21 * 22 * Redistribution and use in source and binary forms, with or without 23 * modification, are permitted provided that the following conditions 24 * are met: 25 * 1. Redistributions of source code must retain the copyright 26 * notice, this list of conditions and the following disclaimer. 27 * 2. Redistributions in binary form must reproduce the above copyright 28 * notice, this list of conditions and the following disclaimer in the 29 * documentation and/or other materials provided with the distribution. 30 * 3. All advertising materials mentioning features or use of this software 31 * must display the following acknowledgement: 32 * "This product includes cryptographic software written by 33 * Eric Young (eay@cryptsoft.com)" 34 * The word 'cryptographic' can be left out if the rouines from the library 35 * being used are not cryptographic related :-). 36 * 4. If you include any Windows specific code (or a derivative thereof) from 37 * the apps directory (application code) you must include an acknowledgement: 38 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 39 * 40 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 43 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 50 * SUCH DAMAGE. 51 * 52 * The licence and distribution terms for any publically available version or 53 * derivative of this code cannot be changed. i.e. this code cannot simply be 54 * copied and put under another distribution licence 55 * [including the GNU Public Licence.] */ 56 57#include <openssl/bn.h> 58 59#include <assert.h> 60 61#include "internal.h" 62 63 64/* Generic implementations of most operations are needed for: 65 * - Configurations without inline assembly. 66 * - Architectures other than x86 or x86_64. 67 * - Windows x84_64; x86_64-gcc.c does not build on MSVC. */ 68#if defined(OPENSSL_NO_ASM) || \ 69 (!defined(OPENSSL_X86_64) && !defined(OPENSSL_X86)) || \ 70 (defined(OPENSSL_X86_64) && defined(OPENSSL_WINDOWS)) 71 72#ifdef BN_ULLONG 73#define mul_add(r, a, w, c) \ 74 { \ 75 BN_ULLONG t; \ 76 t = (BN_ULLONG)w * (a) + (r) + (c); \ 77 (r) = Lw(t); \ 78 (c) = Hw(t); \ 79 } 80 81#define mul(r, a, w, c) \ 82 { \ 83 BN_ULLONG t; \ 84 t = (BN_ULLONG)w * (a) + (c); \ 85 (r) = Lw(t); \ 86 (c) = Hw(t); \ 87 } 88 89#define sqr(r0, r1, a) \ 90 { \ 91 BN_ULLONG t; \ 92 t = (BN_ULLONG)(a) * (a); \ 93 (r0) = Lw(t); \ 94 (r1) = Hw(t); \ 95 } 96 97#elif defined(BN_UMULT_LOHI) 98#define mul_add(r, a, w, c) \ 99 { \ 100 BN_ULONG high, low, ret, tmp = (a); \ 101 ret = (r); \ 102 BN_UMULT_LOHI(low, high, w, tmp); \ 103 ret += (c); \ 104 (c) = (ret < (c)) ? 1 : 0; \ 105 (c) += high; \ 106 ret += low; \ 107 (c) += (ret < low) ? 1 : 0; \ 108 (r) = ret; \ 109 } 110 111#define mul(r, a, w, c) \ 112 { \ 113 BN_ULONG high, low, ret, ta = (a); \ 114 BN_UMULT_LOHI(low, high, w, ta); \ 115 ret = low + (c); \ 116 (c) = high; \ 117 (c) += (ret < low) ? 1 : 0; \ 118 (r) = ret; \ 119 } 120 121#define sqr(r0, r1, a) \ 122 { \ 123 BN_ULONG tmp = (a); \ 124 BN_UMULT_LOHI(r0, r1, tmp, tmp); \ 125 } 126 127#else 128 129/************************************************************* 130 * No long long type 131 */ 132 133#define LBITS(a) ((a) & BN_MASK2l) 134#define HBITS(a) (((a) >> BN_BITS4) & BN_MASK2l) 135#define L2HBITS(a) (((a) << BN_BITS4) & BN_MASK2) 136 137#define LLBITS(a) ((a) & BN_MASKl) 138#define LHBITS(a) (((a) >> BN_BITS2) & BN_MASKl) 139#define LL2HBITS(a) ((BN_ULLONG)((a) & BN_MASKl) << BN_BITS2) 140 141#define mul64(l, h, bl, bh) \ 142 { \ 143 BN_ULONG m, m1, lt, ht; \ 144 \ 145 lt = l; \ 146 ht = h; \ 147 m = (bh) * (lt); \ 148 lt = (bl) * (lt); \ 149 m1 = (bl) * (ht); \ 150 ht = (bh) * (ht); \ 151 m = (m + m1) & BN_MASK2; \ 152 if (m < m1) \ 153 ht += L2HBITS((BN_ULONG)1); \ 154 ht += HBITS(m); \ 155 m1 = L2HBITS(m); \ 156 lt = (lt + m1) & BN_MASK2; \ 157 if (lt < m1) \ 158 ht++; \ 159 (l) = lt; \ 160 (h) = ht; \ 161 } 162 163#define sqr64(lo, ho, in) \ 164 { \ 165 BN_ULONG l, h, m; \ 166 \ 167 h = (in); \ 168 l = LBITS(h); \ 169 h = HBITS(h); \ 170 m = (l) * (h); \ 171 l *= l; \ 172 h *= h; \ 173 h += (m & BN_MASK2h1) >> (BN_BITS4 - 1); \ 174 m = (m & BN_MASK2l) << (BN_BITS4 + 1); \ 175 l = (l + m) & BN_MASK2; \ 176 if (l < m) \ 177 h++; \ 178 (lo) = l; \ 179 (ho) = h; \ 180 } 181 182#define mul_add(r, a, bl, bh, c) \ 183 { \ 184 BN_ULONG l, h; \ 185 \ 186 h = (a); \ 187 l = LBITS(h); \ 188 h = HBITS(h); \ 189 mul64(l, h, (bl), (bh)); \ 190 \ 191 /* non-multiply part */ \ 192 l = (l + (c)) & BN_MASK2; \ 193 if (l < (c)) \ 194 h++; \ 195 (c) = (r); \ 196 l = (l + (c)) & BN_MASK2; \ 197 if (l < (c)) \ 198 h++; \ 199 (c) = h & BN_MASK2; \ 200 (r) = l; \ 201 } 202 203#define mul(r, a, bl, bh, c) \ 204 { \ 205 BN_ULONG l, h; \ 206 \ 207 h = (a); \ 208 l = LBITS(h); \ 209 h = HBITS(h); \ 210 mul64(l, h, (bl), (bh)); \ 211 \ 212 /* non-multiply part */ \ 213 l += (c); \ 214 if ((l & BN_MASK2) < (c)) \ 215 h++; \ 216 (c) = h & BN_MASK2; \ 217 (r) = l & BN_MASK2; \ 218 } 219#endif /* !BN_ULLONG */ 220 221#if defined(BN_ULLONG) || defined(BN_UMULT_HIGH) 222 223BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, 224 BN_ULONG w) { 225 BN_ULONG c1 = 0; 226 227 assert(num >= 0); 228 if (num <= 0) { 229 return c1; 230 } 231 232 while (num & ~3) { 233 mul_add(rp[0], ap[0], w, c1); 234 mul_add(rp[1], ap[1], w, c1); 235 mul_add(rp[2], ap[2], w, c1); 236 mul_add(rp[3], ap[3], w, c1); 237 ap += 4; 238 rp += 4; 239 num -= 4; 240 } 241 242 while (num) { 243 mul_add(rp[0], ap[0], w, c1); 244 ap++; 245 rp++; 246 num--; 247 } 248 249 return c1; 250} 251 252BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) { 253 BN_ULONG c1 = 0; 254 255 assert(num >= 0); 256 if (num <= 0) { 257 return c1; 258 } 259 260 while (num & ~3) { 261 mul(rp[0], ap[0], w, c1); 262 mul(rp[1], ap[1], w, c1); 263 mul(rp[2], ap[2], w, c1); 264 mul(rp[3], ap[3], w, c1); 265 ap += 4; 266 rp += 4; 267 num -= 4; 268 } 269 while (num) { 270 mul(rp[0], ap[0], w, c1); 271 ap++; 272 rp++; 273 num--; 274 } 275 return c1; 276} 277 278void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) { 279 assert(n >= 0); 280 if (n <= 0) { 281 return; 282 } 283 284 while (n & ~3) { 285 sqr(r[0], r[1], a[0]); 286 sqr(r[2], r[3], a[1]); 287 sqr(r[4], r[5], a[2]); 288 sqr(r[6], r[7], a[3]); 289 a += 4; 290 r += 8; 291 n -= 4; 292 } 293 while (n) { 294 sqr(r[0], r[1], a[0]); 295 a++; 296 r += 2; 297 n--; 298 } 299} 300 301#else /* !(defined(BN_ULLONG) || defined(BN_UMULT_HIGH)) */ 302 303BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, 304 BN_ULONG w) { 305 BN_ULONG c = 0; 306 BN_ULONG bl, bh; 307 308 assert(num >= 0); 309 if (num <= 0) { 310 return (BN_ULONG)0; 311 } 312 313 bl = LBITS(w); 314 bh = HBITS(w); 315 316 while (num & ~3) { 317 mul_add(rp[0], ap[0], bl, bh, c); 318 mul_add(rp[1], ap[1], bl, bh, c); 319 mul_add(rp[2], ap[2], bl, bh, c); 320 mul_add(rp[3], ap[3], bl, bh, c); 321 ap += 4; 322 rp += 4; 323 num -= 4; 324 } 325 while (num) { 326 mul_add(rp[0], ap[0], bl, bh, c); 327 ap++; 328 rp++; 329 num--; 330 } 331 return c; 332} 333 334BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) { 335 BN_ULONG carry = 0; 336 BN_ULONG bl, bh; 337 338 assert(num >= 0); 339 if (num <= 0) { 340 return (BN_ULONG)0; 341 } 342 343 bl = LBITS(w); 344 bh = HBITS(w); 345 346 while (num & ~3) { 347 mul(rp[0], ap[0], bl, bh, carry); 348 mul(rp[1], ap[1], bl, bh, carry); 349 mul(rp[2], ap[2], bl, bh, carry); 350 mul(rp[3], ap[3], bl, bh, carry); 351 ap += 4; 352 rp += 4; 353 num -= 4; 354 } 355 while (num) { 356 mul(rp[0], ap[0], bl, bh, carry); 357 ap++; 358 rp++; 359 num--; 360 } 361 return carry; 362} 363 364void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) { 365 assert(n >= 0); 366 if (n <= 0) { 367 return; 368 } 369 370 while (n & ~3) { 371 sqr64(r[0], r[1], a[0]); 372 sqr64(r[2], r[3], a[1]); 373 sqr64(r[4], r[5], a[2]); 374 sqr64(r[6], r[7], a[3]); 375 a += 4; 376 r += 8; 377 n -= 4; 378 } 379 while (n) { 380 sqr64(r[0], r[1], a[0]); 381 a++; 382 r += 2; 383 n--; 384 } 385} 386 387#endif /* !(defined(BN_ULLONG) || defined(BN_UMULT_HIGH)) */ 388 389#if defined(BN_ULLONG) 390 391BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) { 392 return (BN_ULONG)(((((BN_ULLONG)h) << BN_BITS2) | l) / (BN_ULLONG)d); 393} 394 395#else 396 397/* Divide h,l by d and return the result. */ 398BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) { 399 BN_ULONG dh, dl, q, ret = 0, th, tl, t; 400 int i, count = 2; 401 402 if (d == 0) { 403 return BN_MASK2; 404 } 405 406 i = BN_num_bits_word(d); 407 assert((i == BN_BITS2) || (h <= (BN_ULONG)1 << i)); 408 409 i = BN_BITS2 - i; 410 if (h >= d) { 411 h -= d; 412 } 413 414 if (i) { 415 d <<= i; 416 h = (h << i) | (l >> (BN_BITS2 - i)); 417 l <<= i; 418 } 419 dh = (d & BN_MASK2h) >> BN_BITS4; 420 dl = (d & BN_MASK2l); 421 for (;;) { 422 if ((h >> BN_BITS4) == dh) { 423 q = BN_MASK2l; 424 } else { 425 q = h / dh; 426 } 427 428 th = q * dh; 429 tl = dl * q; 430 for (;;) { 431 t = h - th; 432 if ((t & BN_MASK2h) || 433 ((tl) <= ((t << BN_BITS4) | ((l & BN_MASK2h) >> BN_BITS4)))) { 434 break; 435 } 436 q--; 437 th -= dh; 438 tl -= dl; 439 } 440 t = (tl >> BN_BITS4); 441 tl = (tl << BN_BITS4) & BN_MASK2h; 442 th += t; 443 444 if (l < tl) { 445 th++; 446 } 447 l -= tl; 448 if (h < th) { 449 h += d; 450 q--; 451 } 452 h -= th; 453 454 if (--count == 0) { 455 break; 456 } 457 458 ret = q << BN_BITS4; 459 h = ((h << BN_BITS4) | (l >> BN_BITS4)) & BN_MASK2; 460 l = (l & BN_MASK2l) << BN_BITS4; 461 } 462 463 ret |= q; 464 return ret; 465} 466 467#endif /* !defined(BN_ULLONG) */ 468 469#ifdef BN_ULLONG 470BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, 471 int n) { 472 BN_ULLONG ll = 0; 473 474 assert(n >= 0); 475 if (n <= 0) { 476 return (BN_ULONG)0; 477 } 478 479 while (n & ~3) { 480 ll += (BN_ULLONG)a[0] + b[0]; 481 r[0] = (BN_ULONG)ll & BN_MASK2; 482 ll >>= BN_BITS2; 483 ll += (BN_ULLONG)a[1] + b[1]; 484 r[1] = (BN_ULONG)ll & BN_MASK2; 485 ll >>= BN_BITS2; 486 ll += (BN_ULLONG)a[2] + b[2]; 487 r[2] = (BN_ULONG)ll & BN_MASK2; 488 ll >>= BN_BITS2; 489 ll += (BN_ULLONG)a[3] + b[3]; 490 r[3] = (BN_ULONG)ll & BN_MASK2; 491 ll >>= BN_BITS2; 492 a += 4; 493 b += 4; 494 r += 4; 495 n -= 4; 496 } 497 while (n) { 498 ll += (BN_ULLONG)a[0] + b[0]; 499 r[0] = (BN_ULONG)ll & BN_MASK2; 500 ll >>= BN_BITS2; 501 a++; 502 b++; 503 r++; 504 n--; 505 } 506 return (BN_ULONG)ll; 507} 508 509#else /* !BN_ULLONG */ 510 511BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, 512 int n) { 513 BN_ULONG c, l, t; 514 515 assert(n >= 0); 516 if (n <= 0) { 517 return (BN_ULONG)0; 518 } 519 520 c = 0; 521 while (n & ~3) { 522 t = a[0]; 523 t = (t + c) & BN_MASK2; 524 c = (t < c); 525 l = (t + b[0]) & BN_MASK2; 526 c += (l < t); 527 r[0] = l; 528 t = a[1]; 529 t = (t + c) & BN_MASK2; 530 c = (t < c); 531 l = (t + b[1]) & BN_MASK2; 532 c += (l < t); 533 r[1] = l; 534 t = a[2]; 535 t = (t + c) & BN_MASK2; 536 c = (t < c); 537 l = (t + b[2]) & BN_MASK2; 538 c += (l < t); 539 r[2] = l; 540 t = a[3]; 541 t = (t + c) & BN_MASK2; 542 c = (t < c); 543 l = (t + b[3]) & BN_MASK2; 544 c += (l < t); 545 r[3] = l; 546 a += 4; 547 b += 4; 548 r += 4; 549 n -= 4; 550 } 551 while (n) { 552 t = a[0]; 553 t = (t + c) & BN_MASK2; 554 c = (t < c); 555 l = (t + b[0]) & BN_MASK2; 556 c += (l < t); 557 r[0] = l; 558 a++; 559 b++; 560 r++; 561 n--; 562 } 563 return (BN_ULONG)c; 564} 565 566#endif /* !BN_ULLONG */ 567 568BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, 569 int n) { 570 BN_ULONG t1, t2; 571 int c = 0; 572 573 assert(n >= 0); 574 if (n <= 0) { 575 return (BN_ULONG)0; 576 } 577 578 while (n & ~3) { 579 t1 = a[0]; 580 t2 = b[0]; 581 r[0] = (t1 - t2 - c) & BN_MASK2; 582 if (t1 != t2) { 583 c = (t1 < t2); 584 } 585 t1 = a[1]; 586 t2 = b[1]; 587 r[1] = (t1 - t2 - c) & BN_MASK2; 588 if (t1 != t2) { 589 c = (t1 < t2); 590 } 591 t1 = a[2]; 592 t2 = b[2]; 593 r[2] = (t1 - t2 - c) & BN_MASK2; 594 if (t1 != t2) { 595 c = (t1 < t2); 596 } 597 t1 = a[3]; 598 t2 = b[3]; 599 r[3] = (t1 - t2 - c) & BN_MASK2; 600 if (t1 != t2) { 601 c = (t1 < t2); 602 } 603 a += 4; 604 b += 4; 605 r += 4; 606 n -= 4; 607 } 608 while (n) { 609 t1 = a[0]; 610 t2 = b[0]; 611 r[0] = (t1 - t2 - c) & BN_MASK2; 612 if (t1 != t2) { 613 c = (t1 < t2); 614 } 615 a++; 616 b++; 617 r++; 618 n--; 619 } 620 return c; 621} 622 623/* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */ 624/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */ 625/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ 626/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ 627 628#ifdef BN_ULLONG 629 630/* Keep in mind that additions to multiplication result can not overflow, 631 * because its high half cannot be all-ones. */ 632#define mul_add_c(a, b, c0, c1, c2) \ 633 do { \ 634 BN_ULONG hi; \ 635 BN_ULLONG t = (BN_ULLONG)(a) * (b); \ 636 t += c0; /* no carry */ \ 637 c0 = (BN_ULONG)Lw(t); \ 638 hi = (BN_ULONG)Hw(t); \ 639 c1 = (c1 + hi) & BN_MASK2; \ 640 if (c1 < hi) \ 641 c2++; \ 642 } while (0) 643 644#define mul_add_c2(a, b, c0, c1, c2) \ 645 do { \ 646 BN_ULONG hi; \ 647 BN_ULLONG t = (BN_ULLONG)(a) * (b); \ 648 BN_ULLONG tt = t + c0; /* no carry */ \ 649 c0 = (BN_ULONG)Lw(tt); \ 650 hi = (BN_ULONG)Hw(tt); \ 651 c1 = (c1 + hi) & BN_MASK2; \ 652 if (c1 < hi) \ 653 c2++; \ 654 t += c0; /* no carry */ \ 655 c0 = (BN_ULONG)Lw(t); \ 656 hi = (BN_ULONG)Hw(t); \ 657 c1 = (c1 + hi) & BN_MASK2; \ 658 if (c1 < hi) \ 659 c2++; \ 660 } while (0) 661 662#define sqr_add_c(a, i, c0, c1, c2) \ 663 do { \ 664 BN_ULONG hi; \ 665 BN_ULLONG t = (BN_ULLONG)a[i] * a[i]; \ 666 t += c0; /* no carry */ \ 667 c0 = (BN_ULONG)Lw(t); \ 668 hi = (BN_ULONG)Hw(t); \ 669 c1 = (c1 + hi) & BN_MASK2; \ 670 if (c1 < hi) \ 671 c2++; \ 672 } while (0) 673 674#define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2) 675 676#elif defined(BN_UMULT_LOHI) 677 678/* Keep in mind that additions to hi can not overflow, because the high word of 679 * a multiplication result cannot be all-ones. */ 680#define mul_add_c(a, b, c0, c1, c2) \ 681 do { \ 682 BN_ULONG ta = (a), tb = (b); \ 683 BN_ULONG lo, hi; \ 684 BN_UMULT_LOHI(lo, hi, ta, tb); \ 685 c0 += lo; \ 686 hi += (c0 < lo) ? 1 : 0; \ 687 c1 += hi; \ 688 c2 += (c1 < hi) ? 1 : 0; \ 689 } while (0) 690 691#define mul_add_c2(a, b, c0, c1, c2) \ 692 do { \ 693 BN_ULONG ta = (a), tb = (b); \ 694 BN_ULONG lo, hi, tt; \ 695 BN_UMULT_LOHI(lo, hi, ta, tb); \ 696 c0 += lo; \ 697 tt = hi + ((c0 < lo) ? 1 : 0); \ 698 c1 += tt; \ 699 c2 += (c1 < tt) ? 1 : 0; \ 700 c0 += lo; \ 701 hi += (c0 < lo) ? 1 : 0; \ 702 c1 += hi; \ 703 c2 += (c1 < hi) ? 1 : 0; \ 704 } while (0) 705 706#define sqr_add_c(a, i, c0, c1, c2) \ 707 do { \ 708 BN_ULONG ta = (a)[i]; \ 709 BN_ULONG lo, hi; \ 710 BN_UMULT_LOHI(lo, hi, ta, ta); \ 711 c0 += lo; \ 712 hi += (c0 < lo) ? 1 : 0; \ 713 c1 += hi; \ 714 c2 += (c1 < hi) ? 1 : 0; \ 715 } while (0) 716 717#define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2) 718 719#else /* !BN_ULLONG */ 720 721/* Keep in mind that additions to hi can not overflow, because 722 * the high word of a multiplication result cannot be all-ones. */ 723 724#define mul_add_c(a, b, c0, c1, c2) \ 725 do { \ 726 BN_ULONG lo = LBITS(a), hi = HBITS(a); \ 727 BN_ULONG bl = LBITS(b), bh = HBITS(b); \ 728 mul64(lo, hi, bl, bh); \ 729 c0 = (c0 + lo) & BN_MASK2; \ 730 if (c0 < lo) \ 731 hi++; \ 732 c1 = (c1 + hi) & BN_MASK2; \ 733 if (c1 < hi) \ 734 c2++; \ 735 } while (0) 736 737#define mul_add_c2(a, b, c0, c1, c2) \ 738 do { \ 739 BN_ULONG tt; \ 740 BN_ULONG lo = LBITS(a), hi = HBITS(a); \ 741 BN_ULONG bl = LBITS(b), bh = HBITS(b); \ 742 mul64(lo, hi, bl, bh); \ 743 tt = hi; \ 744 c0 = (c0 + lo) & BN_MASK2; \ 745 if (c0 < lo) \ 746 tt++; \ 747 c1 = (c1 + tt) & BN_MASK2; \ 748 if (c1 < tt) \ 749 c2++; \ 750 c0 = (c0 + lo) & BN_MASK2; \ 751 if (c0 < lo) \ 752 hi++; \ 753 c1 = (c1 + hi) & BN_MASK2; \ 754 if (c1 < hi) \ 755 c2++; \ 756 } while (0) 757 758#define sqr_add_c(a, i, c0, c1, c2) \ 759 do { \ 760 BN_ULONG lo, hi; \ 761 sqr64(lo, hi, (a)[i]); \ 762 c0 = (c0 + lo) & BN_MASK2; \ 763 if (c0 < lo) \ 764 hi++; \ 765 c1 = (c1 + hi) & BN_MASK2; \ 766 if (c1 < hi) \ 767 c2++; \ 768 } while (0) 769 770#define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2) 771#endif /* !BN_ULLONG */ 772 773void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) { 774 BN_ULONG c1, c2, c3; 775 776 c1 = 0; 777 c2 = 0; 778 c3 = 0; 779 mul_add_c(a[0], b[0], c1, c2, c3); 780 r[0] = c1; 781 c1 = 0; 782 mul_add_c(a[0], b[1], c2, c3, c1); 783 mul_add_c(a[1], b[0], c2, c3, c1); 784 r[1] = c2; 785 c2 = 0; 786 mul_add_c(a[2], b[0], c3, c1, c2); 787 mul_add_c(a[1], b[1], c3, c1, c2); 788 mul_add_c(a[0], b[2], c3, c1, c2); 789 r[2] = c3; 790 c3 = 0; 791 mul_add_c(a[0], b[3], c1, c2, c3); 792 mul_add_c(a[1], b[2], c1, c2, c3); 793 mul_add_c(a[2], b[1], c1, c2, c3); 794 mul_add_c(a[3], b[0], c1, c2, c3); 795 r[3] = c1; 796 c1 = 0; 797 mul_add_c(a[4], b[0], c2, c3, c1); 798 mul_add_c(a[3], b[1], c2, c3, c1); 799 mul_add_c(a[2], b[2], c2, c3, c1); 800 mul_add_c(a[1], b[3], c2, c3, c1); 801 mul_add_c(a[0], b[4], c2, c3, c1); 802 r[4] = c2; 803 c2 = 0; 804 mul_add_c(a[0], b[5], c3, c1, c2); 805 mul_add_c(a[1], b[4], c3, c1, c2); 806 mul_add_c(a[2], b[3], c3, c1, c2); 807 mul_add_c(a[3], b[2], c3, c1, c2); 808 mul_add_c(a[4], b[1], c3, c1, c2); 809 mul_add_c(a[5], b[0], c3, c1, c2); 810 r[5] = c3; 811 c3 = 0; 812 mul_add_c(a[6], b[0], c1, c2, c3); 813 mul_add_c(a[5], b[1], c1, c2, c3); 814 mul_add_c(a[4], b[2], c1, c2, c3); 815 mul_add_c(a[3], b[3], c1, c2, c3); 816 mul_add_c(a[2], b[4], c1, c2, c3); 817 mul_add_c(a[1], b[5], c1, c2, c3); 818 mul_add_c(a[0], b[6], c1, c2, c3); 819 r[6] = c1; 820 c1 = 0; 821 mul_add_c(a[0], b[7], c2, c3, c1); 822 mul_add_c(a[1], b[6], c2, c3, c1); 823 mul_add_c(a[2], b[5], c2, c3, c1); 824 mul_add_c(a[3], b[4], c2, c3, c1); 825 mul_add_c(a[4], b[3], c2, c3, c1); 826 mul_add_c(a[5], b[2], c2, c3, c1); 827 mul_add_c(a[6], b[1], c2, c3, c1); 828 mul_add_c(a[7], b[0], c2, c3, c1); 829 r[7] = c2; 830 c2 = 0; 831 mul_add_c(a[7], b[1], c3, c1, c2); 832 mul_add_c(a[6], b[2], c3, c1, c2); 833 mul_add_c(a[5], b[3], c3, c1, c2); 834 mul_add_c(a[4], b[4], c3, c1, c2); 835 mul_add_c(a[3], b[5], c3, c1, c2); 836 mul_add_c(a[2], b[6], c3, c1, c2); 837 mul_add_c(a[1], b[7], c3, c1, c2); 838 r[8] = c3; 839 c3 = 0; 840 mul_add_c(a[2], b[7], c1, c2, c3); 841 mul_add_c(a[3], b[6], c1, c2, c3); 842 mul_add_c(a[4], b[5], c1, c2, c3); 843 mul_add_c(a[5], b[4], c1, c2, c3); 844 mul_add_c(a[6], b[3], c1, c2, c3); 845 mul_add_c(a[7], b[2], c1, c2, c3); 846 r[9] = c1; 847 c1 = 0; 848 mul_add_c(a[7], b[3], c2, c3, c1); 849 mul_add_c(a[6], b[4], c2, c3, c1); 850 mul_add_c(a[5], b[5], c2, c3, c1); 851 mul_add_c(a[4], b[6], c2, c3, c1); 852 mul_add_c(a[3], b[7], c2, c3, c1); 853 r[10] = c2; 854 c2 = 0; 855 mul_add_c(a[4], b[7], c3, c1, c2); 856 mul_add_c(a[5], b[6], c3, c1, c2); 857 mul_add_c(a[6], b[5], c3, c1, c2); 858 mul_add_c(a[7], b[4], c3, c1, c2); 859 r[11] = c3; 860 c3 = 0; 861 mul_add_c(a[7], b[5], c1, c2, c3); 862 mul_add_c(a[6], b[6], c1, c2, c3); 863 mul_add_c(a[5], b[7], c1, c2, c3); 864 r[12] = c1; 865 c1 = 0; 866 mul_add_c(a[6], b[7], c2, c3, c1); 867 mul_add_c(a[7], b[6], c2, c3, c1); 868 r[13] = c2; 869 c2 = 0; 870 mul_add_c(a[7], b[7], c3, c1, c2); 871 r[14] = c3; 872 r[15] = c1; 873} 874 875void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) { 876 BN_ULONG c1, c2, c3; 877 878 c1 = 0; 879 c2 = 0; 880 c3 = 0; 881 mul_add_c(a[0], b[0], c1, c2, c3); 882 r[0] = c1; 883 c1 = 0; 884 mul_add_c(a[0], b[1], c2, c3, c1); 885 mul_add_c(a[1], b[0], c2, c3, c1); 886 r[1] = c2; 887 c2 = 0; 888 mul_add_c(a[2], b[0], c3, c1, c2); 889 mul_add_c(a[1], b[1], c3, c1, c2); 890 mul_add_c(a[0], b[2], c3, c1, c2); 891 r[2] = c3; 892 c3 = 0; 893 mul_add_c(a[0], b[3], c1, c2, c3); 894 mul_add_c(a[1], b[2], c1, c2, c3); 895 mul_add_c(a[2], b[1], c1, c2, c3); 896 mul_add_c(a[3], b[0], c1, c2, c3); 897 r[3] = c1; 898 c1 = 0; 899 mul_add_c(a[3], b[1], c2, c3, c1); 900 mul_add_c(a[2], b[2], c2, c3, c1); 901 mul_add_c(a[1], b[3], c2, c3, c1); 902 r[4] = c2; 903 c2 = 0; 904 mul_add_c(a[2], b[3], c3, c1, c2); 905 mul_add_c(a[3], b[2], c3, c1, c2); 906 r[5] = c3; 907 c3 = 0; 908 mul_add_c(a[3], b[3], c1, c2, c3); 909 r[6] = c1; 910 r[7] = c2; 911} 912 913void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) { 914 BN_ULONG c1, c2, c3; 915 916 c1 = 0; 917 c2 = 0; 918 c3 = 0; 919 sqr_add_c(a, 0, c1, c2, c3); 920 r[0] = c1; 921 c1 = 0; 922 sqr_add_c2(a, 1, 0, c2, c3, c1); 923 r[1] = c2; 924 c2 = 0; 925 sqr_add_c(a, 1, c3, c1, c2); 926 sqr_add_c2(a, 2, 0, c3, c1, c2); 927 r[2] = c3; 928 c3 = 0; 929 sqr_add_c2(a, 3, 0, c1, c2, c3); 930 sqr_add_c2(a, 2, 1, c1, c2, c3); 931 r[3] = c1; 932 c1 = 0; 933 sqr_add_c(a, 2, c2, c3, c1); 934 sqr_add_c2(a, 3, 1, c2, c3, c1); 935 sqr_add_c2(a, 4, 0, c2, c3, c1); 936 r[4] = c2; 937 c2 = 0; 938 sqr_add_c2(a, 5, 0, c3, c1, c2); 939 sqr_add_c2(a, 4, 1, c3, c1, c2); 940 sqr_add_c2(a, 3, 2, c3, c1, c2); 941 r[5] = c3; 942 c3 = 0; 943 sqr_add_c(a, 3, c1, c2, c3); 944 sqr_add_c2(a, 4, 2, c1, c2, c3); 945 sqr_add_c2(a, 5, 1, c1, c2, c3); 946 sqr_add_c2(a, 6, 0, c1, c2, c3); 947 r[6] = c1; 948 c1 = 0; 949 sqr_add_c2(a, 7, 0, c2, c3, c1); 950 sqr_add_c2(a, 6, 1, c2, c3, c1); 951 sqr_add_c2(a, 5, 2, c2, c3, c1); 952 sqr_add_c2(a, 4, 3, c2, c3, c1); 953 r[7] = c2; 954 c2 = 0; 955 sqr_add_c(a, 4, c3, c1, c2); 956 sqr_add_c2(a, 5, 3, c3, c1, c2); 957 sqr_add_c2(a, 6, 2, c3, c1, c2); 958 sqr_add_c2(a, 7, 1, c3, c1, c2); 959 r[8] = c3; 960 c3 = 0; 961 sqr_add_c2(a, 7, 2, c1, c2, c3); 962 sqr_add_c2(a, 6, 3, c1, c2, c3); 963 sqr_add_c2(a, 5, 4, c1, c2, c3); 964 r[9] = c1; 965 c1 = 0; 966 sqr_add_c(a, 5, c2, c3, c1); 967 sqr_add_c2(a, 6, 4, c2, c3, c1); 968 sqr_add_c2(a, 7, 3, c2, c3, c1); 969 r[10] = c2; 970 c2 = 0; 971 sqr_add_c2(a, 7, 4, c3, c1, c2); 972 sqr_add_c2(a, 6, 5, c3, c1, c2); 973 r[11] = c3; 974 c3 = 0; 975 sqr_add_c(a, 6, c1, c2, c3); 976 sqr_add_c2(a, 7, 5, c1, c2, c3); 977 r[12] = c1; 978 c1 = 0; 979 sqr_add_c2(a, 7, 6, c2, c3, c1); 980 r[13] = c2; 981 c2 = 0; 982 sqr_add_c(a, 7, c3, c1, c2); 983 r[14] = c3; 984 r[15] = c1; 985} 986 987void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) { 988 BN_ULONG c1, c2, c3; 989 990 c1 = 0; 991 c2 = 0; 992 c3 = 0; 993 sqr_add_c(a, 0, c1, c2, c3); 994 r[0] = c1; 995 c1 = 0; 996 sqr_add_c2(a, 1, 0, c2, c3, c1); 997 r[1] = c2; 998 c2 = 0; 999 sqr_add_c(a, 1, c3, c1, c2); 1000 sqr_add_c2(a, 2, 0, c3, c1, c2); 1001 r[2] = c3; 1002 c3 = 0; 1003 sqr_add_c2(a, 3, 0, c1, c2, c3); 1004 sqr_add_c2(a, 2, 1, c1, c2, c3); 1005 r[3] = c1; 1006 c1 = 0; 1007 sqr_add_c(a, 2, c2, c3, c1); 1008 sqr_add_c2(a, 3, 1, c2, c3, c1); 1009 r[4] = c2; 1010 c2 = 0; 1011 sqr_add_c2(a, 3, 2, c3, c1, c2); 1012 r[5] = c3; 1013 c3 = 0; 1014 sqr_add_c(a, 3, c1, c2, c3); 1015 r[6] = c1; 1016 r[7] = c2; 1017} 1018 1019#endif 1020