; P-256 ("ecp_nistz256") field and point routines, x86-64, NASM syntax.
;
; Entry points tagged ";WIN64 prologue" receive their arguments in
; rcx/rdx/r8, park the caller's rdi/rsi at [8+rsp]/[16+rsp] and copy the
; arguments into rdi/rsi/rdx before the body runs; the epilogue reloads
; rdi/rsi from the same slots.
;
; Layout rule: one statement per physical line. A ';' comment runs to the
; end of the line, so a statement that shares a line with an earlier
; comment is silently dropped by the assembler.

default	rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section	.text code align=64

EXTERN	OPENSSL_ia32cap_P

ALIGN	64
; Modulus p, four 64-bit words, least significant first. Word 0 is all-ones
; and word 2 is zero, which is why the register-based helpers subtract it as
; "-1, r14, 0, r15" after loading words 1 and 3 into r14/r15.
$L$poly:
	DQ	0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001

; Eight-lane dword constants; $L$One seeds and steps the entry counter in the
; table-select loops.
$L$One:
	DD	1,1,1,1,1,1,1,1
$L$Two:
	DD	2,2,2,2,2,2,2,2
$L$Three:
	DD	3,3,3,3,3,3,3,3
; Value substituted for the Z coordinate in ecp_nistz256_point_add_affine
; when the xmm5 mask is set (2^256 - p, i.e. 1 in Montgomery form).
$L$ONE_mont:
	DQ	0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe

;-----------------------------------------------------------------------
; ecp_nistz256_mul_by_2
; In (Win64): rcx = result pointer, rdx = pointer to 4 input qwords.
; Stores (2*a) mod p to [result].
; The carry out of the doubling is kept in r13 as a fifth word and the
; final borrow of (r13:r11:r10:r9:r8 - p) selects the output, so a doubled
; value in [p, 2^256) is reduced as well as one that overflowed 256 bits.
;-----------------------------------------------------------------------
ALIGN	64
ecp_nistz256_mul_by_2:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ecp_nistz256_mul_by_2:
	mov	rdi,rcx
	mov	rsi,rdx

	push	r12
	push	r13

	mov	r8,QWORD[rsi]
	xor	r13,r13			; fifth word; cleared before the carry chain starts
	mov	r9,QWORD[8+rsi]
	add	r8,r8
	mov	r10,QWORD[16+rsi]
	adc	r9,r9
	mov	r11,QWORD[24+rsi]
	lea	rsi,[$L$poly]
	mov	rax,r8			; rax,rdx,rcx,r12 keep the unsubtracted value
	adc	r10,r10
	adc	r11,r11
	mov	rdx,r9
	adc	r13,0			; r13 = carry out of the doubling

	sub	r8,QWORD[rsi]
	mov	rcx,r10
	sbb	r9,QWORD[8+rsi]
	sbb	r10,QWORD[16+rsi]
	mov	r12,r11
	sbb	r11,QWORD[24+rsi]
	sbb	r13,0			; CF set only when 2*a < p

	cmovc	r8,rax			; borrow: keep the unsubtracted value
	cmovc	r9,rdx
	mov	QWORD[rdi],r8
	cmovc	r10,rcx
	mov	QWORD[8+rdi],r9
	cmovc	r11,r12
	mov	QWORD[16+rdi],r10
	mov	QWORD[24+rdi],r11

	pop	r13
	pop	r12
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_ecp_nistz256_mul_by_2:

;-----------------------------------------------------------------------
; ecp_nistz256_neg
; In (Win64): rcx = result pointer, rdx = pointer to 4 input qwords.
; Computes 0 - a; when that borrows (a != 0) p is added back, otherwise
; the zero difference is stored unchanged.
;-----------------------------------------------------------------------
global	ecp_nistz256_neg

ALIGN	32
ecp_nistz256_neg:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ecp_nistz256_neg:
	mov	rdi,rcx
	mov	rsi,rdx

	push	r12
	push	r13

	xor	r8,r8
	xor	r9,r9
	xor	r10,r10
	xor	r11,r11
	xor	r13,r13

	sub	r8,QWORD[rsi]
	sbb	r9,QWORD[8+rsi]
	sbb	r10,QWORD[16+rsi]
	mov	rax,r8
	sbb	r11,QWORD[24+rsi]
	lea	rsi,[$L$poly]
	mov	rdx,r9
	sbb	r13,0			; r13 = 0 or -1 (borrow of 0 - a)

	add	r8,QWORD[rsi]
	mov	rcx,r10
	adc	r9,QWORD[8+rsi]
	adc	r10,QWORD[16+rsi]
	mov	r12,r11
	adc	r11,QWORD[24+rsi]
	test	r13,r13

	cmovz	r8,rax			; no borrow: keep the plain difference
	cmovz	r9,rdx
	mov	QWORD[rdi],r8
	cmovz	r10,rcx
	mov	QWORD[8+rdi],r9
	cmovz	r11,r12
	mov	QWORD[16+rdi],r10
	mov	QWORD[24+rdi],r11

	pop	r13
	pop	r12
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_ecp_nistz256_neg:

;-----------------------------------------------------------------------
; ecp_nistz256_mul_mont
; In (Win64): rcx = result pointer, rdx = pointer to a, r8 = pointer to b.
; Saves the callee-saved GPRs, loads the register inputs expected by
; __ecp_nistz256_mul_montq and calls it.
;-----------------------------------------------------------------------
global	ecp_nistz256_mul_mont

ALIGN	32
ecp_nistz256_mul_mont:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ecp_nistz256_mul_mont:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8

$L$mul_mont:
	push	rbp
	push	rbx
	push	r12
	push	r13
	push	r14
	push	r15
	mov	rbx,rdx
	mov	rax,QWORD[rdx]
	mov	r9,QWORD[rsi]
	mov	r10,QWORD[8+rsi]
	mov	r11,QWORD[16+rsi]
	mov	r12,QWORD[24+rsi]

	call	__ecp_nistz256_mul_montq
$L$mul_mont_done:
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbx
	pop	rbp
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_ecp_nistz256_mul_mont:

;-----------------------------------------------------------------------
; __ecp_nistz256_mul_montq (internal, register calling convention)
; In:   rax = b[0], rbx = &b, rsi = &a, r9,r10,r11,r12 = a[0..3],
;       rdi = &result
; Out:  r12,r13,r8,r9 = result words 0..3, also stored to [rdi];
;       r14 = QWORD[$L$poly+8], r15 = QWORD[$L$poly+24]
; Clobbers rax, rbx, rcx, rdx, rbp, r8-r13, flags.
; Four multiply passes (one per word of b), each followed by a reduction
; step built from a 32-bit shift pair and one multiply by r15; the final
; borrow of (acc - p) selects the stored value.
;-----------------------------------------------------------------------
ALIGN	32
__ecp_nistz256_mul_montq:
	; pass 0: a[0..3] * b[0]
	mov	rbp,rax
	mul	r9
	mov	r14,QWORD[(($L$poly+8))]
	mov	r8,rax
	mov	rax,rbp
	mov	r9,rdx

	mul	r10
	mov	r15,QWORD[(($L$poly+24))]
	add	r9,rax
	mov	rax,rbp
	adc	rdx,0
	mov	r10,rdx

	mul	r11
	add	r10,rax
	mov	rax,rbp
	adc	rdx,0
	mov	r11,rdx

	mul	r12
	add	r11,rax
	mov	rax,r8
	adc	rdx,0
	xor	r13,r13
	mov	r12,rdx

	; reduction step 0 (folds r8 away)
	mov	rbp,r8
	shl	r8,32
	mul	r15
	shr	rbp,32
	add	r9,r8
	adc	r10,rbp
	adc	r11,rax
	mov	rax,QWORD[8+rbx]
	adc	r12,rdx
	adc	r13,0
	xor	r8,r8

	; pass 1: a[0..3] * b[1]
	mov	rbp,rax
	mul	QWORD[rsi]
	add	r9,rax
	mov	rax,rbp
	adc	rdx,0
	mov	rcx,rdx

	mul	QWORD[8+rsi]
	add	r10,rcx
	adc	rdx,0
	add	r10,rax
	mov	rax,rbp
	adc	rdx,0
	mov	rcx,rdx

	mul	QWORD[16+rsi]
	add	r11,rcx
	adc	rdx,0
	add	r11,rax
	mov	rax,rbp
	adc	rdx,0
	mov	rcx,rdx

	mul	QWORD[24+rsi]
	add	r12,rcx
	adc	rdx,0
	add	r12,rax
	mov	rax,r9
	adc	r13,rdx
	adc	r8,0

	; reduction step 1 (folds r9 away)
	mov	rbp,r9
	shl	r9,32
	mul	r15
	shr	rbp,32
	add	r10,r9
	adc	r11,rbp
	adc	r12,rax
	mov	rax,QWORD[16+rbx]
	adc	r13,rdx
	adc	r8,0
	xor	r9,r9

	; pass 2: a[0..3] * b[2]
	mov	rbp,rax
	mul	QWORD[rsi]
	add	r10,rax
	mov	rax,rbp
	adc	rdx,0
	mov	rcx,rdx

	mul	QWORD[8+rsi]
	add	r11,rcx
	adc	rdx,0
	add	r11,rax
	mov	rax,rbp
	adc	rdx,0
	mov	rcx,rdx

	mul	QWORD[16+rsi]
	add	r12,rcx
	adc	rdx,0
	add	r12,rax
	mov	rax,rbp
	adc	rdx,0
	mov	rcx,rdx

	mul	QWORD[24+rsi]
	add	r13,rcx
	adc	rdx,0
	add	r13,rax
	mov	rax,r10
	adc	r8,rdx
	adc	r9,0

	; reduction step 2 (folds r10 away)
	mov	rbp,r10
	shl	r10,32
	mul	r15
	shr	rbp,32
	add	r11,r10
	adc	r12,rbp
	adc	r13,rax
	mov	rax,QWORD[24+rbx]
	adc	r8,rdx
	adc	r9,0
	xor	r10,r10

	; pass 3: a[0..3] * b[3]
	mov	rbp,rax
	mul	QWORD[rsi]
	add	r11,rax
	mov	rax,rbp
	adc	rdx,0
	mov	rcx,rdx

	mul	QWORD[8+rsi]
	add	r12,rcx
	adc	rdx,0
	add	r12,rax
	mov	rax,rbp
	adc	rdx,0
	mov	rcx,rdx

	mul	QWORD[16+rsi]
	add	r13,rcx
	adc	rdx,0
	add	r13,rax
	mov	rax,rbp
	adc	rdx,0
	mov	rcx,rdx

	mul	QWORD[24+rsi]
	add	r8,rcx
	adc	rdx,0
	add	r8,rax
	mov	rax,r11
	adc	r9,rdx
	adc	r10,0

	; reduction step 3 (folds r11 away)
	mov	rbp,r11
	shl	r11,32
	mul	r15
	shr	rbp,32
	add	r12,r11
	adc	r13,rbp
	mov	rcx,r12
	adc	r8,rax
	adc	r9,rdx
	mov	rbp,r13
	adc	r10,0

	; subtract p from r10:r9:r8:r13:r12; the borrow selects the output
	sub	r12,-1
	mov	rbx,r8
	sbb	r13,r14
	sbb	r8,0
	mov	rdx,r9
	sbb	r9,r15
	sbb	r10,0

	cmovc	r12,rcx
	cmovc	r13,rbp
	mov	QWORD[rdi],r12
	cmovc	r8,rbx
	mov	QWORD[8+rdi],r13
	cmovc	r9,rdx
	mov	QWORD[16+rdi],r8
	mov	QWORD[24+rdi],r9

	DB	0F3h,0C3h		;repret

;-----------------------------------------------------------------------
; ecp_nistz256_sqr_mont
; In (Win64): rcx = result pointer, rdx = pointer to a.
; Saves the callee-saved GPRs, loads a[0..3] into rax,r14,r15,r8 and calls
; __ecp_nistz256_sqr_montq.
;-----------------------------------------------------------------------
global	ecp_nistz256_sqr_mont

ALIGN	32
ecp_nistz256_sqr_mont:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ecp_nistz256_sqr_mont:
	mov	rdi,rcx
	mov	rsi,rdx

	push	rbp
	push	rbx
	push	r12
	push	r13
	push	r14
	push	r15
	mov	rax,QWORD[rsi]
	mov	r14,QWORD[8+rsi]
	mov	r15,QWORD[16+rsi]
	mov	r8,QWORD[24+rsi]

	call	__ecp_nistz256_sqr_montq
$L$sqr_mont_done:
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbx
	pop	rbp
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_ecp_nistz256_sqr_mont:

;-----------------------------------------------------------------------
; __ecp_nistz256_sqr_montq (internal, register calling convention)
; In:   rax,r14,r15,r8 = a[0..3], rsi = &a, rdi = &result
; Out:  r12,r13,r14,r15 = result words 0..3, also stored to [rdi];
;       rsi = QWORD[$L$poly+8], rbp = QWORD[$L$poly+24]
;       (callers in this file read rsi/rbp after the call)
; Clobbers rax, rcx, rdx, r8-r11, flags.
;-----------------------------------------------------------------------
ALIGN	32
__ecp_nistz256_sqr_montq:
	; off-diagonal products a[i]*a[j], i < j
	mov	r13,rax
	mul	r14
	mov	r9,rax
	mov	rax,r15
	mov	r10,rdx

	mul	r13
	add	r10,rax
	mov	rax,r8
	adc	rdx,0
	mov	r11,rdx

	mul	r13
	add	r11,rax
	mov	rax,r15
	adc	rdx,0
	mov	r12,rdx

	mul	r14
	add	r11,rax
	mov	rax,r8
	adc	rdx,0
	mov	rbp,rdx

	mul	r14
	add	r12,rax
	mov	rax,r8
	adc	rdx,0
	add	r12,rbp
	mov	r13,rdx
	adc	r13,0

	mul	r15
	xor	r15,r15
	add	r13,rax
	mov	rax,QWORD[rsi]
	mov	r14,rdx
	adc	r14,0

	; double the off-diagonal sum
	add	r9,r9
	adc	r10,r10
	adc	r11,r11
	adc	r12,r12
	adc	r13,r13
	adc	r14,r14
	adc	r15,0

	; add the squares a[i]*a[i]
	mul	rax
	mov	r8,rax
	mov	rax,QWORD[8+rsi]
	mov	rcx,rdx

	mul	rax
	add	r9,rcx
	adc	r10,rax
	mov	rax,QWORD[16+rsi]
	adc	rdx,0
	mov	rcx,rdx

	mul	rax
	add	r11,rcx
	adc	r12,rax
	mov	rax,QWORD[24+rsi]
	adc	rdx,0
	mov	rcx,rdx

	mul	rax
	add	r13,rcx
	adc	r14,rax
	mov	rax,r8
	adc	r15,rdx

	mov	rsi,QWORD[(($L$poly+8))]
	mov	rbp,QWORD[(($L$poly+24))]

	; four reduction steps over the low half r8..r11
	mov	rcx,r8
	shl	r8,32
	mul	rbp
	shr	rcx,32
	add	r9,r8
	adc	r10,rcx
	adc	r11,rax
	mov	rax,r9
	adc	rdx,0

	mov	rcx,r9
	shl	r9,32
	mov	r8,rdx
	mul	rbp
	shr	rcx,32
	add	r10,r9
	adc	r11,rcx
	adc	r8,rax
	mov	rax,r10
	adc	rdx,0

	mov	rcx,r10
	shl	r10,32
	mov	r9,rdx
	mul	rbp
	shr	rcx,32
	add	r11,r10
	adc	r8,rcx
	adc	r9,rax
	mov	rax,r11
	adc	rdx,0

	mov	rcx,r11
	shl	r11,32
	mov	r10,rdx
	mul	rbp
	shr	rcx,32
	add	r8,r11
	adc	r9,rcx
	adc	r10,rax
	adc	rdx,0
	xor	r11,r11

	; add the reduced low half to the high half, r11 = carry word
	add	r12,r8
	adc	r13,r9
	mov	r8,r12
	adc	r14,r10
	adc	r15,rdx
	mov	r9,r13
	adc	r11,0

	; subtract p; the borrow selects the output
	sub	r12,-1
	mov	r10,r14
	sbb	r13,rsi
	sbb	r14,0
	mov	rcx,r15
	sbb	r15,rbp
	sbb	r11,0

	cmovc	r12,r8
	cmovc	r13,r9
	mov	QWORD[rdi],r12
	cmovc	r14,r10
	mov	QWORD[8+rdi],r13
	cmovc	r15,rcx
	mov	QWORD[16+rdi],r14
	mov	QWORD[24+rdi],r15

	DB	0F3h,0C3h		;repret

;-----------------------------------------------------------------------
; ecp_nistz256_from_mont
; In (Win64): rcx = result pointer, rdx = pointer to 4 input qwords.
; Runs the same four reduction steps as the multiply/square routines on the
; input alone, then subtracts p and keeps the difference unless it borrows.
;-----------------------------------------------------------------------
global	ecp_nistz256_from_mont

ALIGN	32
ecp_nistz256_from_mont:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ecp_nistz256_from_mont:
	mov	rdi,rcx
	mov	rsi,rdx

	push	r12
	push	r13

	mov	rax,QWORD[rsi]
	mov	r13,QWORD[(($L$poly+24))]
	mov	r9,QWORD[8+rsi]
	mov	r10,QWORD[16+rsi]
	mov	r11,QWORD[24+rsi]
	mov	r8,rax
	mov	r12,QWORD[(($L$poly+8))]

	mov	rcx,rax
	shl	r8,32
	mul	r13
	shr	rcx,32
	add	r9,r8
	adc	r10,rcx
	adc	r11,rax
	mov	rax,r9
	adc	rdx,0

	mov	rcx,r9
	shl	r9,32
	mov	r8,rdx
	mul	r13
	shr	rcx,32
	add	r10,r9
	adc	r11,rcx
	adc	r8,rax
	mov	rax,r10
	adc	rdx,0

	mov	rcx,r10
	shl	r10,32
	mov	r9,rdx
	mul	r13
	shr	rcx,32
	add	r11,r10
	adc	r8,rcx
	adc	r9,rax
	mov	rax,r11
	adc	rdx,0

	mov	rcx,r11
	shl	r11,32
	mov	r10,rdx
	mul	r13
	shr	rcx,32
	add	r8,r11
	adc	r9,rcx
	mov	rcx,r8
	adc	r10,rax
	mov	rsi,r9
	adc	rdx,0

	sub	r8,-1
	mov	rax,r10
	sbb	r9,r12
	sbb	r10,0
	mov	r11,rdx			; r11 = top word before the subtraction
	sbb	rdx,r13
	sbb	r13,r13			; r13 = -borrow; ZF set when there was no borrow

	cmovnz	r8,rcx			; borrow: restore the unsubtracted words
	cmovnz	r9,rsi
	mov	QWORD[rdi],r8
	cmovnz	r10,rax
	mov	QWORD[8+rdi],r9
	cmovz	r11,rdx			; no borrow: take the subtracted top word
	mov	QWORD[16+rdi],r10
	mov	QWORD[24+rdi],r11

	pop	r13
	pop	r12
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_ecp_nistz256_from_mont:

;-----------------------------------------------------------------------
; ecp_nistz256_select_w5
; In (Win64): rcx = output (96 bytes), rdx = table, r8d = index.
; Walks 16 entries of 96 bytes. The entry counter starts at 1, so index 0
; matches nothing and yields an all-zero output. Every entry is loaded and
; masked, so the addresses touched do not depend on the index.
; xmm6-xmm15 are spilled to a 168-byte frame and restored before return.
;-----------------------------------------------------------------------
global	ecp_nistz256_select_w5

ALIGN	32
ecp_nistz256_select_w5:
	lea	rax,[((-136))+rsp]
$L$SEH_begin_ecp_nistz256_select_w5:
	DB	0x48,0x8d,0x60,0xe0		; lea rsp,[rax-0x20]
	DB	0x0f,0x29,0x70,0xe0		; movaps [rax-0x20],xmm6
	DB	0x0f,0x29,0x78,0xf0		; movaps [rax-0x10],xmm7
	DB	0x44,0x0f,0x29,0x00		; movaps [rax],xmm8
	DB	0x44,0x0f,0x29,0x48,0x10	; movaps [rax+0x10],xmm9
	DB	0x44,0x0f,0x29,0x50,0x20	; movaps [rax+0x20],xmm10
	DB	0x44,0x0f,0x29,0x58,0x30	; movaps [rax+0x30],xmm11
	DB	0x44,0x0f,0x29,0x60,0x40	; movaps [rax+0x40],xmm12
	DB	0x44,0x0f,0x29,0x68,0x50	; movaps [rax+0x50],xmm13
	DB	0x44,0x0f,0x29,0x70,0x60	; movaps [rax+0x60],xmm14
	DB	0x44,0x0f,0x29,0x78,0x70	; movaps [rax+0x70],xmm15
	movdqa	xmm0,XMMWORD[$L$One]
	movd	xmm1,r8d

	pxor	xmm2,xmm2
	pxor	xmm3,xmm3
	pxor	xmm4,xmm4
	pxor	xmm5,xmm5
	pxor	xmm6,xmm6
	pxor	xmm7,xmm7

	movdqa	xmm8,xmm0		; xmm8 = running entry number, starts at 1
	pshufd	xmm1,xmm1,0		; broadcast the index to all lanes

	mov	rax,16
$L$select_loop_sse_w5:
	movdqa	xmm15,xmm8
	paddd	xmm8,xmm0
	pcmpeqd	xmm15,xmm1		; all-ones when this entry is the wanted one

	movdqa	xmm9,XMMWORD[rdx]
	movdqa	xmm10,XMMWORD[16+rdx]
	movdqa	xmm11,XMMWORD[32+rdx]
	movdqa	xmm12,XMMWORD[48+rdx]
	movdqa	xmm13,XMMWORD[64+rdx]
	movdqa	xmm14,XMMWORD[80+rdx]
	lea	rdx,[96+rdx]

	pand	xmm9,xmm15
	pand	xmm10,xmm15
	por	xmm2,xmm9
	pand	xmm11,xmm15
	por	xmm3,xmm10
	pand	xmm12,xmm15
	por	xmm4,xmm11
	pand	xmm13,xmm15
	por	xmm5,xmm12
	pand	xmm14,xmm15
	por	xmm6,xmm13
	por	xmm7,xmm14

	dec	rax
	jnz	NEAR $L$select_loop_sse_w5

	movdqu	XMMWORD[rcx],xmm2
	movdqu	XMMWORD[16+rcx],xmm3
	movdqu	XMMWORD[32+rcx],xmm4
	movdqu	XMMWORD[48+rcx],xmm5
	movdqu	XMMWORD[64+rcx],xmm6
	movdqu	XMMWORD[80+rcx],xmm7
	movaps	xmm6,XMMWORD[rsp]
	movaps	xmm7,XMMWORD[16+rsp]
	movaps	xmm8,XMMWORD[32+rsp]
	movaps	xmm9,XMMWORD[48+rsp]
	movaps	xmm10,XMMWORD[64+rsp]
	movaps	xmm11,XMMWORD[80+rsp]
	movaps	xmm12,XMMWORD[96+rsp]
	movaps	xmm13,XMMWORD[112+rsp]
	movaps	xmm14,XMMWORD[128+rsp]
	movaps	xmm15,XMMWORD[144+rsp]
	lea	rsp,[168+rsp]
$L$SEH_end_ecp_nistz256_select_w5:
	DB	0F3h,0C3h		;repret

;-----------------------------------------------------------------------
; ecp_nistz256_select_w7
; In (Win64): rcx = output (64 bytes), rdx = table, r8d = index.
; Same masking scheme as ecp_nistz256_select_w5, over 64 entries of 64
; bytes, with a prefetch ahead of the read pointer.
;-----------------------------------------------------------------------
global	ecp_nistz256_select_w7

ALIGN	32
ecp_nistz256_select_w7:
	lea	rax,[((-136))+rsp]
$L$SEH_begin_ecp_nistz256_select_w7:
	DB	0x48,0x8d,0x60,0xe0		; lea rsp,[rax-0x20]
	DB	0x0f,0x29,0x70,0xe0		; movaps [rax-0x20],xmm6
	DB	0x0f,0x29,0x78,0xf0		; movaps [rax-0x10],xmm7
	DB	0x44,0x0f,0x29,0x00		; movaps [rax],xmm8
	DB	0x44,0x0f,0x29,0x48,0x10	; movaps [rax+0x10],xmm9
	DB	0x44,0x0f,0x29,0x50,0x20	; movaps [rax+0x20],xmm10
	DB	0x44,0x0f,0x29,0x58,0x30	; movaps [rax+0x30],xmm11
	DB	0x44,0x0f,0x29,0x60,0x40	; movaps [rax+0x40],xmm12
	DB	0x44,0x0f,0x29,0x68,0x50	; movaps [rax+0x50],xmm13
	DB	0x44,0x0f,0x29,0x70,0x60	; movaps [rax+0x60],xmm14
	DB	0x44,0x0f,0x29,0x78,0x70	; movaps [rax+0x70],xmm15
	movdqa	xmm8,XMMWORD[$L$One]
	movd	xmm1,r8d

	pxor	xmm2,xmm2
	pxor	xmm3,xmm3
	pxor	xmm4,xmm4
	pxor	xmm5,xmm5

	movdqa	xmm0,xmm8
	pshufd	xmm1,xmm1,0
	mov	rax,64

$L$select_loop_sse_w7:
	movdqa	xmm15,xmm8
	paddd	xmm8,xmm0
	movdqa	xmm9,XMMWORD[rdx]
	movdqa	xmm10,XMMWORD[16+rdx]
	pcmpeqd	xmm15,xmm1
	movdqa	xmm11,XMMWORD[32+rdx]
	movdqa	xmm12,XMMWORD[48+rdx]
	lea	rdx,[64+rdx]

	pand	xmm9,xmm15
	pand	xmm10,xmm15
	por	xmm2,xmm9
	pand	xmm11,xmm15
	por	xmm3,xmm10
	pand	xmm12,xmm15
	por	xmm4,xmm11
	prefetcht0	[255+rdx]
	por	xmm5,xmm12

	dec	rax
	jnz	NEAR $L$select_loop_sse_w7

	movdqu	XMMWORD[rcx],xmm2
	movdqu	XMMWORD[16+rcx],xmm3
	movdqu	XMMWORD[32+rcx],xmm4
	movdqu	XMMWORD[48+rcx],xmm5
	movaps	xmm6,XMMWORD[rsp]
	movaps	xmm7,XMMWORD[16+rsp]
	movaps	xmm8,XMMWORD[32+rsp]
	movaps	xmm9,XMMWORD[48+rsp]
	movaps	xmm10,XMMWORD[64+rsp]
	movaps	xmm11,XMMWORD[80+rsp]
	movaps	xmm12,XMMWORD[96+rsp]
	movaps	xmm13,XMMWORD[112+rsp]
	movaps	xmm14,XMMWORD[128+rsp]
	movaps	xmm15,XMMWORD[144+rsp]
	lea	rsp,[168+rsp]
$L$SEH_end_ecp_nistz256_select_w7:
	DB	0F3h,0C3h		;repret

;-----------------------------------------------------------------------
; ecp_nistz256_avx2_select_w7
; Placeholder: the body is a single ud2, so calling it raises an
; invalid-opcode exception.
;-----------------------------------------------------------------------
global	ecp_nistz256_avx2_select_w7

ALIGN	32
ecp_nistz256_avx2_select_w7:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ecp_nistz256_avx2_select_w7:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8

	DB	0x0f,0x0b		; ud2
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_ecp_nistz256_avx2_select_w7:

;-----------------------------------------------------------------------
; Register-convention helpers shared by the point routines.
; Common inputs: r14 = QWORD[$L$poly+8], r15 = QWORD[$L$poly+24].
; All of them clobber rax, rbp, rcx, r10, r11 and flags.
;-----------------------------------------------------------------------

; __ecp_nistz256_add_toq
; (r12,r13,r8,r9) += [rbx] mod p; result stays in r12,r13,r8,r9 and is
; stored to [rdi]. r11 holds the carry word, and the borrow of
; (r11:sum - p) selects the output, so sums in [p, 2^256) are reduced too.
ALIGN	32
__ecp_nistz256_add_toq:
	xor	r11,r11
	add	r12,QWORD[rbx]
	adc	r13,QWORD[8+rbx]
	mov	rax,r12
	adc	r8,QWORD[16+rbx]
	adc	r9,QWORD[24+rbx]
	mov	rbp,r13
	adc	r11,0			; r11 = carry out of the addition

	sub	r12,-1
	mov	rcx,r8
	sbb	r13,r14
	sbb	r8,0
	mov	r10,r9
	sbb	r9,r15
	sbb	r11,0			; CF set only when the sum is below p

	cmovc	r12,rax
	cmovc	r13,rbp
	mov	QWORD[rdi],r12
	cmovc	r8,rcx
	mov	QWORD[8+rdi],r13
	cmovc	r9,r10
	mov	QWORD[16+rdi],r8
	mov	QWORD[24+rdi],r9

	DB	0F3h,0C3h		;repret

; __ecp_nistz256_sub_fromq
; (r12,r13,r8,r9) -= [rbx] mod p; p is added back when the subtraction
; borrows. Result stays in r12,r13,r8,r9 and is stored to [rdi].
ALIGN	32
__ecp_nistz256_sub_fromq:
	sub	r12,QWORD[rbx]
	sbb	r13,QWORD[8+rbx]
	mov	rax,r12
	sbb	r8,QWORD[16+rbx]
	sbb	r9,QWORD[24+rbx]
	mov	rbp,r13
	sbb	r11,r11			; r11 = 0 or -1 (borrow)

	add	r12,-1
	mov	rcx,r8
	adc	r13,r14
	adc	r8,0
	mov	r10,r9
	adc	r9,r15
	test	r11,r11

	cmovz	r12,rax			; no borrow: keep the plain difference
	cmovz	r13,rbp
	mov	QWORD[rdi],r12
	cmovz	r8,rcx
	mov	QWORD[8+rdi],r13
	cmovz	r9,r10
	mov	QWORD[16+rdi],r8
	mov	QWORD[24+rdi],r9

	DB	0F3h,0C3h		;repret

; __ecp_nistz256_subq
; (r12,r13,r8,r9) = (rax,rbp,rcx,r10) - (r12,r13,r8,r9) mod p.
; Result is returned in registers only; nothing is stored.
ALIGN	32
__ecp_nistz256_subq:
	sub	rax,r12
	sbb	rbp,r13
	mov	r12,rax
	sbb	rcx,r8
	sbb	r10,r9
	mov	r13,rbp
	sbb	r11,r11			; r11 = 0 or -1 (borrow)

	add	rax,-1
	mov	r8,rcx
	adc	rbp,r14
	adc	rcx,0
	mov	r9,r10
	adc	r10,r15
	test	r11,r11

	cmovnz	r12,rax			; borrow: take the value with p added back
	cmovnz	r13,rbp
	cmovnz	r8,rcx
	cmovnz	r9,r10

	DB	0F3h,0C3h		;repret

; __ecp_nistz256_mul_by_2q
; (r12,r13,r8,r9) = 2*(r12,r13,r8,r9) mod p; stored to [rdi].
; Uses the same carry-word/borrow selection as __ecp_nistz256_add_toq.
ALIGN	32
__ecp_nistz256_mul_by_2q:
	xor	r11,r11
	add	r12,r12
	adc	r13,r13
	mov	rax,r12
	adc	r8,r8
	adc	r9,r9
	mov	rbp,r13
	adc	r11,0			; r11 = carry out of the doubling

	sub	r12,-1
	mov	rcx,r8
	sbb	r13,r14
	sbb	r8,0
	mov	r10,r9
	sbb	r9,r15
	sbb	r11,0			; CF set only when the doubled value is below p

	cmovc	r12,rax
	cmovc	r13,rbp
	mov	QWORD[rdi],r12
	cmovc	r8,rcx
	mov	QWORD[8+rdi],r13
	cmovc	r9,r10
	mov	QWORD[16+rdi],r8
	mov	QWORD[24+rdi],r9

	DB	0F3h,0C3h		;repret

;-----------------------------------------------------------------------
; ecp_nistz256_point_double
; In (Win64): rcx = output point (96 bytes), rdx = input point (96 bytes).
; Frame: 32*5+8 bytes of scratch at [rsp]. The output pointer and the
; addresses out+32 / out+64 are parked in xmm0 / xmm1 / xmm2 and moved
; back into rdi (or rbx) right before the call that writes each part.
;-----------------------------------------------------------------------
global	ecp_nistz256_point_double

ALIGN	32
ecp_nistz256_point_double:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ecp_nistz256_point_double:
	mov	rdi,rcx
	mov	rsi,rdx

	push	rbp
	push	rbx
	push	r12
	push	r13
	push	r14
	push	r15
	sub	rsp,32*5+8

	movdqu	xmm0,XMMWORD[rsi]
	mov	rbx,rsi
	movdqu	xmm1,XMMWORD[16+rsi]
	mov	r12,QWORD[((32+0))+rsi]
	mov	r13,QWORD[((32+8))+rsi]
	mov	r8,QWORD[((32+16))+rsi]
	mov	r9,QWORD[((32+24))+rsi]
	mov	r14,QWORD[(($L$poly+8))]
	mov	r15,QWORD[(($L$poly+24))]
	movdqa	XMMWORD[96+rsp],xmm0	; [96+rsp] = copy of input bytes 0..31
	movdqa	XMMWORD[(96+16)+rsp],xmm1
	lea	r10,[32+rdi]
	lea	r11,[64+rdi]
	DB	102,72,15,110,199	; movq xmm0,rdi
	DB	102,73,15,110,202	; movq xmm1,r10
	DB	102,73,15,110,211	; movq xmm2,r11

	lea	rdi,[rsp]
	call	__ecp_nistz256_mul_by_2q

	mov	rax,QWORD[((64+0))+rsi]
	mov	r14,QWORD[((64+8))+rsi]
	mov	r15,QWORD[((64+16))+rsi]
	mov	r8,QWORD[((64+24))+rsi]
	lea	rsi,[((64-0))+rsi]
	lea	rdi,[64+rsp]
	call	__ecp_nistz256_sqr_montq

	mov	rax,QWORD[((0+0))+rsp]
	mov	r14,QWORD[((8+0))+rsp]
	lea	rsi,[((0+0))+rsp]
	mov	r15,QWORD[((16+0))+rsp]
	mov	r8,QWORD[((24+0))+rsp]
	lea	rdi,[rsp]
	call	__ecp_nistz256_sqr_montq

	mov	rax,QWORD[32+rbx]
	mov	r9,QWORD[((64+0))+rbx]
	mov	r10,QWORD[((64+8))+rbx]
	mov	r11,QWORD[((64+16))+rbx]
	mov	r12,QWORD[((64+24))+rbx]
	lea	rsi,[((64-0))+rbx]
	lea	rbx,[32+rbx]
	DB	102,72,15,126,215	; movq rdi,xmm2  (out+64)
	call	__ecp_nistz256_mul_montq
	call	__ecp_nistz256_mul_by_2q

	mov	r12,QWORD[((96+0))+rsp]
	mov	r13,QWORD[((96+8))+rsp]
	lea	rbx,[64+rsp]
	mov	r8,QWORD[((96+16))+rsp]
	mov	r9,QWORD[((96+24))+rsp]
	lea	rdi,[32+rsp]
	call	__ecp_nistz256_add_toq

	mov	r12,QWORD[((96+0))+rsp]
	mov	r13,QWORD[((96+8))+rsp]
	lea	rbx,[64+rsp]
	mov	r8,QWORD[((96+16))+rsp]
	mov	r9,QWORD[((96+24))+rsp]
	lea	rdi,[64+rsp]
	call	__ecp_nistz256_sub_fromq

	mov	rax,QWORD[((0+0))+rsp]
	mov	r14,QWORD[((8+0))+rsp]
	lea	rsi,[((0+0))+rsp]
	mov	r15,QWORD[((16+0))+rsp]
	mov	r8,QWORD[((24+0))+rsp]
	DB	102,72,15,126,207	; movq rdi,xmm1  (out+32)
	call	__ecp_nistz256_sqr_montq

	; Halve r12..r15 modulo p: add p when the value is odd (rsi/rbp still
	; hold the modulus words left by the square), then shift the 257-bit
	; value r9:r15:r14:r13:r12 right by one.
	xor	r9,r9
	mov	rax,r12
	add	r12,-1
	mov	r10,r13
	adc	r13,rsi
	mov	rcx,r14
	adc	r14,0
	mov	r8,r15
	adc	r15,rbp
	adc	r9,0
	xor	rsi,rsi
	test	rax,1

	cmovz	r12,rax			; even: use the value without p added
	cmovz	r13,r10
	cmovz	r14,rcx
	cmovz	r15,r8
	cmovz	r9,rsi

	mov	rax,r13
	shr	r12,1
	shl	rax,63
	mov	r10,r14
	shr	r13,1
	or	r12,rax
	shl	r10,63
	mov	rcx,r15
	shr	r14,1
	or	r13,r10
	shl	rcx,63
	mov	QWORD[rdi],r12
	shr	r15,1
	mov	QWORD[8+rdi],r13
	shl	r9,63
	or	r14,rcx
	or	r15,r9
	mov	QWORD[16+rdi],r14
	mov	QWORD[24+rdi],r15
	mov	rax,QWORD[64+rsp]
	lea	rbx,[64+rsp]
	mov	r9,QWORD[((0+32))+rsp]
	mov	r10,QWORD[((8+32))+rsp]
	lea	rsi,[((0+32))+rsp]
	mov	r11,QWORD[((16+32))+rsp]
	mov	r12,QWORD[((24+32))+rsp]
	lea	rdi,[32+rsp]
	call	__ecp_nistz256_mul_montq

	lea	rdi,[128+rsp]
	call	__ecp_nistz256_mul_by_2q

	lea	rbx,[32+rsp]
	lea	rdi,[32+rsp]
	call	__ecp_nistz256_add_toq

	mov	rax,QWORD[96+rsp]
	lea	rbx,[96+rsp]
	mov	r9,QWORD[((0+0))+rsp]
	mov	r10,QWORD[((8+0))+rsp]
	lea	rsi,[((0+0))+rsp]
	mov	r11,QWORD[((16+0))+rsp]
	mov	r12,QWORD[((24+0))+rsp]
	lea	rdi,[rsp]
	call	__ecp_nistz256_mul_montq

	lea	rdi,[128+rsp]
	call	__ecp_nistz256_mul_by_2q

	mov	rax,QWORD[((0+32))+rsp]
	mov	r14,QWORD[((8+32))+rsp]
	lea	rsi,[((0+32))+rsp]
	mov	r15,QWORD[((16+32))+rsp]
	mov	r8,QWORD[((24+32))+rsp]
	DB	102,72,15,126,199	; movq rdi,xmm0  (out)
	call	__ecp_nistz256_sqr_montq

	; Re-map the square's outputs to the helper convention: result words
	; 2,3 move to r8,r9 and the modulus words return to r14,r15.
	lea	rbx,[128+rsp]
	mov	r8,r14
	mov	r9,r15
	mov	r14,rsi
	mov	r15,rbp
	call	__ecp_nistz256_sub_fromq

	mov	rax,QWORD[((0+0))+rsp]
	mov	rbp,QWORD[((0+8))+rsp]
	mov	rcx,QWORD[((0+16))+rsp]
	mov	r10,QWORD[((0+24))+rsp]
	lea	rdi,[rsp]
	call	__ecp_nistz256_subq

	mov	rax,QWORD[32+rsp]
	lea	rbx,[32+rsp]
	mov	r14,r12
	xor	ecx,ecx			; sets ZF, so both cmovz below always move
	mov	QWORD[((0+0))+rsp],r12
	mov	r10,r13
	mov	QWORD[((0+8))+rsp],r13
	cmovz	r11,r8
	mov	QWORD[((0+16))+rsp],r8
	lea	rsi,[((0-0))+rsp]
	cmovz	r12,r9
	mov	QWORD[((0+24))+rsp],r9
	mov	r9,r14
	lea	rdi,[rsp]
	call	__ecp_nistz256_mul_montq

	DB	102,72,15,126,203	; movq rbx,xmm1  (out+32)
	DB	102,72,15,126,207	; movq rdi,xmm1  (out+32)
	call	__ecp_nistz256_sub_fromq

	add	rsp,32*5+8
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbx
	pop	rbp
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_ecp_nistz256_point_double:

;-----------------------------------------------------------------------
; ecp_nistz256_point_add
; In (Win64): rcx = output point, rdx = first input, r8 = second input
;             (96 bytes each).
; Frame: 32*18+8 bytes. Input copies: first input at [384..479+rsp],
; second input at [480..575+rsp].
; Masks: xmm5 = all-ones when bytes 0..63 of the first input are all zero,
;        xmm4 = the same test for the second input.
; The final stores pick, per 32-byte part: the first input when xmm4 is
; set, else the second input when xmm5 is set, else the computed value.
;-----------------------------------------------------------------------
global	ecp_nistz256_point_add

ALIGN	32
ecp_nistz256_point_add:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ecp_nistz256_point_add:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8

	push	rbp
	push	rbx
	push	r12
	push	r13
	push	r14
	push	r15
	sub	rsp,32*18+8

	movdqu	xmm0,XMMWORD[rsi]
	movdqu	xmm1,XMMWORD[16+rsi]
	movdqu	xmm2,XMMWORD[32+rsi]
	movdqu	xmm3,XMMWORD[48+rsi]
	movdqu	xmm4,XMMWORD[64+rsi]
	movdqu	xmm5,XMMWORD[80+rsi]
	mov	rbx,rsi
	mov	rsi,rdx
	movdqa	XMMWORD[384+rsp],xmm0
	movdqa	XMMWORD[(384+16)+rsp],xmm1
	por	xmm1,xmm0
	movdqa	XMMWORD[416+rsp],xmm2
	movdqa	XMMWORD[(416+16)+rsp],xmm3
	por	xmm3,xmm2
	movdqa	XMMWORD[448+rsp],xmm4
	movdqa	XMMWORD[(448+16)+rsp],xmm5
	por	xmm3,xmm1

	movdqu	xmm0,XMMWORD[rsi]
	pshufd	xmm5,xmm3,0xb1
	movdqu	xmm1,XMMWORD[16+rsi]
	movdqu	xmm2,XMMWORD[32+rsi]
	por	xmm5,xmm3
	movdqu	xmm3,XMMWORD[48+rsi]
	mov	rax,QWORD[((64+0))+rsi]
	mov	r14,QWORD[((64+8))+rsi]
	mov	r15,QWORD[((64+16))+rsi]
	mov	r8,QWORD[((64+24))+rsi]
	movdqa	XMMWORD[480+rsp],xmm0
	pshufd	xmm4,xmm5,0x1e
	movdqa	XMMWORD[(480+16)+rsp],xmm1
	por	xmm1,xmm0
	DB	102,72,15,110,199	; movq xmm0,rdi  (park the output pointer)
	movdqa	XMMWORD[512+rsp],xmm2
	movdqa	XMMWORD[(512+16)+rsp],xmm3
	por	xmm3,xmm2
	por	xmm5,xmm4
	pxor	xmm4,xmm4
	por	xmm3,xmm1

	lea	rsi,[((64-0))+rsi]
	mov	QWORD[((544+0))+rsp],rax
	mov	QWORD[((544+8))+rsp],r14
	mov	QWORD[((544+16))+rsp],r15
	mov	QWORD[((544+24))+rsp],r8
	lea	rdi,[96+rsp]
	call	__ecp_nistz256_sqr_montq

	pcmpeqd	xmm5,xmm4
	pshufd	xmm4,xmm3,0xb1
	por	xmm4,xmm3
	pshufd	xmm5,xmm5,0		; xmm5 = first-input mask, broadcast
	pshufd	xmm3,xmm4,0x1e
	por	xmm4,xmm3
	pxor	xmm3,xmm3
	pcmpeqd	xmm4,xmm3
	pshufd	xmm4,xmm4,0		; xmm4 = second-input mask, broadcast
	mov	rax,QWORD[((64+0))+rbx]
	mov	r14,QWORD[((64+8))+rbx]
	mov	r15,QWORD[((64+16))+rbx]
	mov	r8,QWORD[((64+24))+rbx]

	lea	rsi,[((64-0))+rbx]
	lea	rdi,[32+rsp]
	call	__ecp_nistz256_sqr_montq

	mov	rax,QWORD[544+rsp]
	lea	rbx,[544+rsp]
	mov	r9,QWORD[((0+96))+rsp]
	mov	r10,QWORD[((8+96))+rsp]
	lea	rsi,[((0+96))+rsp]
	mov	r11,QWORD[((16+96))+rsp]
	mov	r12,QWORD[((24+96))+rsp]
	lea	rdi,[224+rsp]
	call	__ecp_nistz256_mul_montq

	mov	rax,QWORD[448+rsp]
	lea	rbx,[448+rsp]
	mov	r9,QWORD[((0+32))+rsp]
	mov	r10,QWORD[((8+32))+rsp]
	lea	rsi,[((0+32))+rsp]
	mov	r11,QWORD[((16+32))+rsp]
	mov	r12,QWORD[((24+32))+rsp]
	lea	rdi,[256+rsp]
	call	__ecp_nistz256_mul_montq

	mov	rax,QWORD[416+rsp]
	lea	rbx,[416+rsp]
	mov	r9,QWORD[((0+224))+rsp]
	mov	r10,QWORD[((8+224))+rsp]
	lea	rsi,[((0+224))+rsp]
	mov	r11,QWORD[((16+224))+rsp]
	mov	r12,QWORD[((24+224))+rsp]
	lea	rdi,[224+rsp]
	call	__ecp_nistz256_mul_montq

	mov	rax,QWORD[512+rsp]
	lea	rbx,[512+rsp]
	mov	r9,QWORD[((0+256))+rsp]
	mov	r10,QWORD[((8+256))+rsp]
	lea	rsi,[((0+256))+rsp]
	mov	r11,QWORD[((16+256))+rsp]
	mov	r12,QWORD[((24+256))+rsp]
	lea	rdi,[256+rsp]
	call	__ecp_nistz256_mul_montq

	lea	rbx,[224+rsp]
	lea	rdi,[64+rsp]
	call	__ecp_nistz256_sub_fromq

	or	r12,r13			; r12 = OR of the difference words (zero test)
	movdqa	xmm2,xmm4
	or	r12,r8
	or	r12,r9
	por	xmm2,xmm5		; xmm2 = either input mask set
	DB	102,73,15,110,220	; movq xmm3,r12  (keep the zero-test value)

	mov	rax,QWORD[384+rsp]
	lea	rbx,[384+rsp]
	mov	r9,QWORD[((0+96))+rsp]
	mov	r10,QWORD[((8+96))+rsp]
	lea	rsi,[((0+96))+rsp]
	mov	r11,QWORD[((16+96))+rsp]
	mov	r12,QWORD[((24+96))+rsp]
	lea	rdi,[160+rsp]
	call	__ecp_nistz256_mul_montq

	mov	rax,QWORD[480+rsp]
	lea	rbx,[480+rsp]
	mov	r9,QWORD[((0+32))+rsp]
	mov	r10,QWORD[((8+32))+rsp]
	lea	rsi,[((0+32))+rsp]
	mov	r11,QWORD[((16+32))+rsp]
	mov	r12,QWORD[((24+32))+rsp]
	lea	rdi,[192+rsp]
	call	__ecp_nistz256_mul_montq

	lea	rbx,[160+rsp]
	lea	rdi,[rsp]
	call	__ecp_nistz256_sub_fromq

	or	r12,r13
	or	r12,r8
	or	r12,r9

	; Second difference non-zero: generic addition.
	; NOTE(review): when the second difference is zero, no input mask is
	; set and the first difference (xmm3) is zero as well, control still
	; reaches $L$add_proceedq rather than a doubling path - confirm that
	; callers never pass equal points or handle that case themselves.
	DB	0x3e			; prefix byte for the jnz below
	jnz	NEAR $L$add_proceedq
	DB	102,73,15,126,208	; movq r8,xmm2
	DB	102,73,15,126,217	; movq r9,xmm3
	test	r8,r8
	jnz	NEAR $L$add_proceedq
	test	r9,r9
	jz	NEAR $L$add_proceedq

	; Second difference zero, first difference non-zero: output all zeros.
	DB	102,72,15,126,199	; movq rdi,xmm0  (out)
	pxor	xmm0,xmm0
	movdqu	XMMWORD[rdi],xmm0
	movdqu	XMMWORD[16+rdi],xmm0
	movdqu	XMMWORD[32+rdi],xmm0
	movdqu	XMMWORD[48+rdi],xmm0
	movdqu	XMMWORD[64+rdi],xmm0
	movdqu	XMMWORD[80+rdi],xmm0
	jmp	NEAR $L$add_doneq

ALIGN	32
$L$add_proceedq:
	mov	rax,QWORD[((0+64))+rsp]
	mov	r14,QWORD[((8+64))+rsp]
	lea	rsi,[((0+64))+rsp]
	mov	r15,QWORD[((16+64))+rsp]
	mov	r8,QWORD[((24+64))+rsp]
	lea	rdi,[96+rsp]
	call	__ecp_nistz256_sqr_montq

	mov	rax,QWORD[448+rsp]
	lea	rbx,[448+rsp]
	mov	r9,QWORD[((0+0))+rsp]
	mov	r10,QWORD[((8+0))+rsp]
	lea	rsi,[((0+0))+rsp]
	mov	r11,QWORD[((16+0))+rsp]
	mov	r12,QWORD[((24+0))+rsp]
	lea	rdi,[352+rsp]
	call	__ecp_nistz256_mul_montq

	mov	rax,QWORD[((0+0))+rsp]
	mov	r14,QWORD[((8+0))+rsp]
	lea	rsi,[((0+0))+rsp]
	mov	r15,QWORD[((16+0))+rsp]
	mov	r8,QWORD[((24+0))+rsp]
	lea	rdi,[32+rsp]
	call	__ecp_nistz256_sqr_montq

	mov	rax,QWORD[544+rsp]
	lea	rbx,[544+rsp]
	mov	r9,QWORD[((0+352))+rsp]
	mov	r10,QWORD[((8+352))+rsp]
	lea	rsi,[((0+352))+rsp]
	mov	r11,QWORD[((16+352))+rsp]
	mov	r12,QWORD[((24+352))+rsp]
	lea	rdi,[352+rsp]
	call	__ecp_nistz256_mul_montq

	mov	rax,QWORD[rsp]
	lea	rbx,[rsp]
	mov	r9,QWORD[((0+32))+rsp]
	mov	r10,QWORD[((8+32))+rsp]
	lea	rsi,[((0+32))+rsp]
	mov	r11,QWORD[((16+32))+rsp]
	mov	r12,QWORD[((24+32))+rsp]
	lea	rdi,[128+rsp]
	call	__ecp_nistz256_mul_montq

	mov	rax,QWORD[160+rsp]
	lea	rbx,[160+rsp]
	mov	r9,QWORD[((0+32))+rsp]
	mov	r10,QWORD[((8+32))+rsp]
	lea	rsi,[((0+32))+rsp]
	mov	r11,QWORD[((16+32))+rsp]
	mov	r12,QWORD[((24+32))+rsp]
	lea	rdi,[192+rsp]
	call	__ecp_nistz256_mul_montq

	; Inline doubling of the product just returned in r12,r13,r8,r9
	; (same carry-word/borrow selection as __ecp_nistz256_mul_by_2q),
	; interleaved with loading [96+rsp] as the minuend for subq.
	xor	r11,r11
	add	r12,r12
	lea	rsi,[96+rsp]
	adc	r13,r13
	mov	rax,r12
	adc	r8,r8
	adc	r9,r9
	mov	rbp,r13
	adc	r11,0

	sub	r12,-1
	mov	rcx,r8
	sbb	r13,r14
	sbb	r8,0
	mov	r10,r9
	sbb	r9,r15
	sbb	r11,0

	cmovc	r12,rax
	mov	rax,QWORD[rsi]
	cmovc	r13,rbp
	mov	rbp,QWORD[8+rsi]
	cmovc	r8,rcx
	mov	rcx,QWORD[16+rsi]
	cmovc	r9,r10
	mov	r10,QWORD[24+rsi]

	call	__ecp_nistz256_subq

	lea	rbx,[128+rsp]
	lea	rdi,[288+rsp]
	call	__ecp_nistz256_sub_fromq

	mov	rax,QWORD[((192+0))+rsp]
	mov	rbp,QWORD[((192+8))+rsp]
	mov	rcx,QWORD[((192+16))+rsp]
	mov	r10,QWORD[((192+24))+rsp]
	lea	rdi,[320+rsp]

	call	__ecp_nistz256_subq

	mov	QWORD[rdi],r12
	mov	QWORD[8+rdi],r13
	mov	QWORD[16+rdi],r8
	mov	QWORD[24+rdi],r9
	mov	rax,QWORD[128+rsp]
	lea	rbx,[128+rsp]
	mov	r9,QWORD[((0+224))+rsp]
	mov	r10,QWORD[((8+224))+rsp]
	lea	rsi,[((0+224))+rsp]
	mov	r11,QWORD[((16+224))+rsp]
	mov	r12,QWORD[((24+224))+rsp]
	lea	rdi,[256+rsp]
	call	__ecp_nistz256_mul_montq

	mov	rax,QWORD[320+rsp]
	lea	rbx,[320+rsp]
	mov	r9,QWORD[((0+64))+rsp]
	mov	r10,QWORD[((8+64))+rsp]
	lea	rsi,[((0+64))+rsp]
	mov	r11,QWORD[((16+64))+rsp]
	mov	r12,QWORD[((24+64))+rsp]
	lea	rdi,[320+rsp]
	call	__ecp_nistz256_mul_montq

	lea	rbx,[256+rsp]
	lea	rdi,[320+rsp]
	call	__ecp_nistz256_sub_fromq

	DB	102,72,15,126,199	; movq rdi,xmm0  (out)

	; out[64..95]: computed [352+rsp] / second input [544+rsp] /
	; first input [448+rsp]
	movdqa	xmm0,xmm5
	movdqa	xmm1,xmm5
	pandn	xmm0,XMMWORD[352+rsp]
	movdqa	xmm2,xmm5
	pandn	xmm1,XMMWORD[((352+16))+rsp]
	movdqa	xmm3,xmm5
	pand	xmm2,XMMWORD[544+rsp]
	pand	xmm3,XMMWORD[((544+16))+rsp]
	por	xmm2,xmm0
	por	xmm3,xmm1

	movdqa	xmm0,xmm4
	movdqa	xmm1,xmm4
	pandn	xmm0,xmm2
	movdqa	xmm2,xmm4
	pandn	xmm1,xmm3
	movdqa	xmm3,xmm4
	pand	xmm2,XMMWORD[448+rsp]
	pand	xmm3,XMMWORD[((448+16))+rsp]
	por	xmm2,xmm0
	por	xmm3,xmm1
	movdqu	XMMWORD[64+rdi],xmm2
	movdqu	XMMWORD[80+rdi],xmm3

	; out[0..31]: computed [288+rsp] / second input [480+rsp] /
	; first input [384+rsp]
	movdqa	xmm0,xmm5
	movdqa	xmm1,xmm5
	pandn	xmm0,XMMWORD[288+rsp]
	movdqa	xmm2,xmm5
	pandn	xmm1,XMMWORD[((288+16))+rsp]
	movdqa	xmm3,xmm5
	pand	xmm2,XMMWORD[480+rsp]
	pand	xmm3,XMMWORD[((480+16))+rsp]
	por	xmm2,xmm0
	por	xmm3,xmm1

	movdqa	xmm0,xmm4
	movdqa	xmm1,xmm4
	pandn	xmm0,xmm2
	movdqa	xmm2,xmm4
	pandn	xmm1,xmm3
	movdqa	xmm3,xmm4
	pand	xmm2,XMMWORD[384+rsp]
	pand	xmm3,XMMWORD[((384+16))+rsp]
	por	xmm2,xmm0
	por	xmm3,xmm1
	movdqu	XMMWORD[rdi],xmm2
	movdqu	XMMWORD[16+rdi],xmm3

	; out[32..63]: computed [320+rsp] / second input [512+rsp] /
	; first input [416+rsp]
	movdqa	xmm0,xmm5
	movdqa	xmm1,xmm5
	pandn	xmm0,XMMWORD[320+rsp]
	movdqa	xmm2,xmm5
	pandn	xmm1,XMMWORD[((320+16))+rsp]
	movdqa	xmm3,xmm5
	pand	xmm2,XMMWORD[512+rsp]
	pand	xmm3,XMMWORD[((512+16))+rsp]
	por	xmm2,xmm0
	por	xmm3,xmm1

	movdqa	xmm0,xmm4
	movdqa	xmm1,xmm4
	pandn	xmm0,xmm2
	movdqa	xmm2,xmm4
	pandn	xmm1,xmm3
	movdqa	xmm3,xmm4
	pand	xmm2,XMMWORD[416+rsp]
	pand	xmm3,XMMWORD[((416+16))+rsp]
	por	xmm2,xmm0
	por	xmm3,xmm1
	movdqu	XMMWORD[32+rdi],xmm2
	movdqu	XMMWORD[48+rdi],xmm3

$L$add_doneq:
	add	rsp,32*18+8
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbx
	pop	rbp
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_ecp_nistz256_point_add:

;-----------------------------------------------------------------------
; ecp_nistz256_point_add_affine
; In (Win64): rcx = output point (96 bytes), rdx = first input (96 bytes),
;             r8 = second input (64 bytes are read).
; Frame: 32*15+8 bytes. Input copies: first input at [320..415+rsp],
; second input at [416..479+rsp].
; Masks: xmm5 / xmm4 = all-ones when bytes 0..63 of the first / second
; input are all zero. Where point_add would take the second input's third
; coordinate, $L$ONE_mont is used instead.
;-----------------------------------------------------------------------
global	ecp_nistz256_point_add_affine

ALIGN	32
ecp_nistz256_point_add_affine:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ecp_nistz256_point_add_affine:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8

	push	rbp
	push	rbx
	push	r12
	push	r13
	push	r14
	push	r15
	sub	rsp,32*15+8

	movdqu	xmm0,XMMWORD[rsi]
	mov	rbx,rdx
	movdqu	xmm1,XMMWORD[16+rsi]
	movdqu	xmm2,XMMWORD[32+rsi]
	movdqu	xmm3,XMMWORD[48+rsi]
	movdqu	xmm4,XMMWORD[64+rsi]
	movdqu	xmm5,XMMWORD[80+rsi]
	mov	rax,QWORD[((64+0))+rsi]
	mov	r14,QWORD[((64+8))+rsi]
	mov	r15,QWORD[((64+16))+rsi]
	mov	r8,QWORD[((64+24))+rsi]
	movdqa	XMMWORD[320+rsp],xmm0
	movdqa	XMMWORD[(320+16)+rsp],xmm1
	por	xmm1,xmm0
	movdqa	XMMWORD[352+rsp],xmm2
	movdqa	XMMWORD[(352+16)+rsp],xmm3
	por	xmm3,xmm2
	movdqa	XMMWORD[384+rsp],xmm4
	movdqa	XMMWORD[(384+16)+rsp],xmm5
	por	xmm3,xmm1

	movdqu	xmm0,XMMWORD[rbx]
	pshufd	xmm5,xmm3,0xb1
	movdqu	xmm1,XMMWORD[16+rbx]
	movdqu	xmm2,XMMWORD[32+rbx]
	por	xmm5,xmm3
	movdqu	xmm3,XMMWORD[48+rbx]
	movdqa	XMMWORD[416+rsp],xmm0
	pshufd	xmm4,xmm5,0x1e
	movdqa	XMMWORD[(416+16)+rsp],xmm1
	por	xmm1,xmm0
	DB	102,72,15,110,199	; movq xmm0,rdi  (park the output pointer)
	movdqa	XMMWORD[448+rsp],xmm2
	movdqa	XMMWORD[(448+16)+rsp],xmm3
	por	xmm3,xmm2
	por	xmm5,xmm4
	pxor	xmm4,xmm4
	por	xmm3,xmm1

	lea	rsi,[((64-0))+rsi]
	lea	rdi,[32+rsp]
	call	__ecp_nistz256_sqr_montq

	; The square's result is still in r12..r15; it is moved straight into
	; the multiply's a[0..3] registers while the masks are finished.
	pcmpeqd	xmm5,xmm4
	pshufd	xmm4,xmm3,0xb1
	mov	rax,QWORD[rbx]

	mov	r9,r12
	por	xmm4,xmm3
	pshufd	xmm5,xmm5,0		; xmm5 = first-input mask, broadcast
	pshufd	xmm3,xmm4,0x1e
	mov	r10,r13
	por	xmm4,xmm3
	pxor	xmm3,xmm3
	mov	r11,r14
	pcmpeqd	xmm4,xmm3
	pshufd	xmm4,xmm4,0		; xmm4 = second-input mask, broadcast

	lea	rsi,[((32-0))+rsp]
	mov	r12,r15
	lea	rdi,[rsp]
	call	__ecp_nistz256_mul_montq

	lea	rbx,[320+rsp]
	lea	rdi,[64+rsp]
	call	__ecp_nistz256_sub_fromq

	mov	rax,QWORD[384+rsp]
	lea	rbx,[384+rsp]
	mov	r9,QWORD[((0+32))+rsp]
	mov	r10,QWORD[((8+32))+rsp]
	lea	rsi,[((0+32))+rsp]
	mov	r11,QWORD[((16+32))+rsp]
	mov	r12,QWORD[((24+32))+rsp]
	lea	rdi,[32+rsp]
	call	__ecp_nistz256_mul_montq

	mov	rax,QWORD[384+rsp]
	lea	rbx,[384+rsp]
	mov	r9,QWORD[((0+64))+rsp]
	mov	r10,QWORD[((8+64))+rsp]
	lea	rsi,[((0+64))+rsp]
	mov	r11,QWORD[((16+64))+rsp]
	mov	r12,QWORD[((24+64))+rsp]
	lea	rdi,[288+rsp]
	call	__ecp_nistz256_mul_montq

	mov	rax,QWORD[448+rsp]
	lea	rbx,[448+rsp]
	mov	r9,QWORD[((0+32))+rsp]
	mov	r10,QWORD[((8+32))+rsp]
	lea	rsi,[((0+32))+rsp]
	mov	r11,QWORD[((16+32))+rsp]
	mov	r12,QWORD[((24+32))+rsp]
	lea	rdi,[32+rsp]
	call	__ecp_nistz256_mul_montq

	lea	rbx,[352+rsp]
	lea	rdi,[96+rsp]
	call	__ecp_nistz256_sub_fromq

	mov	rax,QWORD[((0+64))+rsp]
	mov	r14,QWORD[((8+64))+rsp]
	lea	rsi,[((0+64))+rsp]
	mov	r15,QWORD[((16+64))+rsp]
	mov	r8,QWORD[((24+64))+rsp]
	lea	rdi,[128+rsp]
	call	__ecp_nistz256_sqr_montq

	mov	rax,QWORD[((0+96))+rsp]
	mov	r14,QWORD[((8+96))+rsp]
	lea	rsi,[((0+96))+rsp]
	mov	r15,QWORD[((16+96))+rsp]
	mov	r8,QWORD[((24+96))+rsp]
	lea	rdi,[192+rsp]
	call	__ecp_nistz256_sqr_montq

	mov	rax,QWORD[128+rsp]
	lea	rbx,[128+rsp]
	mov	r9,QWORD[((0+64))+rsp]
	mov	r10,QWORD[((8+64))+rsp]
	lea	rsi,[((0+64))+rsp]
	mov	r11,QWORD[((16+64))+rsp]
	mov	r12,QWORD[((24+64))+rsp]
	lea	rdi,[160+rsp]
	call	__ecp_nistz256_mul_montq

	mov	rax,QWORD[320+rsp]
	lea	rbx,[320+rsp]
	mov	r9,QWORD[((0+128))+rsp]
	mov	r10,QWORD[((8+128))+rsp]
	lea	rsi,[((0+128))+rsp]
	mov	r11,QWORD[((16+128))+rsp]
	mov	r12,QWORD[((24+128))+rsp]
	lea	rdi,[rsp]
	call	__ecp_nistz256_mul_montq

	; Inline doubling of the product just returned in r12,r13,r8,r9
	; (same carry-word/borrow selection as __ecp_nistz256_mul_by_2q),
	; interleaved with loading [192+rsp] as the minuend for subq.
	xor	r11,r11
	add	r12,r12
	lea	rsi,[192+rsp]
	adc	r13,r13
	mov	rax,r12
	adc	r8,r8
	adc	r9,r9
	mov	rbp,r13
	adc	r11,0

	sub	r12,-1
	mov	rcx,r8
	sbb	r13,r14
	sbb	r8,0
	mov	r10,r9
	sbb	r9,r15
	sbb	r11,0

	cmovc	r12,rax
	mov	rax,QWORD[rsi]
	cmovc	r13,rbp
	mov	rbp,QWORD[8+rsi]
	cmovc	r8,rcx
	mov	rcx,QWORD[16+rsi]
	cmovc	r9,r10
	mov	r10,QWORD[24+rsi]

	call	__ecp_nistz256_subq

	lea	rbx,[160+rsp]
	lea	rdi,[224+rsp]
	call	__ecp_nistz256_sub_fromq

	mov	rax,QWORD[((0+0))+rsp]
	mov	rbp,QWORD[((0+8))+rsp]
	mov	rcx,QWORD[((0+16))+rsp]
	mov	r10,QWORD[((0+24))+rsp]
	lea	rdi,[64+rsp]

	call	__ecp_nistz256_subq

	mov	QWORD[rdi],r12
	mov	QWORD[8+rdi],r13
	mov	QWORD[16+rdi],r8
	mov	QWORD[24+rdi],r9
	mov	rax,QWORD[352+rsp]
	lea	rbx,[352+rsp]
	mov	r9,QWORD[((0+160))+rsp]
	mov	r10,QWORD[((8+160))+rsp]
	lea	rsi,[((0+160))+rsp]
	mov	r11,QWORD[((16+160))+rsp]
	mov	r12,QWORD[((24+160))+rsp]
	lea	rdi,[32+rsp]
	call	__ecp_nistz256_mul_montq

	mov	rax,QWORD[96+rsp]
	lea	rbx,[96+rsp]
	mov	r9,QWORD[((0+64))+rsp]
	mov	r10,QWORD[((8+64))+rsp]
	lea	rsi,[((0+64))+rsp]
	mov	r11,QWORD[((16+64))+rsp]
	mov	r12,QWORD[((24+64))+rsp]
	lea	rdi,[64+rsp]
	call	__ecp_nistz256_mul_montq

	lea	rbx,[32+rsp]
	lea	rdi,[256+rsp]
	call	__ecp_nistz256_sub_fromq

	DB	102,72,15,126,199	; movq rdi,xmm0  (out)

	; out[64..95]: computed [288+rsp] / $L$ONE_mont / first input [384+rsp]
	movdqa	xmm0,xmm5
	movdqa	xmm1,xmm5
	pandn	xmm0,XMMWORD[288+rsp]
	movdqa	xmm2,xmm5
	pandn	xmm1,XMMWORD[((288+16))+rsp]
	movdqa	xmm3,xmm5
	pand	xmm2,XMMWORD[$L$ONE_mont]
	pand	xmm3,XMMWORD[(($L$ONE_mont+16))]
	por	xmm2,xmm0
	por	xmm3,xmm1

	movdqa	xmm0,xmm4
	movdqa	xmm1,xmm4
	pandn	xmm0,xmm2
	movdqa	xmm2,xmm4
	pandn	xmm1,xmm3
	movdqa	xmm3,xmm4
	pand	xmm2,XMMWORD[384+rsp]
	pand	xmm3,XMMWORD[((384+16))+rsp]
	por	xmm2,xmm0
	por	xmm3,xmm1
	movdqu	XMMWORD[64+rdi],xmm2
	movdqu	XMMWORD[80+rdi],xmm3

	; out[0..31]: computed [224+rsp] / second input [416+rsp] /
	; first input [320+rsp]
	movdqa	xmm0,xmm5
	movdqa	xmm1,xmm5
	pandn	xmm0,XMMWORD[224+rsp]
	movdqa	xmm2,xmm5
	pandn	xmm1,XMMWORD[((224+16))+rsp]
	movdqa	xmm3,xmm5
	pand	xmm2,XMMWORD[416+rsp]
	pand	xmm3,XMMWORD[((416+16))+rsp]
	por	xmm2,xmm0
	por	xmm3,xmm1

	movdqa	xmm0,xmm4
	movdqa	xmm1,xmm4
	pandn	xmm0,xmm2
	movdqa	xmm2,xmm4
	pandn	xmm1,xmm3
	movdqa	xmm3,xmm4
	pand	xmm2,XMMWORD[320+rsp]
	pand	xmm3,XMMWORD[((320+16))+rsp]
	por	xmm2,xmm0
	por	xmm3,xmm1
	movdqu	XMMWORD[rdi],xmm2
	movdqu	XMMWORD[16+rdi],xmm3

	movdqa	xmm0,xmm5
	movdqa	xmm1,xmm5
	pandn	xmm0,XMMWORD[256+rsp]
	movdqa	xmm2,xmm5
	pandn	xmm1,XMMWORD[((256+16))+rsp]
	movdqa	xmm3,xmm5
	pand	xmm2,XMMWORD[448+rsp]
	pand	xmm3,XMMWORD[((448+16))+rsp]
	por	xmm2,xmm0
	por	xmm3,xmm1

1893 movdqa xmm0,xmm4 1894 movdqa xmm1,xmm4 1895 pandn xmm0,xmm2 1896 movdqa xmm2,xmm4 1897 pandn xmm1,xmm3 1898 movdqa xmm3,xmm4 1899 pand xmm2,XMMWORD[352+rsp] 1900 pand xmm3,XMMWORD[((352+16))+rsp] 1901 por xmm2,xmm0 1902 por xmm3,xmm1 1903 movdqu XMMWORD[32+rdi],xmm2 1904 movdqu XMMWORD[48+rdi],xmm3 1905 1906 add rsp,32*15+8 1907 pop r15 1908 pop r14 1909 pop r13 1910 pop r12 1911 pop rbx 1912 pop rbp 1913 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 1914 mov rsi,QWORD[16+rsp] 1915 DB 0F3h,0C3h ;repret 1916$L$SEH_end_ecp_nistz256_point_add_affine: 1917