p256-x86_64-asm.asm revision bb1ceac29bc7a18b94e3da78057dc41aa7071784
; NASM syntax (Intel operand order), x86-64, Win64 calling convention.
; Formatting restored to one statement per line: in the collapsed form each
; ';' comment swallowed the rest of its physical line and stray line numbers
; were fused into the instruction stream. Instruction sequences are unchanged.
default	rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section	.text code align=64

EXTERN	OPENSSL_ia32cap_P

ALIGN	64
; 256-bit modulus as four little-endian 64-bit limbs
; (2^256 - 2^224 + 2^192 + 2^96 - 1). The routines below add/subtract it
; either through this table or through r14 = limb 1, r15 = limb 3 plus the
; immediates -1 and 0 for limbs 0 and 2.
$L$poly:
	DQ	0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001

; Eight-dword splats; $L$One is the counter seed/step of the select loops.
$L$One:
	DD	1,1,1,1,1,1,1,1
$L$Two:
	DD	2,2,2,2,2,2,2,2
$L$Three:
	DD	3,3,3,3,3,3,3,3
$L$ONE_mont:
	DQ	0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe

;-----------------------------------------------------------------------
; ecp_nistz256_mul_by_2 (no `global` directive is present for this label)
; In:    rcx = result pointer (4 qwords), rdx = source pointer (4 qwords)
; Out:   [rcx] = 2*[rdx], with $L$poly subtracted when the doubling
;        carries out of bit 255
; Saves: rdi/rsi in the caller's home space, r12/r13 on the stack
; NOTE(review): a doubled value in [p, 2^256) produces no carry and is
;        stored unreduced; confirm callers tolerate this or adopt the
;        stricter reduction used by later upstream revisions.
;-----------------------------------------------------------------------
ALIGN	64
ecp_nistz256_mul_by_2:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ecp_nistz256_mul_by_2:
	mov	rdi,rcx
	mov	rsi,rdx

	push	r12
	push	r13

	mov	r8,QWORD[rsi]
	mov	r9,QWORD[8+rsi]
	add	r8,r8			; r8..r11 = 2*a, carry chained
	mov	r10,QWORD[16+rsi]
	adc	r9,r9
	mov	r11,QWORD[24+rsi]
	lea	rsi,[$L$poly]
	mov	rax,r8			; rax,rdx,rcx,r12 keep the unreduced limbs
	adc	r10,r10
	adc	r11,r11
	mov	rdx,r9
	sbb	r13,r13			; r13 = -1 if the doubling carried out, else 0

	sub	r8,QWORD[rsi]		; tentative 2*a - p
	mov	rcx,r10
	sbb	r9,QWORD[8+rsi]
	sbb	r10,QWORD[16+rsi]
	mov	r12,r11
	sbb	r11,QWORD[24+rsi]
	test	r13,r13

	cmovz	r8,rax			; no carry out: keep the unreduced value
	cmovz	r9,rdx
	mov	QWORD[rdi],r8
	cmovz	r10,rcx
	mov	QWORD[8+rdi],r9
	cmovz	r11,r12
	mov	QWORD[16+rdi],r10
	mov	QWORD[24+rdi],r11

	pop	r13
	pop	r12
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_ecp_nistz256_mul_by_2:

;-----------------------------------------------------------------------
; ecp_nistz256_neg
; In:    rcx = result pointer, rdx = source pointer (4 qwords each)
; Out:   [rcx] = 0 - [rdx], with $L$poly added back when the subtraction
;        borrowed (so a zero input stays zero)
;-----------------------------------------------------------------------
global	ecp_nistz256_neg

ALIGN	32
ecp_nistz256_neg:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ecp_nistz256_neg:
	mov	rdi,rcx
	mov	rsi,rdx

	push	r12
	push	r13

	xor	r8,r8
	xor	r9,r9
	xor	r10,r10
	xor	r11,r11
	xor	r13,r13

	sub	r8,QWORD[rsi]		; r8..r11 = 0 - a
	sbb	r9,QWORD[8+rsi]
	sbb	r10,QWORD[16+rsi]
	mov	rax,r8
	sbb	r11,QWORD[24+rsi]
	lea	rsi,[$L$poly]
	mov	rdx,r9
	sbb	r13,0			; r13 = -1 if the subtraction borrowed

	add	r8,QWORD[rsi]		; tentative (0 - a) + p
	mov	rcx,r10
	adc	r9,QWORD[8+rsi]
	adc	r10,QWORD[16+rsi]
	mov	r12,r11
	adc	r11,QWORD[24+rsi]
	test	r13,r13

	cmovz	r8,rax			; no borrow: keep 0 - a as is
	cmovz	r9,rdx
	mov	QWORD[rdi],r8
	cmovz	r10,rcx
	mov	QWORD[8+rdi],r9
	cmovz	r11,r12
	mov	QWORD[16+rdi],r10
	mov	QWORD[24+rdi],r11

	pop	r13
	pop	r12
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_ecp_nistz256_neg:

;-----------------------------------------------------------------------
; ecp_nistz256_mul_mont
; In:    rcx = result pointer, rdx = a pointer, r8 = b pointer
; Wrapper: saves rbp, rbx, r12-r15, loads b[0] into rax and a[0..3] into
;        r9..r12, then calls __ecp_nistz256_mul_montq.
;-----------------------------------------------------------------------
global	ecp_nistz256_mul_mont

ALIGN	32
ecp_nistz256_mul_mont:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ecp_nistz256_mul_mont:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8

$L$mul_mont:
	push	rbp
	push	rbx
	push	r12
	push	r13
	push	r14
	push	r15
	mov	rbx,rdx			; rbx = b pointer (rdx is clobbered by mul)
	mov	rax,QWORD[rdx]
	mov	r9,QWORD[rsi]
	mov	r10,QWORD[8+rsi]
	mov	r11,QWORD[16+rsi]
	mov	r12,QWORD[24+rsi]

	call	__ecp_nistz256_mul_montq
$L$mul_mont_done:
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbx
	pop	rbp
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_ecp_nistz256_mul_mont:

;-----------------------------------------------------------------------
; __ecp_nistz256_mul_montq (internal, register calling convention)
; In:    rax = b[0], rbx = b pointer, rsi = a pointer, r9..r12 = a[0..3],
;        rdi = result pointer
; Out:   4 limbs stored at [rdi] and left in r12, r13, r8, r9
; Sets:  r14 = $L$poly limb 1, r15 = $L$poly limb 3
; Clobb: rax, rcx, rdx, rbx, rbp, r8-r13, flags
; Each b[i] pass is followed by a reduction step that adds acc0 * p
; (acc0 << 96 via the shl/shr pair, acc0 * poly[3] via mul r15), which
; cancels the lowest accumulator limb.
;-----------------------------------------------------------------------
ALIGN	32
__ecp_nistz256_mul_montq:
	; acc = a * b[0]
	mov	rbp,rax
	mul	r9
	mov	r14,QWORD[(($L$poly+8))]
	mov	r8,rax
	mov	rax,rbp
	mov	r9,rdx

	mul	r10
	mov	r15,QWORD[(($L$poly+24))]
	add	r9,rax
	mov	rax,rbp
	adc	rdx,0
	mov	r10,rdx

	mul	r11
	add	r10,rax
	mov	rax,rbp
	adc	rdx,0
	mov	r11,rdx

	mul	r12
	add	r11,rax
	mov	rax,r8
	adc	rdx,0
	xor	r13,r13
	mov	r12,rdx

	; first reduction step (lowest limb in r8)
	mov	rbp,r8
	shl	r8,32
	mul	r15
	shr	rbp,32
	add	r9,r8
	adc	r10,rbp
	adc	r11,rax
	mov	rax,QWORD[8+rbx]
	adc	r12,rdx
	adc	r13,0
	xor	r8,r8

	; acc += a * b[1]
	mov	rbp,rax
	mul	QWORD[rsi]
	add	r9,rax
	mov	rax,rbp
	adc	rdx,0
	mov	rcx,rdx

	mul	QWORD[8+rsi]
	add	r10,rcx
	adc	rdx,0
	add	r10,rax
	mov	rax,rbp
	adc	rdx,0
	mov	rcx,rdx

	mul	QWORD[16+rsi]
	add	r11,rcx
	adc	rdx,0
	add	r11,rax
; Continuation of __ecp_nistz256_mul_montq: finishing acc += a * b[1]
; (the a[2] carry-out, then the a[3] term). Formatting restored to one
; statement per line; instruction sequence unchanged.
	mov	rax,rbp
	adc	rdx,0
	mov	rcx,rdx

	mul	QWORD[24+rsi]
	add	r12,rcx
	adc	rdx,0
	add	r12,rax
	mov	rax,r9
	adc	r13,rdx
	adc	r8,0

	; second reduction step (lowest limb in r9)
	mov	rbp,r9
	shl	r9,32
	mul	r15
	shr	rbp,32
	add	r10,r9
	adc	r11,rbp
	adc	r12,rax
	mov	rax,QWORD[16+rbx]
	adc	r13,rdx
	adc	r8,0
	xor	r9,r9

	; acc += a * b[2]
	mov	rbp,rax
	mul	QWORD[rsi]
	add	r10,rax
	mov	rax,rbp
	adc	rdx,0
	mov	rcx,rdx

	mul	QWORD[8+rsi]
	add	r11,rcx
	adc	rdx,0
	add	r11,rax
	mov	rax,rbp
	adc	rdx,0
	mov	rcx,rdx

	mul	QWORD[16+rsi]
	add	r12,rcx
	adc	rdx,0
	add	r12,rax
	mov	rax,rbp
	adc	rdx,0
	mov	rcx,rdx

	mul	QWORD[24+rsi]
	add	r13,rcx
	adc	rdx,0
	add	r13,rax
	mov	rax,r10
	adc	r8,rdx
	adc	r9,0

	; third reduction step (lowest limb in r10)
	mov	rbp,r10
	shl	r10,32
	mul	r15
	shr	rbp,32
	add	r11,r10
	adc	r12,rbp
	adc	r13,rax
	mov	rax,QWORD[24+rbx]
	adc	r8,rdx
	adc	r9,0
	xor	r10,r10

	; acc += a * b[3]
	mov	rbp,rax
	mul	QWORD[rsi]
	add	r11,rax
	mov	rax,rbp
	adc	rdx,0
	mov	rcx,rdx

	mul	QWORD[8+rsi]
	add	r12,rcx
	adc	rdx,0
	add	r12,rax
	mov	rax,rbp
	adc	rdx,0
	mov	rcx,rdx

	mul	QWORD[16+rsi]
	add	r13,rcx
	adc	rdx,0
	add	r13,rax
	mov	rax,rbp
	adc	rdx,0
	mov	rcx,rdx

	mul	QWORD[24+rsi]
	add	r8,rcx
	adc	rdx,0
	add	r8,rax
	mov	rax,r11
	adc	r9,rdx
	adc	r10,0

	; last reduction step (lowest limb in r11); rcx/rbp snapshot the
	; low two result limbs for the conditional restore below
	mov	rbp,r11
	shl	r11,32
	mul	r15
	shr	rbp,32
	add	r12,r11
	adc	r13,rbp
	mov	rcx,r12
	adc	r8,rax
	adc	r9,rdx
	mov	rbp,r13
	adc	r10,0

	; subtract p (limbs -1, r14, 0, r15) across the 5-limb value;
	; a final borrow means the value was already below p
	sub	r12,-1
	mov	rbx,r8
	sbb	r13,r14
	sbb	r8,0
	mov	rdx,r9
	sbb	r9,r15
	sbb	r10,0

	cmovc	r12,rcx			; borrow: restore the unsubtracted limbs
	cmovc	r13,rbp
	mov	QWORD[rdi],r12
	cmovc	r8,rbx
	mov	QWORD[8+rdi],r13
	cmovc	r9,rdx
	mov	QWORD[16+rdi],r8
	mov	QWORD[24+rdi],r9

	DB	0F3h,0C3h		;repret

;-----------------------------------------------------------------------
; ecp_nistz256_sqr_mont
; In:    rcx = result pointer, rdx = source pointer
; Wrapper: saves rbp, rbx, r12-r15, loads a[0..3] into rax, r14, r15, r8
;        and calls __ecp_nistz256_sqr_montq.
;-----------------------------------------------------------------------
global	ecp_nistz256_sqr_mont

ALIGN	32
ecp_nistz256_sqr_mont:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ecp_nistz256_sqr_mont:
	mov	rdi,rcx
	mov	rsi,rdx

	push	rbp
	push	rbx
	push	r12
	push	r13
	push	r14
	push	r15
	mov	rax,QWORD[rsi]
	mov	r14,QWORD[8+rsi]
	mov	r15,QWORD[16+rsi]
	mov	r8,QWORD[24+rsi]

	call	__ecp_nistz256_sqr_montq
$L$sqr_mont_done:
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbx
	pop	rbp
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_ecp_nistz256_sqr_mont:

;-----------------------------------------------------------------------
; __ecp_nistz256_sqr_montq (internal, register calling convention)
; In:    rax, r14, r15, r8 = a[0..3]; rsi = a pointer (re-read for the
;        diagonal terms); rdi = result pointer
; Out:   4 limbs stored at [rdi] and left in r12, r13, r14, r15
; Sets:  rsi = $L$poly limb 1, rbp = $L$poly limb 3 (input rsi is lost)
; Clobb: rax, rcx, rdx, r8-r11, flags
;-----------------------------------------------------------------------
ALIGN	32
__ecp_nistz256_sqr_montq:
	; off-diagonal products a[i]*a[j], i < j
	mov	r13,rax
	mul	r14			; a[1]*a[0]
	mov	r9,rax
	mov	rax,r15
	mov	r10,rdx

	mul	r13			; a[2]*a[0]
	add	r10,rax
	mov	rax,r8
	adc	rdx,0
	mov	r11,rdx

	mul	r13			; a[3]*a[0]
	add	r11,rax
	mov	rax,r15
	adc	rdx,0
	mov	r12,rdx

	mul	r14			; a[2]*a[1]
	add	r11,rax
	mov	rax,r8
	adc	rdx,0
	mov	rbp,rdx

	mul	r14			; a[3]*a[1]
	add	r12,rax
	mov	rax,r8
	adc	rdx,0
	add	r12,rbp
	mov	r13,rdx
	adc	r13,0

	mul	r15			; a[3]*a[2]
	xor	r15,r15
	add	r13,rax
	mov	rax,QWORD[rsi]
	mov	r14,rdx
	adc	r14,0

	; double the off-diagonal sum
	add	r9,r9
	adc	r10,r10
	adc	r11,r11
	adc	r12,r12
	adc	r13,r13
	adc	r14,r14
	adc	r15,0

	; add the diagonal squares a[i]^2
	mul	rax
	mov	r8,rax
	mov	rax,QWORD[8+rsi]
	mov	rcx,rdx

	mul	rax
	add	r9,rcx
	adc	r10,rax
	mov	rax,QWORD[16+rsi]
	adc	rdx,0
	mov	rcx,rdx

	mul	rax
	add	r11,rcx
	adc	r12,rax
	mov	rax,QWORD[24+rsi]
	adc	rdx,0
	mov	rcx,rdx

	mul	rax
	add	r13,rcx
	adc	r14,rax
	mov	rax,r8
	adc	r15,rdx

	mov	rsi,QWORD[(($L$poly+8))]
	mov	rbp,QWORD[(($L$poly+24))]

	; four reduction steps on the low half, one per limb
	mov	rcx,r8
	shl	r8,32
	mul	rbp
	shr	rcx,32
	add	r9,r8
	adc	r10,rcx
	adc	r11,rax
	mov	rax,r9
	adc	rdx,0

	mov	rcx,r9
	shl	r9,32
	mov	r8,rdx
	mul	rbp
	shr	rcx,32
	add	r10,r9
	adc	r11,rcx
	adc	r8,rax
	mov	rax,r10
	adc	rdx,0

	mov	rcx,r10
	shl	r10,32
	mov	r9,rdx
	mul	rbp
	shr	rcx,32
	add	r11,r10
	adc	r8,rcx
	adc	r9,rax
	mov	rax,r11
	adc	rdx,0

	mov	rcx,r11
	shl	r11,32
	mov	r10,rdx
	mul	rbp
	shr	rcx,32
	add	r8,r11
	adc	r9,rcx
	adc	r10,rax
	adc	rdx,0
	xor	r11,r11

	; add the reduced low half to the high half; r8/r9 snapshot limbs
	add	r12,r8
	adc	r13,r9
	mov	r8,r12
	adc	r14,r10
	adc	r15,rdx
	mov	r9,r13
	adc	r11,0

	; subtract p; a final borrow means the value was already below p
	sub	r12,-1
	mov	r10,r14
	sbb	r13,rsi
	sbb	r14,0
	mov	rcx,r15
	sbb	r15,rbp
	sbb	r11,0

	cmovc	r12,r8
	cmovc	r13,r9
	mov	QWORD[rdi],r12
	cmovc	r14,r10
	mov	QWORD[8+rdi],r13
	cmovc	r15,rcx
	mov	QWORD[16+rdi],r14
	mov	QWORD[24+rdi],r15

	DB	0F3h,0C3h		;repret

;-----------------------------------------------------------------------
; ecp_nistz256_from_mont
; In:    rcx = result pointer, rdx = source pointer
; Runs four reduction steps on the 4-limb input (no multiplication by a
; second operand), then conditionally subtracts p.
;-----------------------------------------------------------------------
global	ecp_nistz256_from_mont

ALIGN	32
ecp_nistz256_from_mont:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ecp_nistz256_from_mont:
	mov	rdi,rcx
	mov	rsi,rdx

	push	r12
	push	r13

	mov	rax,QWORD[rsi]
	mov	r13,QWORD[(($L$poly+24))]
	mov	r9,QWORD[8+rsi]
	mov	r10,QWORD[16+rsi]
	mov	r11,QWORD[24+rsi]
	mov	r8,rax
	mov	r12,QWORD[(($L$poly+8))]

	mov	rcx,rax
	shl	r8,32
	mul	r13
	shr	rcx,32
	add	r9,r8
	adc	r10,rcx
	adc	r11,rax
	mov	rax,r9
	adc	rdx,0

	mov	rcx,r9
	shl	r9,32
	mov	r8,rdx
	mul	r13
	shr	rcx,32
	add	r10,r9
	adc	r11,rcx
	adc	r8,rax
	mov	rax,r10
	adc	rdx,0

	mov	rcx,r10
	shl	r10,32
	mov	r9,rdx
	mul	r13
	shr	rcx,32
	add	r11,r10
	adc	r8,rcx
	adc	r9,rax
	mov	rax,r11
	adc	rdx,0

	mov	rcx,r11
	shl	r11,32
	mov	r10,rdx
	mul	r13
	shr	rcx,32
	add	r8,r11
	adc	r9,rcx
	mov	rcx,r8			; rcx, rsi, rax, r11 snapshot the result limbs
	adc	r10,rax
	mov	rsi,r9
	adc	rdx,0

	sub	r8,-1			; begin tentative subtraction of p
	mov	rax,r10
; Continuation of ecp_nistz256_from_mont: rest of the tentative subtraction
; of p (r12 = poly limb 1, r13 = poly limb 3) and the conditional restore.
; Formatting restored to one statement per line; instructions unchanged.
	sbb	r9,r12
	sbb	r10,0
	mov	r11,rdx
	sbb	rdx,r13
	sbb	r13,r13			; r13 = -1 if the subtraction borrowed, else 0

	cmovnz	r8,rcx			; borrow: restore the unsubtracted limbs
	cmovnz	r9,rsi
	mov	QWORD[rdi],r8
	cmovnz	r10,rax
	mov	QWORD[8+rdi],r9
	cmovz	r11,rdx			; no borrow: take the subtracted top limb
	mov	QWORD[16+rdi],r10
	mov	QWORD[24+rdi],r11

	pop	r13
	pop	r12
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_ecp_nistz256_from_mont:

;-----------------------------------------------------------------------
; ecp_nistz256_select_w5
; In:    rcx = output pointer (96 bytes), rdx = table pointer
;        (16 entries of 96 bytes, read with aligned loads), r8d = index
; Out:   [rcx] = table entry number r8d, counting entries from 1; all
;        zero when r8d matches no counter value (e.g. 0)
; Every entry is read and masked on each call; the index never forms an
; address. xmm6-xmm15 are saved/restored in a 168-byte stack frame.
;-----------------------------------------------------------------------
global	ecp_nistz256_select_w5

ALIGN	32
ecp_nistz256_select_w5:
	lea	rax,[((-136))+rsp]
$L$SEH_begin_ecp_nistz256_select_w5:
DB	0x48,0x8d,0x60,0xe0		; lea rsp,[rax-0x20]
DB	0x0f,0x29,0x70,0xe0		; movaps [rax-0x20],xmm6
DB	0x0f,0x29,0x78,0xf0		; movaps [rax-0x10],xmm7
DB	0x44,0x0f,0x29,0x00		; movaps [rax],xmm8
DB	0x44,0x0f,0x29,0x48,0x10	; movaps [rax+0x10],xmm9
DB	0x44,0x0f,0x29,0x50,0x20	; movaps [rax+0x20],xmm10
DB	0x44,0x0f,0x29,0x58,0x30	; movaps [rax+0x30],xmm11
DB	0x44,0x0f,0x29,0x60,0x40	; movaps [rax+0x40],xmm12
DB	0x44,0x0f,0x29,0x68,0x50	; movaps [rax+0x50],xmm13
DB	0x44,0x0f,0x29,0x70,0x60	; movaps [rax+0x60],xmm14
DB	0x44,0x0f,0x29,0x78,0x70	; movaps [rax+0x70],xmm15
	movdqa	xmm0,XMMWORD[$L$One]	; xmm0 = counter step (1 in each dword)
	movd	xmm1,r8d

	pxor	xmm2,xmm2		; xmm2..xmm7 accumulate the selected entry
	pxor	xmm3,xmm3
	pxor	xmm4,xmm4
	pxor	xmm5,xmm5
	pxor	xmm6,xmm6
	pxor	xmm7,xmm7

	movdqa	xmm8,xmm0		; xmm8 = running counter, starts at 1
	pshufd	xmm1,xmm1,0		; broadcast the index to all four dwords

	mov	rax,16
$L$select_loop_sse_w5:

	movdqa	xmm15,xmm8
	paddd	xmm8,xmm0
	pcmpeqd	xmm15,xmm1		; all-ones mask when counter == index

	movdqa	xmm9,XMMWORD[rdx]
	movdqa	xmm10,XMMWORD[16+rdx]
	movdqa	xmm11,XMMWORD[32+rdx]
	movdqa	xmm12,XMMWORD[48+rdx]
	movdqa	xmm13,XMMWORD[64+rdx]
	movdqa	xmm14,XMMWORD[80+rdx]
	lea	rdx,[96+rdx]

	pand	xmm9,xmm15
	pand	xmm10,xmm15
	por	xmm2,xmm9
	pand	xmm11,xmm15
	por	xmm3,xmm10
	pand	xmm12,xmm15
	por	xmm4,xmm11
	pand	xmm13,xmm15
	por	xmm5,xmm12
	pand	xmm14,xmm15
	por	xmm6,xmm13
	por	xmm7,xmm14

	dec	rax
	jnz	NEAR $L$select_loop_sse_w5

	movdqu	XMMWORD[rcx],xmm2
	movdqu	XMMWORD[16+rcx],xmm3
	movdqu	XMMWORD[32+rcx],xmm4
	movdqu	XMMWORD[48+rcx],xmm5
	movdqu	XMMWORD[64+rcx],xmm6
	movdqu	XMMWORD[80+rcx],xmm7
	movaps	xmm6,XMMWORD[rsp]
	movaps	xmm7,XMMWORD[16+rsp]
	movaps	xmm8,XMMWORD[32+rsp]
	movaps	xmm9,XMMWORD[48+rsp]
	movaps	xmm10,XMMWORD[64+rsp]
	movaps	xmm11,XMMWORD[80+rsp]
	movaps	xmm12,XMMWORD[96+rsp]
	movaps	xmm13,XMMWORD[112+rsp]
	movaps	xmm14,XMMWORD[128+rsp]
	movaps	xmm15,XMMWORD[144+rsp]
	lea	rsp,[168+rsp]
$L$SEH_end_ecp_nistz256_select_w5:
	DB	0F3h,0C3h		;repret

;-----------------------------------------------------------------------
; ecp_nistz256_select_w7
; In:    rcx = output pointer (64 bytes), rdx = table pointer
;        (64 entries of 64 bytes, read with aligned loads), r8d = index
; Out:   [rcx] = table entry number r8d, counting entries from 1; all
;        zero when r8d matches no counter value
; Same masked full-table scan as select_w5, with a prefetch ahead of the
; read pointer.
;-----------------------------------------------------------------------
global	ecp_nistz256_select_w7

ALIGN	32
ecp_nistz256_select_w7:
	lea	rax,[((-136))+rsp]
$L$SEH_begin_ecp_nistz256_select_w7:
DB	0x48,0x8d,0x60,0xe0		; lea rsp,[rax-0x20]
DB	0x0f,0x29,0x70,0xe0		; movaps [rax-0x20],xmm6
DB	0x0f,0x29,0x78,0xf0		; movaps [rax-0x10],xmm7
DB	0x44,0x0f,0x29,0x00		; movaps [rax],xmm8
DB	0x44,0x0f,0x29,0x48,0x10	; movaps [rax+0x10],xmm9
DB	0x44,0x0f,0x29,0x50,0x20	; movaps [rax+0x20],xmm10
DB	0x44,0x0f,0x29,0x58,0x30	; movaps [rax+0x30],xmm11
DB	0x44,0x0f,0x29,0x60,0x40	; movaps [rax+0x40],xmm12
DB	0x44,0x0f,0x29,0x68,0x50	; movaps [rax+0x50],xmm13
DB	0x44,0x0f,0x29,0x70,0x60	; movaps [rax+0x60],xmm14
DB	0x44,0x0f,0x29,0x78,0x70	; movaps [rax+0x70],xmm15
	movdqa	xmm8,XMMWORD[$L$One]	; xmm8 = running counter, starts at 1
	movd	xmm1,r8d

	pxor	xmm2,xmm2		; xmm2..xmm5 accumulate the selected entry
	pxor	xmm3,xmm3
	pxor	xmm4,xmm4
	pxor	xmm5,xmm5

	movdqa	xmm0,xmm8		; xmm0 = counter step
	pshufd	xmm1,xmm1,0		; broadcast the index to all four dwords
	mov	rax,64

$L$select_loop_sse_w7:
	movdqa	xmm15,xmm8
	paddd	xmm8,xmm0
	movdqa	xmm9,XMMWORD[rdx]
	movdqa	xmm10,XMMWORD[16+rdx]
	pcmpeqd	xmm15,xmm1		; all-ones mask when counter == index
	movdqa	xmm11,XMMWORD[32+rdx]
	movdqa	xmm12,XMMWORD[48+rdx]
	lea	rdx,[64+rdx]

	pand	xmm9,xmm15
	pand	xmm10,xmm15
	por	xmm2,xmm9
	pand	xmm11,xmm15
	por	xmm3,xmm10
	pand	xmm12,xmm15
	por	xmm4,xmm11
	prefetcht0	[255+rdx]
	por	xmm5,xmm12

	dec	rax
	jnz	NEAR $L$select_loop_sse_w7

	movdqu	XMMWORD[rcx],xmm2
	movdqu	XMMWORD[16+rcx],xmm3
	movdqu	XMMWORD[32+rcx],xmm4
	movdqu	XMMWORD[48+rcx],xmm5
	movaps	xmm6,XMMWORD[rsp]
	movaps	xmm7,XMMWORD[16+rsp]
	movaps	xmm8,XMMWORD[32+rsp]
	movaps	xmm9,XMMWORD[48+rsp]
	movaps	xmm10,XMMWORD[64+rsp]
	movaps	xmm11,XMMWORD[80+rsp]
	movaps	xmm12,XMMWORD[96+rsp]
	movaps	xmm13,XMMWORD[112+rsp]
	movaps	xmm14,XMMWORD[128+rsp]
	movaps	xmm15,XMMWORD[144+rsp]
	lea	rsp,[168+rsp]
; End of ecp_nistz256_select_w7: the SEH end label precedes the return.
; Formatting restored to one statement per line; instructions unchanged.
$L$SEH_end_ecp_nistz256_select_w7:
	DB	0F3h,0C3h		;repret

;-----------------------------------------------------------------------
; ecp_nistz256_avx2_select_w7
; Stub: after the Win64 argument shuffle it executes ud2 (bytes 0F 0B),
; so any call raises an invalid-opcode exception; the epilogue that
; follows is not reached.
;-----------------------------------------------------------------------
global	ecp_nistz256_avx2_select_w7

ALIGN	32
ecp_nistz256_avx2_select_w7:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ecp_nistz256_avx2_select_w7:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8

DB	0x0f,0x0b			; ud2
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_ecp_nistz256_avx2_select_w7:

;-----------------------------------------------------------------------
; Internal helpers below share one register convention:
;   value a in r12, r13, r8, r9 (low to high); rbx = pointer to b;
;   rdi = result pointer; r14 = $L$poly limb 1, r15 = $L$poly limb 3
;   (limbs 0 and 2 are the immediates -1 and 0).
; Clobbers: rax, rbp, rcx, r10, r11, flags.
;-----------------------------------------------------------------------

; __ecp_nistz256_add_toq: [rdi] = a + [rbx], minus p when the addition
; carries out of bit 255. Result also left in r12, r13, r8, r9.
; NOTE(review): a sum in [p, 2^256) produces no carry and is stored
; unreduced; confirm against the stricter reduction in later upstream
; revisions.
ALIGN	32
__ecp_nistz256_add_toq:
	add	r12,QWORD[rbx]
	adc	r13,QWORD[8+rbx]
	mov	rax,r12
	adc	r8,QWORD[16+rbx]
	adc	r9,QWORD[24+rbx]
	mov	rbp,r13
	sbb	r11,r11			; r11 = -1 on carry out, else 0

	sub	r12,-1			; tentative subtraction of p
	mov	rcx,r8
	sbb	r13,r14
	sbb	r8,0
	mov	r10,r9
	sbb	r9,r15
	test	r11,r11

	cmovz	r12,rax			; no carry out: keep the plain sum
	cmovz	r13,rbp
	mov	QWORD[rdi],r12
	cmovz	r8,rcx
	mov	QWORD[8+rdi],r13
	cmovz	r9,r10
	mov	QWORD[16+rdi],r8
	mov	QWORD[24+rdi],r9

	DB	0F3h,0C3h		;repret

; __ecp_nistz256_sub_fromq: [rdi] = a - [rbx], plus p when the
; subtraction borrows. Result also left in r12, r13, r8, r9.
ALIGN	32
__ecp_nistz256_sub_fromq:
	sub	r12,QWORD[rbx]
	sbb	r13,QWORD[8+rbx]
	mov	rax,r12
	sbb	r8,QWORD[16+rbx]
	sbb	r9,QWORD[24+rbx]
	mov	rbp,r13
	sbb	r11,r11			; r11 = -1 on borrow, else 0

	add	r12,-1			; tentative addition of p
	mov	rcx,r8
	adc	r13,r14
	adc	r8,0
	mov	r10,r9
	adc	r9,r15
	test	r11,r11

	cmovz	r12,rax			; no borrow: keep the plain difference
	cmovz	r13,rbp
	mov	QWORD[rdi],r12
	cmovz	r8,rcx
	mov	QWORD[8+rdi],r13
	cmovz	r9,r10
	mov	QWORD[16+rdi],r8
	mov	QWORD[24+rdi],r9

	DB	0F3h,0C3h		;repret

; __ecp_nistz256_subq: (r12, r13, r8, r9) = (rax, rbp, rcx, r10) - a,
; plus p when the subtraction borrows. Nothing is stored to memory and
; rdi is not used.
ALIGN	32
__ecp_nistz256_subq:
	sub	rax,r12
	sbb	rbp,r13
	mov	r12,rax
	sbb	rcx,r8
	sbb	r10,r9
	mov	r13,rbp
	sbb	r11,r11			; r11 = -1 on borrow, else 0

	add	rax,-1			; tentative addition of p
	mov	r8,rcx
	adc	rbp,r14
	adc	rcx,0
	mov	r9,r10
	adc	r10,r15
	test	r11,r11

	cmovnz	r12,rax			; borrow: take the corrected value
	cmovnz	r13,rbp
	cmovnz	r8,rcx
	cmovnz	r9,r10

	DB	0F3h,0C3h		;repret

; __ecp_nistz256_mul_by_2q: [rdi] = 2*a, minus p when the doubling
; carries out of bit 255. Result also left in r12, r13, r8, r9.
; NOTE(review): same carry-only reduction as __ecp_nistz256_add_toq.
ALIGN	32
__ecp_nistz256_mul_by_2q:
	add	r12,r12
	adc	r13,r13
	mov	rax,r12
	adc	r8,r8
	adc	r9,r9
	mov	rbp,r13
	sbb	r11,r11			; r11 = -1 on carry out, else 0

	sub	r12,-1			; tentative subtraction of p
	mov	rcx,r8
	sbb	r13,r14
	sbb	r8,0
	mov	r10,r9
	sbb	r9,r15
	test	r11,r11

	cmovz	r12,rax
	cmovz	r13,rbp
	mov	QWORD[rdi],r12
	cmovz	r8,rcx
	mov	QWORD[8+rdi],r13
	cmovz	r9,r10
	mov	QWORD[16+rdi],r8
	mov	QWORD[24+rdi],r9

	DB	0F3h,0C3h		;repret

;-----------------------------------------------------------------------
; ecp_nistz256_point_double
; In:    rcx = result pointer, rdx = input pointer; each is read or
;        written as three 32-byte fields at offsets 0, 32 and 64
; Frame: 32*5+8 bytes; with six pushes and the return address this keeps
;        rsp 16-byte aligned for the movdqa stores (entry rsp = 8 mod 16)
; Stack slots: [0] S, [32] M, [64] Zsqr, [96] copy of input field 0,
;        [128] scratch for doubled values
; xmm0/xmm1/xmm2 carry the result pointers rdi, rdi+32, rdi+64 across
; the helper calls.
; $L$point_double_shortcutq is also entered from ecp_nistz256_point_add
; after that routine shrinks its frame to this size.
;-----------------------------------------------------------------------
global	ecp_nistz256_point_double

ALIGN	32
ecp_nistz256_point_double:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ecp_nistz256_point_double:
	mov	rdi,rcx
	mov	rsi,rdx

	push	rbp
	push	rbx
	push	r12
	push	r13
	push	r14
	push	r15
	sub	rsp,32*5+8

$L$point_double_shortcutq:
	movdqu	xmm0,XMMWORD[rsi]
	mov	rbx,rsi
	movdqu	xmm1,XMMWORD[16+rsi]
	mov	r12,QWORD[((32+0))+rsi]
	mov	r13,QWORD[((32+8))+rsi]
	mov	r8,QWORD[((32+16))+rsi]
	mov	r9,QWORD[((32+24))+rsi]
	mov	r14,QWORD[(($L$poly+8))]
	mov	r15,QWORD[(($L$poly+24))]
	movdqa	XMMWORD[96+rsp],xmm0
	movdqa	XMMWORD[(96+16)+rsp],xmm1
	lea	r10,[32+rdi]
	lea	r11,[64+rdi]
DB	102,72,15,110,199		; movq xmm0,rdi
DB	102,73,15,110,202		; movq xmm1,r10
DB	102,73,15,110,211		; movq xmm2,r11

	; S = 2 * field1
	lea	rdi,[rsp]
	call	__ecp_nistz256_mul_by_2q

	; Zsqr = field2^2
	mov	rax,QWORD[((64+0))+rsi]
	mov	r14,QWORD[((64+8))+rsi]
	mov	r15,QWORD[((64+16))+rsi]
	mov	r8,QWORD[((64+24))+rsi]
	lea	rsi,[((64-0))+rsi]
	lea	rdi,[64+rsp]
	call	__ecp_nistz256_sqr_montq

	; S = S^2
	mov	rax,QWORD[((0+0))+rsp]
	mov	r14,QWORD[((8+0))+rsp]
	lea	rsi,[((0+0))+rsp]
	mov	r15,QWORD[((16+0))+rsp]
	mov	r8,QWORD[((24+0))+rsp]
	lea	rdi,[rsp]
	call	__ecp_nistz256_sqr_montq

	; result field2 = 2 * (field2 * field1)
	mov	rax,QWORD[32+rbx]
	mov	r9,QWORD[((64+0))+rbx]
	mov	r10,QWORD[((64+8))+rbx]
	mov	r11,QWORD[((64+16))+rbx]
	mov	r12,QWORD[((64+24))+rbx]
	lea	rsi,[((64-0))+rbx]
	lea	rbx,[32+rbx]
DB	102,72,15,126,215		; movq rdi,xmm2
	call	__ecp_nistz256_mul_montq
	call	__ecp_nistz256_mul_by_2q

	; M = field0 + Zsqr
	mov	r12,QWORD[((96+0))+rsp]
	mov	r13,QWORD[((96+8))+rsp]
	lea	rbx,[64+rsp]
	mov	r8,QWORD[((96+16))+rsp]
	mov	r9,QWORD[((96+24))+rsp]
	lea	rdi,[32+rsp]
	call	__ecp_nistz256_add_toq

	; Zsqr = field0 - Zsqr
	mov	r12,QWORD[((96+0))+rsp]
	mov	r13,QWORD[((96+8))+rsp]
	lea	rbx,[64+rsp]
	mov	r8,QWORD[((96+16))+rsp]
	mov	r9,QWORD[((96+24))+rsp]
	lea	rdi,[64+rsp]
	call	__ecp_nistz256_sub_fromq

	; result field1 = S^2, then halved below
	mov	rax,QWORD[((0+0))+rsp]
	mov	r14,QWORD[((8+0))+rsp]
	lea	rsi,[((0+0))+rsp]
	mov	r15,QWORD[((16+0))+rsp]
	mov	r8,QWORD[((24+0))+rsp]
DB	102,72,15,126,207		; movq rdi,xmm1
	call	__ecp_nistz256_sqr_montq
	; Halve modulo p: if the value is odd add p first (rsi/rbp hold poly
	; limbs 1 and 3 after the squaring helper), then shift the 5-limb
	; value right by one bit.
	xor	r9,r9
	mov	rax,r12
	add	r12,-1
	mov	r10,r13
	adc	r13,rsi
	mov	rcx,r14
	adc	r14,0
	mov	r8,r15
	adc	r15,rbp
	adc	r9,0
	xor	rsi,rsi
	test	rax,1

	cmovz	r12,rax			; even: discard the addition of p
	cmovz	r13,r10
	cmovz	r14,rcx
	cmovz	r15,r8
	cmovz	r9,rsi

	mov	rax,r13
	shr	r12,1
	shl	rax,63
	mov	r10,r14
	shr	r13,1
	or	r12,rax
	shl	r10,63
	mov	rcx,r15
	shr	r14,1
	or	r13,r10
	shl	rcx,63
	mov	QWORD[rdi],r12
	shr	r15,1
	mov	QWORD[8+rdi],r13
	shl	r9,63
	or	r14,rcx
	or	r15,r9
	mov	QWORD[16+rdi],r14
	mov	QWORD[24+rdi],r15
	; M = M * Zsqr
	mov	rax,QWORD[64+rsp]
	lea	rbx,[64+rsp]
	mov	r9,QWORD[((0+32))+rsp]
	mov	r10,QWORD[((8+32))+rsp]
	lea	rsi,[((0+32))+rsp]
	mov	r11,QWORD[((16+32))+rsp]
	mov	r12,QWORD[((24+32))+rsp]
	lea	rdi,[32+rsp]
	call	__ecp_nistz256_mul_montq

	; M = 3 * M  (2*M into scratch, then + M)
	lea	rdi,[128+rsp]
	call	__ecp_nistz256_mul_by_2q

	lea	rbx,[32+rsp]
	lea	rdi,[32+rsp]
	call	__ecp_nistz256_add_toq

	; S = S * field0
	mov	rax,QWORD[96+rsp]
	lea	rbx,[96+rsp]
	mov	r9,QWORD[((0+0))+rsp]
	mov	r10,QWORD[((8+0))+rsp]
	lea	rsi,[((0+0))+rsp]
	mov	r11,QWORD[((16+0))+rsp]
	mov	r12,QWORD[((24+0))+rsp]
	lea	rdi,[rsp]
	call	__ecp_nistz256_mul_montq
; Continuation of ecp_nistz256_point_double (stack slots: [0] S, [32] M,
; [128] scratch; xmm0/xmm1 hold the result pointers for fields 0 and 1).
; Formatting restored to one statement per line; instructions unchanged.
	; scratch = 2 * S
	lea	rdi,[128+rsp]
	call	__ecp_nistz256_mul_by_2q

	; result field0 = M^2 - 2*S
	mov	rax,QWORD[((0+32))+rsp]
	mov	r14,QWORD[((8+32))+rsp]
	lea	rsi,[((0+32))+rsp]
	mov	r15,QWORD[((16+32))+rsp]
	mov	r8,QWORD[((24+32))+rsp]
DB	102,72,15,126,199		; movq rdi,xmm0
	call	__ecp_nistz256_sqr_montq

	lea	rbx,[128+rsp]
	mov	r8,r14			; move the square into the helper's a registers
	mov	r9,r15
	mov	r14,rsi			; restore r14/r15 = poly limbs 1 and 3
	mov	r15,rbp
	call	__ecp_nistz256_sub_fromq

	; S = S - result field0
	mov	rax,QWORD[((0+0))+rsp]
	mov	rbp,QWORD[((0+8))+rsp]
	mov	rcx,QWORD[((0+16))+rsp]
	mov	r10,QWORD[((0+24))+rsp]
	lea	rdi,[rsp]
	call	__ecp_nistz256_subq

	; S = S * M; the subq result is spilled to [0] and simultaneously
	; moved into the multiplier's a registers r9..r12
	mov	rax,QWORD[32+rsp]
	lea	rbx,[32+rsp]
	mov	r14,r12
	xor	ecx,ecx			; sets ZF, so both cmovz below always move
	mov	QWORD[((0+0))+rsp],r12
	mov	r10,r13
	mov	QWORD[((0+8))+rsp],r13
	cmovz	r11,r8
	mov	QWORD[((0+16))+rsp],r8
	lea	rsi,[((0-0))+rsp]
	cmovz	r12,r9
	mov	QWORD[((0+24))+rsp],r9
	mov	r9,r14
	lea	rdi,[rsp]
	call	__ecp_nistz256_mul_montq

	; result field1 = S - result field1
DB	102,72,15,126,203		; movq rbx,xmm1
DB	102,72,15,126,207		; movq rdi,xmm1
	call	__ecp_nistz256_sub_fromq

	add	rsp,32*5+8
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbx
	pop	rbp
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_ecp_nistz256_point_double:

;-----------------------------------------------------------------------
; ecp_nistz256_point_add
; In:    rcx = result pointer, rdx = first input (in1), r8 = second
;        input (in2); each is three 32-byte fields at offsets 0, 32, 64
; Frame: 32*18+8 bytes. Slots: [384]/[416]/[448] copy of in1 fields
;        0/1/2, [480]/[512]/[544] copy of in2 fields 0/1/2,
;        [288]/[320]/[352] computed result fields 0/1/2, the rest are
;        temporaries.
; xmm5 = all-ones when in1 fields 0 and 1 are entirely zero, xmm4 = the
; same test for in2; the final stores substitute the other input when a
; mask is set. xmm0 holds the result pointer, xmm1 the in1 pointer.
; NOTE(review): the inline doubling before the first __ecp_nistz256_subq
; call uses the same carry-only reduction as __ecp_nistz256_mul_by_2q.
;-----------------------------------------------------------------------
global	ecp_nistz256_point_add

ALIGN	32
ecp_nistz256_point_add:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ecp_nistz256_point_add:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8

	push	rbp
	push	rbx
	push	r12
	push	r13
	push	r14
	push	r15
	sub	rsp,32*18+8

	; copy in1 to the stack and OR together its fields 0 and 1
	movdqu	xmm0,XMMWORD[rsi]
	movdqu	xmm1,XMMWORD[16+rsi]
	movdqu	xmm2,XMMWORD[32+rsi]
	movdqu	xmm3,XMMWORD[48+rsi]
	movdqu	xmm4,XMMWORD[64+rsi]
	movdqu	xmm5,XMMWORD[80+rsi]
	mov	rbx,rsi
	mov	rsi,rdx
	movdqa	XMMWORD[384+rsp],xmm0
	movdqa	XMMWORD[(384+16)+rsp],xmm1
	por	xmm1,xmm0
	movdqa	XMMWORD[416+rsp],xmm2
	movdqa	XMMWORD[(416+16)+rsp],xmm3
	por	xmm3,xmm2
	movdqa	XMMWORD[448+rsp],xmm4
	movdqa	XMMWORD[(448+16)+rsp],xmm5
	por	xmm3,xmm1

	; copy in2 to the stack; the pshufd/por pairs fold the OR of all
	; four dwords into every lane
	movdqu	xmm0,XMMWORD[rsi]
	pshufd	xmm5,xmm3,0xb1
	movdqu	xmm1,XMMWORD[16+rsi]
	movdqu	xmm2,XMMWORD[32+rsi]
	por	xmm5,xmm3
	movdqu	xmm3,XMMWORD[48+rsi]
	mov	rax,QWORD[((64+0))+rsi]
	mov	r14,QWORD[((64+8))+rsi]
	mov	r15,QWORD[((64+16))+rsi]
	mov	r8,QWORD[((64+24))+rsi]
	movdqa	XMMWORD[480+rsp],xmm0
	pshufd	xmm4,xmm5,0x1e
	movdqa	XMMWORD[(480+16)+rsp],xmm1
	por	xmm1,xmm0
DB	102,72,15,110,199		; movq xmm0,rdi
	movdqa	XMMWORD[512+rsp],xmm2
	movdqa	XMMWORD[(512+16)+rsp],xmm3
	por	xmm3,xmm2
	por	xmm5,xmm4
	pxor	xmm4,xmm4
	por	xmm3,xmm1

	; [96] = in2.field2 ^ 2
	lea	rsi,[((64-0))+rsi]
	mov	QWORD[((544+0))+rsp],rax
	mov	QWORD[((544+8))+rsp],r14
	mov	QWORD[((544+16))+rsp],r15
	mov	QWORD[((544+24))+rsp],r8
	lea	rdi,[96+rsp]
	call	__ecp_nistz256_sqr_montq

	pcmpeqd	xmm5,xmm4		; xmm5 = in1 mask
	pshufd	xmm4,xmm3,0xb1
	por	xmm4,xmm3
	pshufd	xmm5,xmm5,0
	pshufd	xmm3,xmm4,0x1e
	por	xmm4,xmm3
	pxor	xmm3,xmm3
	pcmpeqd	xmm4,xmm3		; xmm4 = in2 mask
	pshufd	xmm4,xmm4,0
	mov	rax,QWORD[((64+0))+rbx]
	mov	r14,QWORD[((64+8))+rbx]
	mov	r15,QWORD[((64+16))+rbx]
	mov	r8,QWORD[((64+24))+rbx]
DB	102,72,15,110,203		; movq xmm1,rbx

	; [32] = in1.field2 ^ 2
	lea	rsi,[((64-0))+rbx]
	lea	rdi,[32+rsp]
	call	__ecp_nistz256_sqr_montq

	; [224] = [96] * in2.field2
	mov	rax,QWORD[544+rsp]
	lea	rbx,[544+rsp]
	mov	r9,QWORD[((0+96))+rsp]
	mov	r10,QWORD[((8+96))+rsp]
	lea	rsi,[((0+96))+rsp]
	mov	r11,QWORD[((16+96))+rsp]
	mov	r12,QWORD[((24+96))+rsp]
	lea	rdi,[224+rsp]
	call	__ecp_nistz256_mul_montq

	; [256] = [32] * in1.field2
	mov	rax,QWORD[448+rsp]
	lea	rbx,[448+rsp]
	mov	r9,QWORD[((0+32))+rsp]
	mov	r10,QWORD[((8+32))+rsp]
	lea	rsi,[((0+32))+rsp]
	mov	r11,QWORD[((16+32))+rsp]
	mov	r12,QWORD[((24+32))+rsp]
	lea	rdi,[256+rsp]
	call	__ecp_nistz256_mul_montq

	; [224] = [224] * in1.field1
	mov	rax,QWORD[416+rsp]
	lea	rbx,[416+rsp]
	mov	r9,QWORD[((0+224))+rsp]
	mov	r10,QWORD[((8+224))+rsp]
	lea	rsi,[((0+224))+rsp]
	mov	r11,QWORD[((16+224))+rsp]
	mov	r12,QWORD[((24+224))+rsp]
	lea	rdi,[224+rsp]
	call	__ecp_nistz256_mul_montq

	; [256] = [256] * in2.field1
	mov	rax,QWORD[512+rsp]
	lea	rbx,[512+rsp]
	mov	r9,QWORD[((0+256))+rsp]
	mov	r10,QWORD[((8+256))+rsp]
	lea	rsi,[((0+256))+rsp]
	mov	r11,QWORD[((16+256))+rsp]
	mov	r12,QWORD[((24+256))+rsp]
	lea	rdi,[256+rsp]
	call	__ecp_nistz256_mul_montq

	; [64] = [256] - [224]
	lea	rbx,[224+rsp]
	lea	rdi,[64+rsp]
	call	__ecp_nistz256_sub_fromq

	or	r12,r13			; r12 = OR of the four limbs of [64]
	movdqa	xmm2,xmm4
	or	r12,r8
	or	r12,r9
	por	xmm2,xmm5		; xmm2 = in1 mask | in2 mask
DB	102,73,15,110,220		; movq xmm3,r12

	; [160] = in1.field0 * [96]
	mov	rax,QWORD[384+rsp]
	lea	rbx,[384+rsp]
	mov	r9,QWORD[((0+96))+rsp]
	mov	r10,QWORD[((8+96))+rsp]
	lea	rsi,[((0+96))+rsp]
	mov	r11,QWORD[((16+96))+rsp]
	mov	r12,QWORD[((24+96))+rsp]
	lea	rdi,[160+rsp]
	call	__ecp_nistz256_mul_montq

	; [192] = in2.field0 * [32]
	mov	rax,QWORD[480+rsp]
	lea	rbx,[480+rsp]
	mov	r9,QWORD[((0+32))+rsp]
	mov	r10,QWORD[((8+32))+rsp]
	lea	rsi,[((0+32))+rsp]
	mov	r11,QWORD[((16+32))+rsp]
	mov	r12,QWORD[((24+32))+rsp]
	lea	rdi,[192+rsp]
	call	__ecp_nistz256_mul_montq

	; [0] = [192] - [160]
	lea	rbx,[160+rsp]
	lea	rdi,[rsp]
	call	__ecp_nistz256_sub_fromq

	or	r12,r13			; r12 = OR of the four limbs of [0]
	or	r12,r8
	or	r12,r9

	; [0] nonzero, or either input mask set: general addition.
	; [0] zero, masks clear, [64] zero: jump into the doubling routine.
	; [0] zero, masks clear, [64] nonzero: store an all-zero result.
DB	0x3e				; DS segment prefix byte ahead of the jnz
	jnz	NEAR $L$add_proceedq
DB	102,73,15,126,208		; movq r8,xmm2
DB	102,73,15,126,217		; movq r9,xmm3
	test	r8,r8
	jnz	NEAR $L$add_proceedq
	test	r9,r9
	jz	NEAR $L$add_doubleq

DB	102,72,15,126,199		; movq rdi,xmm0
	pxor	xmm0,xmm0
	movdqu	XMMWORD[rdi],xmm0
	movdqu	XMMWORD[16+rdi],xmm0
	movdqu	XMMWORD[32+rdi],xmm0
	movdqu	XMMWORD[48+rdi],xmm0
	movdqu	XMMWORD[64+rdi],xmm0
	movdqu	XMMWORD[80+rdi],xmm0
	jmp	NEAR $L$add_doneq

ALIGN	32
$L$add_doubleq:
DB	102,72,15,126,206		; movq rsi,xmm1
DB	102,72,15,126,199		; movq rdi,xmm0
	add	rsp,416			; (32*18+8) - (32*5+8): shrink to point_double's frame
	jmp	NEAR $L$point_double_shortcutq

ALIGN	32
$L$add_proceedq:
	; [96] = [64] ^ 2
	mov	rax,QWORD[((0+64))+rsp]
	mov	r14,QWORD[((8+64))+rsp]
	lea	rsi,[((0+64))+rsp]
	mov	r15,QWORD[((16+64))+rsp]
	mov	r8,QWORD[((24+64))+rsp]
	lea	rdi,[96+rsp]
	call	__ecp_nistz256_sqr_montq

	; [352] = [0] * in1.field2
	mov	rax,QWORD[448+rsp]
	lea	rbx,[448+rsp]
	mov	r9,QWORD[((0+0))+rsp]
	mov	r10,QWORD[((8+0))+rsp]
	lea	rsi,[((0+0))+rsp]
	mov	r11,QWORD[((16+0))+rsp]
	mov	r12,QWORD[((24+0))+rsp]
	lea	rdi,[352+rsp]
	call	__ecp_nistz256_mul_montq

	; [32] = [0] ^ 2
	mov	rax,QWORD[((0+0))+rsp]
	mov	r14,QWORD[((8+0))+rsp]
	lea	rsi,[((0+0))+rsp]
	mov	r15,QWORD[((16+0))+rsp]
	mov	r8,QWORD[((24+0))+rsp]
	lea	rdi,[32+rsp]
	call	__ecp_nistz256_sqr_montq

	; [352] = [352] * in2.field2
	mov	rax,QWORD[544+rsp]
	lea	rbx,[544+rsp]
	mov	r9,QWORD[((0+352))+rsp]
	mov	r10,QWORD[((8+352))+rsp]
	lea	rsi,[((0+352))+rsp]
	mov	r11,QWORD[((16+352))+rsp]
	mov	r12,QWORD[((24+352))+rsp]
	lea	rdi,[352+rsp]
	call	__ecp_nistz256_mul_montq

	; [128] = [32] * [0]
	mov	rax,QWORD[rsp]
	lea	rbx,[rsp]
	mov	r9,QWORD[((0+32))+rsp]
	mov	r10,QWORD[((8+32))+rsp]
	lea	rsi,[((0+32))+rsp]
	mov	r11,QWORD[((16+32))+rsp]
	mov	r12,QWORD[((24+32))+rsp]
	lea	rdi,[128+rsp]
	call	__ecp_nistz256_mul_montq

	; [192] = [32] * [160]
	mov	rax,QWORD[160+rsp]
	lea	rbx,[160+rsp]
	mov	r9,QWORD[((0+32))+rsp]
	mov	r10,QWORD[((8+32))+rsp]
	lea	rsi,[((0+32))+rsp]
	mov	r11,QWORD[((16+32))+rsp]
	mov	r12,QWORD[((24+32))+rsp]
	lea	rdi,[192+rsp]
	call	__ecp_nistz256_mul_montq

	; inline doubling of the product just computed (kept in registers),
	; interleaved with loading [96] as the minuend for subq
	add	r12,r12
	lea	rsi,[96+rsp]
	adc	r13,r13
	mov	rax,r12
	adc	r8,r8
	adc	r9,r9
	mov	rbp,r13
	sbb	r11,r11

	sub	r12,-1
	mov	rcx,r8
	sbb	r13,r14
	sbb	r8,0
	mov	r10,r9
	sbb	r9,r15
	test	r11,r11

	cmovz	r12,rax
	mov	rax,QWORD[rsi]
	cmovz	r13,rbp
	mov	rbp,QWORD[8+rsi]
	cmovz	r8,rcx
	mov	rcx,QWORD[16+rsi]
	cmovz	r9,r10
	mov	r10,QWORD[24+rsi]

	; registers = [96] - 2*[192]
	call	__ecp_nistz256_subq

	; [288] = registers - [128]   (result field 0)
	lea	rbx,[128+rsp]
	lea	rdi,[288+rsp]
	call	__ecp_nistz256_sub_fromq

	; [320] = [192] - [288]
	mov	rax,QWORD[((192+0))+rsp]
	mov	rbp,QWORD[((192+8))+rsp]
	mov	rcx,QWORD[((192+16))+rsp]
	mov	r10,QWORD[((192+24))+rsp]
	lea	rdi,[320+rsp]

	call	__ecp_nistz256_subq

	mov	QWORD[rdi],r12
	mov	QWORD[8+rdi],r13
	mov	QWORD[16+rdi],r8
	mov	QWORD[24+rdi],r9
	; [256] = [224] * [128]
	mov	rax,QWORD[128+rsp]
	lea	rbx,[128+rsp]
	mov	r9,QWORD[((0+224))+rsp]
	mov	r10,QWORD[((8+224))+rsp]
	lea	rsi,[((0+224))+rsp]
	mov	r11,QWORD[((16+224))+rsp]
	mov	r12,QWORD[((24+224))+rsp]
	lea	rdi,[256+rsp]
	call	__ecp_nistz256_mul_montq

	; [320] = [64] * [320]
	mov	rax,QWORD[320+rsp]
	lea	rbx,[320+rsp]
	mov	r9,QWORD[((0+64))+rsp]
	mov	r10,QWORD[((8+64))+rsp]
	lea	rsi,[((0+64))+rsp]
	mov	r11,QWORD[((16+64))+rsp]
	mov	r12,QWORD[((24+64))+rsp]
	lea	rdi,[320+rsp]
	call	__ecp_nistz256_mul_montq

	; [320] = [320] - [256]   (result field 1)
	lea	rbx,[256+rsp]
	lea	rdi,[320+rsp]
	call	__ecp_nistz256_sub_fromq

DB	102,72,15,126,199		; movq rdi,xmm0

	; field 2: in1 mask set -> in2.field2, else computed [352];
	; then in2 mask set -> in1.field2
	movdqa	xmm0,xmm5
	movdqa	xmm1,xmm5
	pandn	xmm0,XMMWORD[352+rsp]
	movdqa	xmm2,xmm5
	pandn	xmm1,XMMWORD[((352+16))+rsp]
	movdqa	xmm3,xmm5
	pand	xmm2,XMMWORD[544+rsp]
	pand	xmm3,XMMWORD[((544+16))+rsp]
	por	xmm2,xmm0
	por	xmm3,xmm1

	movdqa	xmm0,xmm4
	movdqa	xmm1,xmm4
	pandn	xmm0,xmm2
	movdqa	xmm2,xmm4
	pandn	xmm1,xmm3
	movdqa	xmm3,xmm4
	pand	xmm2,XMMWORD[448+rsp]
	pand	xmm3,XMMWORD[((448+16))+rsp]
	por	xmm2,xmm0
	por	xmm3,xmm1
	movdqu	XMMWORD[64+rdi],xmm2
	movdqu	XMMWORD[80+rdi],xmm3

	; field 0: same selection between [288], in2.field0 and in1.field0
	movdqa	xmm0,xmm5
	movdqa	xmm1,xmm5
	pandn	xmm0,XMMWORD[288+rsp]
	movdqa	xmm2,xmm5
	pandn	xmm1,XMMWORD[((288+16))+rsp]
	movdqa	xmm3,xmm5
	pand	xmm2,XMMWORD[480+rsp]
	pand	xmm3,XMMWORD[((480+16))+rsp]
	por	xmm2,xmm0
	por	xmm3,xmm1

	movdqa	xmm0,xmm4
	movdqa	xmm1,xmm4
	pandn	xmm0,xmm2
	movdqa	xmm2,xmm4
	pandn	xmm1,xmm3
	movdqa	xmm3,xmm4
	pand	xmm2,XMMWORD[384+rsp]
	pand	xmm3,XMMWORD[((384+16))+rsp]
	por	xmm2,xmm0
	por	xmm3,xmm1
	movdqu	XMMWORD[rdi],xmm2
	movdqu	XMMWORD[16+rdi],xmm3

	; field 1: same selection between [320], in2.field1 and in1.field1
	movdqa	xmm0,xmm5
	movdqa	xmm1,xmm5
	pandn	xmm0,XMMWORD[320+rsp]
	movdqa	xmm2,xmm5
	pandn	xmm1,XMMWORD[((320+16))+rsp]
	movdqa	xmm3,xmm5
	pand	xmm2,XMMWORD[512+rsp]
	pand	xmm3,XMMWORD[((512+16))+rsp]
	por	xmm2,xmm0
	por	xmm3,xmm1

	movdqa	xmm0,xmm4
	movdqa	xmm1,xmm4
	pandn	xmm0,xmm2
	movdqa	xmm2,xmm4
	pandn	xmm1,xmm3
	movdqa	xmm3,xmm4
	pand	xmm2,XMMWORD[416+rsp]
	pand	xmm3,XMMWORD[((416+16))+rsp]
	por	xmm2,xmm0
	por	xmm3,xmm1
	movdqu	XMMWORD[32+rdi],xmm2
	movdqu	XMMWORD[48+rdi],xmm3

$L$add_doneq:
	add	rsp,32*18+8
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbx
	pop	rbp
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_ecp_nistz256_point_add:

; ecp_nistz256_point_add_affine: only the opening instructions fall inside
; this span; they are reproduced unchanged and the routine continues in
; the text that follows.
global	ecp_nistz256_point_add_affine

ALIGN	32
ecp_nistz256_point_add_affine:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ecp_nistz256_point_add_affine:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8

	push	rbp
	push	rbx
	push	r12
	push	r13
	push	r14
	push	r15
	sub	rsp,32*15+8

	movdqu	xmm0,XMMWORD[rsi]
	mov	rbx,rdx
	movdqu	xmm1,XMMWORD[16+rsi]
	movdqu	xmm2,XMMWORD[32+rsi]
	movdqu	xmm3,XMMWORD[48+rsi]
	movdqu	xmm4,XMMWORD[64+rsi]
	movdqu	xmm5,XMMWORD[80+rsi]
	mov	rax,QWORD[((64+0))+rsi]
	mov	r14,QWORD[((64+8))+rsi]
	mov	r15,QWORD[((64+16))+rsi]
	mov	r8,QWORD[((64+24))+rsi]
	movdqa	XMMWORD[320+rsp],xmm0
movdqa XMMWORD[(320+16)+rsp],xmm1 1648 por xmm1,xmm0 1649 movdqa XMMWORD[352+rsp],xmm2 1650 movdqa XMMWORD[(352+16)+rsp],xmm3 1651 por xmm3,xmm2 1652 movdqa XMMWORD[384+rsp],xmm4 1653 movdqa XMMWORD[(384+16)+rsp],xmm5 1654 por xmm3,xmm1 1655 1656 movdqu xmm0,XMMWORD[rbx] 1657 pshufd xmm5,xmm3,0xb1 1658 movdqu xmm1,XMMWORD[16+rbx] 1659 movdqu xmm2,XMMWORD[32+rbx] 1660 por xmm5,xmm3 1661 movdqu xmm3,XMMWORD[48+rbx] 1662 movdqa XMMWORD[416+rsp],xmm0 1663 pshufd xmm4,xmm5,0x1e 1664 movdqa XMMWORD[(416+16)+rsp],xmm1 1665 por xmm1,xmm0 1666DB 102,72,15,110,199 1667 movdqa XMMWORD[448+rsp],xmm2 1668 movdqa XMMWORD[(448+16)+rsp],xmm3 1669 por xmm3,xmm2 1670 por xmm5,xmm4 1671 pxor xmm4,xmm4 1672 por xmm3,xmm1 1673 1674 lea rsi,[((64-0))+rsi] 1675 lea rdi,[32+rsp] 1676 call __ecp_nistz256_sqr_montq 1677 1678 pcmpeqd xmm5,xmm4 1679 pshufd xmm4,xmm3,0xb1 1680 mov rax,QWORD[rbx] 1681 1682 mov r9,r12 1683 por xmm4,xmm3 1684 pshufd xmm5,xmm5,0 1685 pshufd xmm3,xmm4,0x1e 1686 mov r10,r13 1687 por xmm4,xmm3 1688 pxor xmm3,xmm3 1689 mov r11,r14 1690 pcmpeqd xmm4,xmm3 1691 pshufd xmm4,xmm4,0 1692 1693 lea rsi,[((32-0))+rsp] 1694 mov r12,r15 1695 lea rdi,[rsp] 1696 call __ecp_nistz256_mul_montq 1697 1698 lea rbx,[320+rsp] 1699 lea rdi,[64+rsp] 1700 call __ecp_nistz256_sub_fromq 1701 1702 mov rax,QWORD[384+rsp] 1703 lea rbx,[384+rsp] 1704 mov r9,QWORD[((0+32))+rsp] 1705 mov r10,QWORD[((8+32))+rsp] 1706 lea rsi,[((0+32))+rsp] 1707 mov r11,QWORD[((16+32))+rsp] 1708 mov r12,QWORD[((24+32))+rsp] 1709 lea rdi,[32+rsp] 1710 call __ecp_nistz256_mul_montq 1711 1712 mov rax,QWORD[384+rsp] 1713 lea rbx,[384+rsp] 1714 mov r9,QWORD[((0+64))+rsp] 1715 mov r10,QWORD[((8+64))+rsp] 1716 lea rsi,[((0+64))+rsp] 1717 mov r11,QWORD[((16+64))+rsp] 1718 mov r12,QWORD[((24+64))+rsp] 1719 lea rdi,[288+rsp] 1720 call __ecp_nistz256_mul_montq 1721 1722 mov rax,QWORD[448+rsp] 1723 lea rbx,[448+rsp] 1724 mov r9,QWORD[((0+32))+rsp] 1725 mov r10,QWORD[((8+32))+rsp] 1726 lea rsi,[((0+32))+rsp] 1727 mov 
r11,QWORD[((16+32))+rsp] 1728 mov r12,QWORD[((24+32))+rsp] 1729 lea rdi,[32+rsp] 1730 call __ecp_nistz256_mul_montq 1731 1732 lea rbx,[352+rsp] 1733 lea rdi,[96+rsp] 1734 call __ecp_nistz256_sub_fromq 1735 1736 mov rax,QWORD[((0+64))+rsp] 1737 mov r14,QWORD[((8+64))+rsp] 1738 lea rsi,[((0+64))+rsp] 1739 mov r15,QWORD[((16+64))+rsp] 1740 mov r8,QWORD[((24+64))+rsp] 1741 lea rdi,[128+rsp] 1742 call __ecp_nistz256_sqr_montq 1743 1744 mov rax,QWORD[((0+96))+rsp] 1745 mov r14,QWORD[((8+96))+rsp] 1746 lea rsi,[((0+96))+rsp] 1747 mov r15,QWORD[((16+96))+rsp] 1748 mov r8,QWORD[((24+96))+rsp] 1749 lea rdi,[192+rsp] 1750 call __ecp_nistz256_sqr_montq 1751 1752 mov rax,QWORD[128+rsp] 1753 lea rbx,[128+rsp] 1754 mov r9,QWORD[((0+64))+rsp] 1755 mov r10,QWORD[((8+64))+rsp] 1756 lea rsi,[((0+64))+rsp] 1757 mov r11,QWORD[((16+64))+rsp] 1758 mov r12,QWORD[((24+64))+rsp] 1759 lea rdi,[160+rsp] 1760 call __ecp_nistz256_mul_montq 1761 1762 mov rax,QWORD[320+rsp] 1763 lea rbx,[320+rsp] 1764 mov r9,QWORD[((0+128))+rsp] 1765 mov r10,QWORD[((8+128))+rsp] 1766 lea rsi,[((0+128))+rsp] 1767 mov r11,QWORD[((16+128))+rsp] 1768 mov r12,QWORD[((24+128))+rsp] 1769 lea rdi,[rsp] 1770 call __ecp_nistz256_mul_montq 1771 1772 1773 1774 1775 add r12,r12 1776 lea rsi,[192+rsp] 1777 adc r13,r13 1778 mov rax,r12 1779 adc r8,r8 1780 adc r9,r9 1781 mov rbp,r13 1782 sbb r11,r11 1783 1784 sub r12,-1 1785 mov rcx,r8 1786 sbb r13,r14 1787 sbb r8,0 1788 mov r10,r9 1789 sbb r9,r15 1790 test r11,r11 1791 1792 cmovz r12,rax 1793 mov rax,QWORD[rsi] 1794 cmovz r13,rbp 1795 mov rbp,QWORD[8+rsi] 1796 cmovz r8,rcx 1797 mov rcx,QWORD[16+rsi] 1798 cmovz r9,r10 1799 mov r10,QWORD[24+rsi] 1800 1801 call __ecp_nistz256_subq 1802 1803 lea rbx,[160+rsp] 1804 lea rdi,[224+rsp] 1805 call __ecp_nistz256_sub_fromq 1806 1807 mov rax,QWORD[((0+0))+rsp] 1808 mov rbp,QWORD[((0+8))+rsp] 1809 mov rcx,QWORD[((0+16))+rsp] 1810 mov r10,QWORD[((0+24))+rsp] 1811 lea rdi,[64+rsp] 1812 1813 call __ecp_nistz256_subq 1814 1815 mov 
QWORD[rdi],r12 1816 mov QWORD[8+rdi],r13 1817 mov QWORD[16+rdi],r8 1818 mov QWORD[24+rdi],r9 1819 mov rax,QWORD[352+rsp] 1820 lea rbx,[352+rsp] 1821 mov r9,QWORD[((0+160))+rsp] 1822 mov r10,QWORD[((8+160))+rsp] 1823 lea rsi,[((0+160))+rsp] 1824 mov r11,QWORD[((16+160))+rsp] 1825 mov r12,QWORD[((24+160))+rsp] 1826 lea rdi,[32+rsp] 1827 call __ecp_nistz256_mul_montq 1828 1829 mov rax,QWORD[96+rsp] 1830 lea rbx,[96+rsp] 1831 mov r9,QWORD[((0+64))+rsp] 1832 mov r10,QWORD[((8+64))+rsp] 1833 lea rsi,[((0+64))+rsp] 1834 mov r11,QWORD[((16+64))+rsp] 1835 mov r12,QWORD[((24+64))+rsp] 1836 lea rdi,[64+rsp] 1837 call __ecp_nistz256_mul_montq 1838 1839 lea rbx,[32+rsp] 1840 lea rdi,[256+rsp] 1841 call __ecp_nistz256_sub_fromq 1842 1843DB 102,72,15,126,199 1844 1845 movdqa xmm0,xmm5 1846 movdqa xmm1,xmm5 1847 pandn xmm0,XMMWORD[288+rsp] 1848 movdqa xmm2,xmm5 1849 pandn xmm1,XMMWORD[((288+16))+rsp] 1850 movdqa xmm3,xmm5 1851 pand xmm2,XMMWORD[$L$ONE_mont] 1852 pand xmm3,XMMWORD[(($L$ONE_mont+16))] 1853 por xmm2,xmm0 1854 por xmm3,xmm1 1855 1856 movdqa xmm0,xmm4 1857 movdqa xmm1,xmm4 1858 pandn xmm0,xmm2 1859 movdqa xmm2,xmm4 1860 pandn xmm1,xmm3 1861 movdqa xmm3,xmm4 1862 pand xmm2,XMMWORD[384+rsp] 1863 pand xmm3,XMMWORD[((384+16))+rsp] 1864 por xmm2,xmm0 1865 por xmm3,xmm1 1866 movdqu XMMWORD[64+rdi],xmm2 1867 movdqu XMMWORD[80+rdi],xmm3 1868 1869 movdqa xmm0,xmm5 1870 movdqa xmm1,xmm5 1871 pandn xmm0,XMMWORD[224+rsp] 1872 movdqa xmm2,xmm5 1873 pandn xmm1,XMMWORD[((224+16))+rsp] 1874 movdqa xmm3,xmm5 1875 pand xmm2,XMMWORD[416+rsp] 1876 pand xmm3,XMMWORD[((416+16))+rsp] 1877 por xmm2,xmm0 1878 por xmm3,xmm1 1879 1880 movdqa xmm0,xmm4 1881 movdqa xmm1,xmm4 1882 pandn xmm0,xmm2 1883 movdqa xmm2,xmm4 1884 pandn xmm1,xmm3 1885 movdqa xmm3,xmm4 1886 pand xmm2,XMMWORD[320+rsp] 1887 pand xmm3,XMMWORD[((320+16))+rsp] 1888 por xmm2,xmm0 1889 por xmm3,xmm1 1890 movdqu XMMWORD[rdi],xmm2 1891 movdqu XMMWORD[16+rdi],xmm3 1892 1893 movdqa xmm0,xmm5 1894 movdqa xmm1,xmm5 1895 pandn 
xmm0,XMMWORD[256+rsp] 1896 movdqa xmm2,xmm5 1897 pandn xmm1,XMMWORD[((256+16))+rsp] 1898 movdqa xmm3,xmm5 1899 pand xmm2,XMMWORD[448+rsp] 1900 pand xmm3,XMMWORD[((448+16))+rsp] 1901 por xmm2,xmm0 1902 por xmm3,xmm1 1903 1904 movdqa xmm0,xmm4 1905 movdqa xmm1,xmm4 1906 pandn xmm0,xmm2 1907 movdqa xmm2,xmm4 1908 pandn xmm1,xmm3 1909 movdqa xmm3,xmm4 1910 pand xmm2,XMMWORD[352+rsp] 1911 pand xmm3,XMMWORD[((352+16))+rsp] 1912 por xmm2,xmm0 1913 por xmm3,xmm1 1914 movdqu XMMWORD[32+rdi],xmm2 1915 movdqu XMMWORD[48+rdi],xmm3 1916 1917 add rsp,32*15+8 1918 pop r15 1919 pop r14 1920 pop r13 1921 pop r12 1922 pop rbx 1923 pop rbp 1924 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 1925 mov rsi,QWORD[16+rsp] 1926 DB 0F3h,0C3h ;repret 1927$L$SEH_end_ecp_nistz256_point_add_affine: 1928