; MASM (ml64) syntax, x86-64.
; OPTION DOTNAME lets identifiers begin with a dot; presumably required for
; the ".text$" segment name opened here (closed by its ENDS later on).
OPTION	DOTNAME
.text$	SEGMENT ALIGN(256) 'CODE'

;-----------------------------------------------------------------------
; MULADD_128x512 -- private helper with a register-based (non-ABI)
; interface.
;
; Adds the product of a 2-limb multiplier and an 8-limb operand
; (limb = 64 bits, least-significant limb first) to an 8-limb
; accumulator held in registers:
;
;     acc += (rbp + 2^64 * [8+rdi]) * [rsi .. rsi+63]
;
; In:    rsi       -> 8 qwords of the multiplicand
;        rbp       =  multiplier limb 0
;        [8+rdi]   =  multiplier limb 1 (loaded into rbp for pass 2)
;        r8..r15   =  accumulator limbs 0..7 (r8 is the lowest)
;        rcx       -> 2-qword output area
; Out:   [rcx], [8+rcx]     = result limbs 0 and 1
;        r10..r15, r8, r9   = result limbs 2..9 (r10 is the lowest)
; Clobb: rax, rbx, rdx, rbp, flags
;
; Every step follows the same pattern:
;     rdx:rax = multiplicand[i] * rbp
;     acc[i] += rax (+ carry-in held in rbx), carries folded into rdx
;     rbx     = rdx  (carry-in for the next limb)
;-----------------------------------------------------------------------
ALIGN	16
MULADD_128x512	PROC PRIVATE
	; ---- pass 1: multiplier limb 0 (rbp), accumulator r8..r15 ----
	mov	rax,QWORD PTR[rsi]
	mul	rbp
	add	r8,rax
	adc	rdx,0
	mov	QWORD PTR[rcx],r8		; result limb 0 is final
	mov	rbx,rdx				; carry into the next limb

	mov	rax,QWORD PTR[8+rsi]
	mul	rbp
	add	r9,rax
	adc	rdx,0
	add	r9,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[16+rsi]
	mul	rbp
	add	r10,rax
	adc	rdx,0
	add	r10,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[24+rsi]
	mul	rbp
	add	r11,rax
	adc	rdx,0
	add	r11,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[32+rsi]
	mul	rbp
	add	r12,rax
	adc	rdx,0
	add	r12,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[40+rsi]
	mul	rbp
	add	r13,rax
	adc	rdx,0
	add	r13,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[48+rsi]
	mul	rbp
	add	r14,rax
	adc	rdx,0
	add	r14,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[56+rsi]
	mul	rbp
	add	r15,rax
	adc	rdx,0
	add	r15,rbx
	adc	rdx,0
	mov	r8,rdx				; r8 is free again: it now holds limb 8

	; ---- pass 2: multiplier limb 1, accumulator r9..r15,r8 ----
	mov	rbp,QWORD PTR[8+rdi]
	mov	rax,QWORD PTR[rsi]
	mul	rbp
	add	r9,rax
	adc	rdx,0
	mov	QWORD PTR[8+rcx],r9		; result limb 1 is final
	mov	rbx,rdx

	mov	rax,QWORD PTR[8+rsi]
	mul	rbp
	add	r10,rax
	adc	rdx,0
	add	r10,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[16+rsi]
	mul	rbp
	add	r11,rax
	adc	rdx,0
	add	r11,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[24+rsi]
	mul	rbp
	add	r12,rax
	adc	rdx,0
	add	r12,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[32+rsi]
	mul	rbp
	add	r13,rax
	adc	rdx,0
	add	r13,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[40+rsi]
	mul	rbp
	add	r14,rax
	adc	rdx,0
	add	r14,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[48+rsi]
	mul	rbp
	add	r15,rax
	adc	rdx,0
	add	r15,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[56+rsi]
	mul	rbp
	add	r8,rax
	adc	rdx,0
	add	r8,rbx
	adc	rdx,0
	mov	r9,rdx				; r9 = result limb 9
	DB	0F3h,0C3h		;repret
MULADD_128x512	ENDP

;-----------------------------------------------------------------------
; mont_reduce -- private; shrinks a 16-limb value X (limb = 64 bits,
; least-significant first) to 8 limbs using constants from a caller
; supplied data block.  The name suggests Montgomery reduction; the
; meaning of the individual constants is inferred from how they are used
; below -- confirm against the code that builds the data block.
;
; Interface is stack-frame based, not ABI based.  All rsp offsets are as
; seen inside this routine, i.e. 8 bytes larger than the same slot seen
; from mod_exp_512 (return address pushed).  mont_mul_a3b and sqr_reduce
; enter here with JMP, so the offsets are identical for them.
;
; In:    [520+rsp .. 647+rsp]  X[0..15]
;        [32+rsp]              -> data block:
;                                 +0    8 entries x 8 limbs, limb j of
;                                       entry i at 64*j + 8*i
;                                 +512  8 limbs (masked subtrahend)
;                                 +576  8 limbs, multiplied by X[12..15]
;                                 +640  8 limbs, multiplied by 2 limbs
;                                 +704  2 limbs (128-bit factor)
;        [144+rsp]             -> 8-qword destination
; Out:   [dest]                8 result limbs, also left in
;                              r10..r15, r8, r9 (r10 is the lowest)
; Temp:  [192+rsp ..]  first-stage scratch
;        [296+rsp ..]  second-stage scratch
;        [384+rsp]     accumulated carry bits
; Clobb: rax, rbx, rcx, rdx, rsi, rdi, rbp, r8-r15, flags
;-----------------------------------------------------------------------
ALIGN	16
mont_reduce	PROC PRIVATE
	lea	rdi,QWORD PTR[192+rsp]		; first-stage scratch
	mov	rsi,QWORD PTR[32+rsp]
	add	rsi,576				; rsi -> constants at data+576
	lea	rcx,QWORD PTR[520+rsp]		; rcx -> X

	; ---- stage 1: X[0..11] + X[12..15] * (8 limbs at data+576) ----
	; pass 1: multiplier X[12]; the accumulator is loaded from X[0..7]
	; on the fly.  Low limb goes to [rdi].
	mov	rbp,QWORD PTR[96+rcx]
	mov	rax,QWORD PTR[rsi]
	mul	rbp
	mov	r8,QWORD PTR[rcx]
	add	r8,rax
	adc	rdx,0
	mov	QWORD PTR[rdi],r8
	mov	rbx,rdx

	mov	rax,QWORD PTR[8+rsi]
	mul	rbp
	mov	r9,QWORD PTR[8+rcx]
	add	r9,rax
	adc	rdx,0
	add	r9,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[16+rsi]
	mul	rbp
	mov	r10,QWORD PTR[16+rcx]
	add	r10,rax
	adc	rdx,0
	add	r10,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[24+rsi]
	mul	rbp
	mov	r11,QWORD PTR[24+rcx]
	add	r11,rax
	adc	rdx,0
	add	r11,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[32+rsi]
	mul	rbp
	mov	r12,QWORD PTR[32+rcx]
	add	r12,rax
	adc	rdx,0
	add	r12,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[40+rsi]
	mul	rbp
	mov	r13,QWORD PTR[40+rcx]
	add	r13,rax
	adc	rdx,0
	add	r13,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[48+rsi]
	mul	rbp
	mov	r14,QWORD PTR[48+rcx]
	add	r14,rax
	adc	rdx,0
	add	r14,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[56+rsi]
	mul	rbp
	mov	r15,QWORD PTR[56+rcx]
	add	r15,rax
	adc	rdx,0
	add	r15,rbx
	adc	rdx,0
	mov	r8,rdx

	; pass 2: multiplier X[13]; low limb to [8+rdi]
	mov	rbp,QWORD PTR[104+rcx]
	mov	rax,QWORD PTR[rsi]
	mul	rbp
	add	r9,rax
	adc	rdx,0
	mov	QWORD PTR[8+rdi],r9
	mov	rbx,rdx

	mov	rax,QWORD PTR[8+rsi]
	mul	rbp
	add	r10,rax
	adc	rdx,0
	add	r10,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[16+rsi]
	mul	rbp
	add	r11,rax
	adc	rdx,0
	add	r11,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[24+rsi]
	mul	rbp
	add	r12,rax
	adc	rdx,0
	add	r12,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[32+rsi]
	mul	rbp
	add	r13,rax
	adc	rdx,0
	add	r13,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[40+rsi]
	mul	rbp
	add	r14,rax
	adc	rdx,0
	add	r14,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[48+rsi]
	mul	rbp
	add	r15,rax
	adc	rdx,0
	add	r15,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[56+rsi]
	mul	rbp
	add	r8,rax
	adc	rdx,0
	add	r8,rbx
	adc	rdx,0
	mov	r9,rdx

	; pass 3: multiplier X[14]; low limb to [16+rdi]
	mov	rbp,QWORD PTR[112+rcx]
	mov	rax,QWORD PTR[rsi]
	mul	rbp
	add	r10,rax
	adc	rdx,0
	mov	QWORD PTR[16+rdi],r10
	mov	rbx,rdx

	mov	rax,QWORD PTR[8+rsi]
	mul	rbp
	add	r11,rax
	adc	rdx,0
	add	r11,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[16+rsi]
	mul	rbp
	add	r12,rax
	adc	rdx,0
	add	r12,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[24+rsi]
	mul	rbp
	add	r13,rax
	adc	rdx,0
	add	r13,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[32+rsi]
	mul	rbp
	add	r14,rax
	adc	rdx,0
	add	r14,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[40+rsi]
	mul	rbp
	add	r15,rax
	adc	rdx,0
	add	r15,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[48+rsi]
	mul	rbp
	add	r8,rax
	adc	rdx,0
	add	r8,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[56+rsi]
	mul	rbp
	add	r9,rax
	adc	rdx,0
	add	r9,rbx
	adc	rdx,0
	mov	r10,rdx

	; pass 4: multiplier X[15]; low limb to [24+rdi]
	mov	rbp,QWORD PTR[120+rcx]
	mov	rax,QWORD PTR[rsi]
	mul	rbp
	add	r11,rax
	adc	rdx,0
	mov	QWORD PTR[24+rdi],r11
	mov	rbx,rdx

	mov	rax,QWORD PTR[8+rsi]
	mul	rbp
	add	r12,rax
	adc	rdx,0
	add	r12,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[16+rsi]
	mul	rbp
	add	r13,rax
	adc	rdx,0
	add	r13,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[24+rsi]
	mul	rbp
	add	r14,rax
	adc	rdx,0
	add	r14,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[32+rsi]
	mul	rbp
	add	r15,rax
	adc	rdx,0
	add	r15,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[40+rsi]
	mul	rbp
	add	r8,rax
	adc	rdx,0
	add	r8,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[48+rsi]
	mul	rbp
	add	r9,rax
	adc	rdx,0
	add	r9,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[56+rsi]
	mul	rbp
	add	r10,rax
	adc	rdx,0
	add	r10,rbx
	adc	rdx,0
	mov	r11,rdx

	; r8..r11 now hold limbs 8..11 of the product part; add X[8..11]
	; and capture the carry-out in rax (0 or 1).
	xor	rax,rax

	add	r8,QWORD PTR[64+rcx]
	adc	r9,QWORD PTR[72+rcx]
	adc	r10,QWORD PTR[80+rcx]
	adc	r11,QWORD PTR[88+rcx]
	adc	rax,0

	; Stage-1 result: limbs 0..3 at [rdi..24+rdi], limbs 4..7 in
	; r12..r15, limbs 8, 9 and 11 spilled below, limb 10 kept in rbp
	; as the first multiplier limb for MULADD_128x512.
	mov	QWORD PTR[64+rdi],r8
	mov	QWORD PTR[72+rdi],r9
	mov	rbp,r10
	mov	QWORD PTR[88+rdi],r11

	mov	QWORD PTR[384+rsp],rax		; carry word

	; reload limbs 0..3 so r8..r15 = stage-1 limbs 0..7
	mov	r8,QWORD PTR[rdi]
	mov	r9,QWORD PTR[8+rdi]
	mov	r10,QWORD PTR[16+rdi]
	mov	r11,QWORD PTR[24+rdi]

	; ---- stage 2: limbs 0..9 + limbs 10..11 * (8 limbs at data+640) ----
	add	rdi,8*10			; [8+rdi] is now stage-1 limb 11

	add	rsi,64				; rsi -> data+640
	lea	rcx,QWORD PTR[296+rsp]		; second-stage scratch

	call	MULADD_128x512

	mov	rax,QWORD PTR[384+rsp]

	; MULADD left result limbs 8 and 9 in r8, r9; add stage-1 limbs 8, 9
	add	r8,QWORD PTR[((-16))+rdi]
	adc	r9,QWORD PTR[((-8))+rdi]
	mov	QWORD PTR[64+rcx],r8
	mov	QWORD PTR[72+rcx],r9

	adc	rax,rax				; carry word = 2*carry word + CF
	mov	QWORD PTR[384+rsp],rax

	lea	rdi,QWORD PTR[192+rsp]
	add	rsi,64				; rsi -> 2-limb factor at data+704

	; ---- stage 3: (rbp, [8+rdi]) = low 128 bits of
	;      (stage-2 limbs 0..1) * (2 limbs at data+704) ----
	mov	r8,QWORD PTR[rsi]
	mov	rbx,QWORD PTR[8+rsi]

	mov	rax,QWORD PTR[rcx]
	mul	r8
	mov	rbp,rax				; low limb of the 128-bit product
	mov	r9,rdx

	mov	rax,QWORD PTR[8+rcx]
	mul	r8
	add	r9,rax				; only the low halves matter here

	mov	rax,QWORD PTR[rcx]
	mul	rbx
	add	r9,rax

	mov	QWORD PTR[8+rdi],r9		; second multiplier limb for MULADD

	sub	rsi,192				; rsi -> 8 limbs at data+512

	; r8..r15 = stage-2 limbs 0..7 (r10..r15 are still live from stage 2)
	mov	r8,QWORD PTR[rcx]
	mov	r9,QWORD PTR[8+rcx]

	call	MULADD_128x512

	; rax, rbx, rdi, rdx = limbs 0..3 of the value at data+512, kept for
	; the masked subtraction below
	mov	rax,QWORD PTR[rsi]
	mov	rbx,QWORD PTR[8+rsi]
	mov	rdi,QWORD PTR[16+rsi]
	mov	rdx,QWORD PTR[24+rsi]

	mov	rbp,QWORD PTR[384+rsp]

	add	r8,QWORD PTR[64+rcx]
	adc	r9,QWORD PTR[72+rcx]

	adc	rbp,rbp				; carry word = 2*carry word + CF (0..7)

	; ---- stage 4: add the table entry selected by the carry word ----
	shl	rbp,3				; entry index * 8 bytes
	mov	rcx,QWORD PTR[32+rsp]
	add	rbp,rcx				; rbp -> limb 0 of the entry

	xor	rsi,rsi

	add	r10,QWORD PTR[rbp]		; limbs of one entry are 64 bytes apart
	adc	r11,QWORD PTR[64+rbp]
	adc	r12,QWORD PTR[128+rbp]
	adc	r13,QWORD PTR[192+rbp]
	adc	r14,QWORD PTR[256+rbp]
	adc	r15,QWORD PTR[320+rbp]
	adc	r8,QWORD PTR[384+rbp]
	adc	r9,QWORD PTR[448+rbp]

	sbb	rsi,0				; rsi = all-ones if that add carried out, else 0

	; ---- stage 5: branch-free subtract of the data+512 value when the
	;      addition above overflowed ----
	and	rax,rsi
	and	rbx,rsi
	and	rdi,rsi
	and	rdx,rsi

	mov	rbp,1				; mov leaves flags untouched
	sub	r10,rax
	sbb	r11,rbx
	sbb	r12,rdi
	sbb	r13,rdx

	sbb	rbp,0				; park the borrow: rbp = 1 - borrow

	add	rcx,512				; rcx -> data+512 (clobbers the flags)
	mov	rax,QWORD PTR[32+rcx]		; limbs 4..7 of the subtrahend
	mov	rbx,QWORD PTR[40+rcx]
	mov	rdi,QWORD PTR[48+rcx]
	mov	rdx,QWORD PTR[56+rcx]

	and	rax,rsi
	and	rbx,rsi
	and	rdi,rsi
	and	rdx,rsi

	sub	rbp,1				; regenerate CF from the parked borrow

	sbb	r14,rax
	sbb	r15,rbx
	sbb	r8,rdi
	sbb	r9,rdx

	; write the 8 result limbs to the caller-selected destination
	mov	rsi,QWORD PTR[144+rsp]
	mov	QWORD PTR[rsi],r10
	mov	QWORD PTR[8+rsi],r11
	mov	QWORD PTR[16+rsi],r12
	mov	QWORD PTR[24+rsi],r13
	mov	QWORD PTR[32+rsi],r14
	mov	QWORD PTR[40+rsi],r15
	mov	QWORD PTR[48+rsi],r8
	mov	QWORD PTR[56+rsi],r9

	DB	0F3h,0C3h		;repret
mont_reduce	ENDP

;-----------------------------------------------------------------------
; mont_mul_a3b -- private; 8x8-limb schoolbook multiply followed by a
; tail jump into mont_reduce.
;
; In:    rdi                -> multiplier B, 8 qwords
;        rsi                -> multiplicand A, 8 qwords
;        r10..r15, r8, r9   =  A[0..7].  Pass 0 multiplies these registers
;                              and passes 1..7 re-read A from [rsi], so the
;                              registers are expected to mirror [rsi].
;        stack slots used by mont_reduce
; Out:   [520+rsp .. 647+rsp] = 16-limb product A*B (offsets as seen in
;        this routine, return address included); mont_reduce then
;        consumes it and returns directly to this routine's caller.
; Clobb: rax, rbx, rdx, rbp, r8-r15, flags, plus whatever mont_reduce
;        clobbers
;
; Each pass k multiplies A by B[k], adds it into the rotating 8-register
; window, and stores the lowest window limb as product limb k.
;-----------------------------------------------------------------------
ALIGN	16
mont_mul_a3b	PROC PRIVATE
	; ---- pass 0: B[0] * A (A taken from registers, no accumulate) ----
	mov	rbp,QWORD PTR[rdi]

	mov	rax,r10
	mul	rbp
	mov	QWORD PTR[520+rsp],rax		; product limb 0
	mov	r10,rdx
	mov	rax,r11
	mul	rbp
	add	r10,rax
	adc	rdx,0
	mov	r11,rdx
	mov	rax,r12
	mul	rbp
	add	r11,rax
	adc	rdx,0
	mov	r12,rdx
	mov	rax,r13
	mul	rbp
	add	r12,rax
	adc	rdx,0
	mov	r13,rdx
	mov	rax,r14
	mul	rbp
	add	r13,rax
	adc	rdx,0
	mov	r14,rdx
	mov	rax,r15
	mul	rbp
	add	r14,rax
	adc	rdx,0
	mov	r15,rdx
	mov	rax,r8
	mul	rbp
	add	r15,rax
	adc	rdx,0
	mov	r8,rdx
	mov	rax,r9
	mul	rbp
	add	r8,rax
	adc	rdx,0
	mov	r9,rdx

	; ---- pass 1: B[1]; window r10..r15,r8,r9; carry-out -> r10 ----
	mov	rbp,QWORD PTR[8+rdi]
	mov	rax,QWORD PTR[rsi]
	mul	rbp
	add	r10,rax
	adc	rdx,0
	mov	QWORD PTR[528+rsp],r10		; product limb 1
	mov	rbx,rdx

	mov	rax,QWORD PTR[8+rsi]
	mul	rbp
	add	r11,rax
	adc	rdx,0
	add	r11,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[16+rsi]
	mul	rbp
	add	r12,rax
	adc	rdx,0
	add	r12,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[24+rsi]
	mul	rbp
	add	r13,rax
	adc	rdx,0
	add	r13,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[32+rsi]
	mul	rbp
	add	r14,rax
	adc	rdx,0
	add	r14,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[40+rsi]
	mul	rbp
	add	r15,rax
	adc	rdx,0
	add	r15,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[48+rsi]
	mul	rbp
	add	r8,rax
	adc	rdx,0
	add	r8,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[56+rsi]
	mul	rbp
	add	r9,rax
	adc	rdx,0
	add	r9,rbx
	adc	rdx,0
	mov	r10,rdx

	; ---- pass 2: B[2]; window r11..r15,r8..r10; carry-out -> r11 ----
	mov	rbp,QWORD PTR[16+rdi]
	mov	rax,QWORD PTR[rsi]
	mul	rbp
	add	r11,rax
	adc	rdx,0
	mov	QWORD PTR[536+rsp],r11		; product limb 2
	mov	rbx,rdx

	mov	rax,QWORD PTR[8+rsi]
	mul	rbp
	add	r12,rax
	adc	rdx,0
	add	r12,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[16+rsi]
	mul	rbp
	add	r13,rax
	adc	rdx,0
	add	r13,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[24+rsi]
	mul	rbp
	add	r14,rax
	adc	rdx,0
	add	r14,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[32+rsi]
	mul	rbp
	add	r15,rax
	adc	rdx,0
	add	r15,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[40+rsi]
	mul	rbp
	add	r8,rax
	adc	rdx,0
	add	r8,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[48+rsi]
	mul	rbp
	add	r9,rax
	adc	rdx,0
	add	r9,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[56+rsi]
	mul	rbp
	add	r10,rax
	adc	rdx,0
	add	r10,rbx
	adc	rdx,0
	mov	r11,rdx

	; ---- pass 3: B[3]; window r12..r15,r8..r11; carry-out -> r12 ----
	mov	rbp,QWORD PTR[24+rdi]
	mov	rax,QWORD PTR[rsi]
	mul	rbp
	add	r12,rax
	adc	rdx,0
	mov	QWORD PTR[544+rsp],r12		; product limb 3
	mov	rbx,rdx

	mov	rax,QWORD PTR[8+rsi]
	mul	rbp
	add	r13,rax
	adc	rdx,0
	add	r13,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[16+rsi]
	mul	rbp
	add	r14,rax
	adc	rdx,0
	add	r14,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[24+rsi]
	mul	rbp
	add	r15,rax
	adc	rdx,0
	add	r15,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[32+rsi]
	mul	rbp
	add	r8,rax
	adc	rdx,0
	add	r8,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[40+rsi]
	mul	rbp
	add	r9,rax
	adc	rdx,0
	add	r9,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[48+rsi]
	mul	rbp
	add	r10,rax
	adc	rdx,0
	add	r10,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[56+rsi]
	mul	rbp
	add	r11,rax
	adc	rdx,0
	add	r11,rbx
	adc	rdx,0
	mov	r12,rdx

	; ---- pass 4: B[4]; window r13..r15,r8..r12; carry-out -> r13 ----
	mov	rbp,QWORD PTR[32+rdi]
	mov	rax,QWORD PTR[rsi]
	mul	rbp
	add	r13,rax
	adc	rdx,0
	mov	QWORD PTR[552+rsp],r13		; product limb 4
	mov	rbx,rdx

	mov	rax,QWORD PTR[8+rsi]
	mul	rbp
	add	r14,rax
	adc	rdx,0
	add	r14,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[16+rsi]
	mul	rbp
	add	r15,rax
	adc	rdx,0
	add	r15,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[24+rsi]
	mul	rbp
	add	r8,rax
	adc	rdx,0
	add	r8,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[32+rsi]
	mul	rbp
	add	r9,rax
	adc	rdx,0
	add	r9,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[40+rsi]
	mul	rbp
	add	r10,rax
	adc	rdx,0
	add	r10,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[48+rsi]
	mul	rbp
	add	r11,rax
	adc	rdx,0
	add	r11,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[56+rsi]
	mul	rbp
	add	r12,rax
	adc	rdx,0
	add	r12,rbx
	adc	rdx,0
	mov	r13,rdx

	; ---- pass 5: B[5]; window r14,r15,r8..r13; carry-out -> r14 ----
	mov	rbp,QWORD PTR[40+rdi]
	mov	rax,QWORD PTR[rsi]
	mul	rbp
	add	r14,rax
	adc	rdx,0
	mov	QWORD PTR[560+rsp],r14		; product limb 5
	mov	rbx,rdx

	mov	rax,QWORD PTR[8+rsi]
	mul	rbp
	add	r15,rax
	adc	rdx,0
	add	r15,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[16+rsi]
	mul	rbp
	add	r8,rax
	adc	rdx,0
	add	r8,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[24+rsi]
	mul	rbp
	add	r9,rax
	adc	rdx,0
	add	r9,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[32+rsi]
	mul	rbp
	add	r10,rax
	adc	rdx,0
	add	r10,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[40+rsi]
	mul	rbp
	add	r11,rax
	adc	rdx,0
	add	r11,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[48+rsi]
	mul	rbp
	add	r12,rax
	adc	rdx,0
	add	r12,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[56+rsi]
	mul	rbp
	add	r13,rax
	adc	rdx,0
	add	r13,rbx
	adc	rdx,0
	mov	r14,rdx

	; ---- pass 6: B[6]; window r15,r8..r14; carry-out -> r15 ----
	mov	rbp,QWORD PTR[48+rdi]
	mov	rax,QWORD PTR[rsi]
	mul	rbp
	add	r15,rax
	adc	rdx,0
	mov	QWORD PTR[568+rsp],r15		; product limb 6
	mov	rbx,rdx

	mov	rax,QWORD PTR[8+rsi]
	mul	rbp
	add	r8,rax
	adc	rdx,0
	add	r8,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[16+rsi]
	mul	rbp
	add	r9,rax
	adc	rdx,0
	add	r9,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[24+rsi]
	mul	rbp
	add	r10,rax
	adc	rdx,0
	add	r10,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[32+rsi]
	mul	rbp
	add	r11,rax
	adc	rdx,0
	add	r11,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[40+rsi]
	mul	rbp
	add	r12,rax
	adc	rdx,0
	add	r12,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[48+rsi]
	mul	rbp
	add	r13,rax
	adc	rdx,0
	add	r13,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[56+rsi]
	mul	rbp
	add	r14,rax
	adc	rdx,0
	add	r14,rbx
	adc	rdx,0
	mov	r15,rdx

	; ---- pass 7: B[7]; window r8..r15; carry-out -> r8 ----
	mov	rbp,QWORD PTR[56+rdi]
	mov	rax,QWORD PTR[rsi]
	mul	rbp
	add	r8,rax
	adc	rdx,0
	mov	QWORD PTR[576+rsp],r8		; product limb 7
	mov	rbx,rdx

	mov	rax,QWORD PTR[8+rsi]
	mul	rbp
	add	r9,rax
	adc	rdx,0
	add	r9,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[16+rsi]
	mul	rbp
	add	r10,rax
	adc	rdx,0
	add	r10,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[24+rsi]
	mul	rbp
	add	r11,rax
	adc	rdx,0
	add	r11,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[32+rsi]
	mul	rbp
	add	r12,rax
	adc	rdx,0
	add	r12,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[40+rsi]
	mul	rbp
	add	r13,rax
	adc	rdx,0
	add	r13,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[48+rsi]
	mul	rbp
	add	r14,rax
	adc	rdx,0
	add	r14,rbx
	adc	rdx,0
	mov	rbx,rdx

	mov	rax,QWORD PTR[56+rsi]
	mul	rbp
	add	r15,rax
	adc	rdx,0
	add	r15,rbx
	adc	rdx,0
	mov	r8,rdx

	; product limbs 8..15
	mov	QWORD PTR[584+rsp],r9
	mov	QWORD PTR[592+rsp],r10
	mov	QWORD PTR[600+rsp],r11
	mov	QWORD PTR[608+rsp],r12
	mov	QWORD PTR[616+rsp],r13
	mov	QWORD PTR[624+rsp],r14
	mov	QWORD PTR[632+rsp],r15
	mov	QWORD PTR[640+rsp],r8

	; tail call: rsp is unchanged, so mont_reduce sees the same frame
	; offsets and its return goes straight to our caller
	jmp	mont_reduce

mont_mul_a3b	ENDP

;-----------------------------------------------------------------------
; sqr_reduce -- private; squares an 8-limb value A and tail-jumps into
; mont_reduce.
;
; In:    r10..r15, r8, r9   =  A[0..7]
;        [16+rsp]           -> A in memory (offset as seen in this
;                              routine; it is the slot mod_exp_512
;                              addresses as [8+rsp]).  [rcx]..[48+rcx]
;                              are read; A[7] is taken from r9 only.
;        stack slots used by mont_reduce
; Out:   [520+rsp .. 647+rsp] = 16-limb square; product limb k lives at
;        [520 + 8*k + rsp].  mont_reduce then consumes it and returns
;        directly to this routine's caller.
; Clobb: rax, rbx, rcx, rdx, rsi, rdi, rbp, r8-r15, flags, plus whatever
;        mont_reduce clobbers
;
; Method: accumulate the cross products A[i]*A[j] for i < j (limbs
; 1..14), double them, then add the diagonal squares A[i]^2.
;-----------------------------------------------------------------------
ALIGN	16
sqr_reduce	PROC PRIVATE
	mov	rcx,QWORD PTR[16+rsp]

	; ---- row 0: A[0] * A[1..7] -> limbs 1..8 ----
	mov	rbx,r10

	mov	rax,r11
	mul	rbx
	mov	QWORD PTR[528+rsp],rax		; limb 1
	mov	r10,rdx
	mov	rax,r12
	mul	rbx
	add	r10,rax
	adc	rdx,0
	mov	r11,rdx
	mov	rax,r13
	mul	rbx
	add	r11,rax
	adc	rdx,0
	mov	r12,rdx
	mov	rax,r14
	mul	rbx
	add	r12,rax
	adc	rdx,0
	mov	r13,rdx
	mov	rax,r15
	mul	rbx
	add	r13,rax
	adc	rdx,0
	mov	r14,rdx
	mov	rax,r8
	mul	rbx
	add	r14,rax
	adc	rdx,0
	mov	r15,rdx
	mov	rax,r9
	mul	rbx
	add	r15,rax
	adc	rdx,0
	mov	rsi,rdx				; limb 8

	mov	QWORD PTR[536+rsp],r10		; limb 2

	; ---- row 1: A[1] * A[2..7] -> limbs 3..9 ----
	; r8 and r9 still hold A[6] and A[7]; lower limbs come from memory.
	mov	rbx,QWORD PTR[8+rcx]

	mov	rax,QWORD PTR[16+rcx]
	mul	rbx
	add	r11,rax
	adc	rdx,0
	mov	QWORD PTR[544+rsp],r11		; limb 3

	mov	r10,rdx				; r10 is the running carry from here on
	mov	rax,QWORD PTR[24+rcx]
	mul	rbx
	add	r12,rax
	adc	rdx,0
	add	r12,r10
	adc	rdx,0
	mov	QWORD PTR[552+rsp],r12		; limb 4

	mov	r10,rdx
	mov	rax,QWORD PTR[32+rcx]
	mul	rbx
	add	r13,rax
	adc	rdx,0
	add	r13,r10
	adc	rdx,0

	mov	r10,rdx
	mov	rax,QWORD PTR[40+rcx]
	mul	rbx
	add	r14,rax
	adc	rdx,0
	add	r14,r10
	adc	rdx,0

	mov	r10,rdx
	mov	rax,r8
	mul	rbx
	add	r15,rax
	adc	rdx,0
	add	r15,r10
	adc	rdx,0

	mov	r10,rdx
	mov	rax,r9
	mul	rbx
	add	rsi,rax
	adc	rdx,0
	add	rsi,r10
	adc	rdx,0

	mov	r11,rdx				; limb 9

	; ---- row 2: A[2] * A[3..7] -> limbs 5..10 ----
	mov	rbx,QWORD PTR[16+rcx]

	mov	rax,QWORD PTR[24+rcx]
	mul	rbx
	add	r13,rax
	adc	rdx,0
	mov	QWORD PTR[560+rsp],r13		; limb 5

	mov	r10,rdx
	mov	rax,QWORD PTR[32+rcx]
	mul	rbx
	add	r14,rax
	adc	rdx,0
	add	r14,r10
	adc	rdx,0
	mov	QWORD PTR[568+rsp],r14		; limb 6

	mov	r10,rdx
	mov	rax,QWORD PTR[40+rcx]
	mul	rbx
	add	r15,rax
	adc	rdx,0
	add	r15,r10
	adc	rdx,0

	mov	r10,rdx
	mov	rax,r8
	mul	rbx
	add	rsi,rax
	adc	rdx,0
	add	rsi,r10
	adc	rdx,0

	mov	r10,rdx
	mov	rax,r9
	mul	rbx
	add	r11,rax
	adc	rdx,0
	add	r11,r10
	adc	rdx,0

	mov	r12,rdx				; limb 10

	; ---- row 3: A[3] * A[4..7] -> limbs 7..11 ----
	mov	rbx,QWORD PTR[24+rcx]

	mov	rax,QWORD PTR[32+rcx]
	mul	rbx
	add	r15,rax
	adc	rdx,0
	mov	QWORD PTR[576+rsp],r15		; limb 7

	mov	r10,rdx
	mov	rax,QWORD PTR[40+rcx]
	mul	rbx
	add	rsi,rax
	adc	rdx,0
	add	rsi,r10
	adc	rdx,0
	mov	QWORD PTR[584+rsp],rsi		; limb 8

	mov	r10,rdx
	mov	rax,r8
	mul	rbx
	add	r11,rax
	adc	rdx,0
	add	r11,r10
	adc	rdx,0

	mov	r10,rdx
	mov	rax,r9
	mul	rbx
	add	r12,rax
	adc	rdx,0
	add	r12,r10
	adc	rdx,0

	mov	r15,rdx				; limb 11

	; ---- row 4: A[4] * A[5..7] -> limbs 9..12 ----
	mov	rbx,QWORD PTR[32+rcx]

	mov	rax,QWORD PTR[40+rcx]
	mul	rbx
	add	r11,rax
	adc	rdx,0
	mov	QWORD PTR[592+rsp],r11		; limb 9

	mov	r10,rdx
	mov	rax,r8
	mul	rbx
	add	r12,rax
	adc	rdx,0
	add	r12,r10
	adc	rdx,0
	mov	QWORD PTR[600+rsp],r12		; limb 10

	mov	r10,rdx
	mov	rax,r9
	mul	rbx
	add	r15,rax
	adc	rdx,0
	add	r15,r10
	adc	rdx,0

	mov	r11,rdx				; limb 12

	; ---- row 5: A[5] * A[6..7] -> limbs 11..13 ----
	mov	rbx,QWORD PTR[40+rcx]

	mov	rax,r8
	mul	rbx
	add	r15,rax
	adc	rdx,0
	mov	QWORD PTR[608+rsp],r15		; limb 11

	mov	r10,rdx
	mov	rax,r9
	mul	rbx
	add	r11,rax
	adc	rdx,0
	add	r11,r10
	adc	rdx,0
	mov	QWORD PTR[616+rsp],r11		; limb 12

	mov	r12,rdx				; limb 13

	; ---- row 6: A[6] * A[7] -> limbs 13..14 ----
	mov	rbx,r8

	mov	rax,r9
	mul	rbx
	add	r12,rax
	adc	rdx,0
	mov	QWORD PTR[624+rsp],r12		; limb 13

	mov	QWORD PTR[632+rsp],rdx		; limb 14

	; ---- double limbs 1..6 and add the squares A[0]^2 .. A[3]^2 ----
	mov	r10,QWORD PTR[528+rsp]
	mov	r11,QWORD PTR[536+rsp]
	mov	r12,QWORD PTR[544+rsp]
	mov	r13,QWORD PTR[552+rsp]
	mov	r14,QWORD PTR[560+rsp]
	mov	r15,QWORD PTR[568+rsp]

	mov	rax,QWORD PTR[24+rcx]
	mul	rax				; A[3]^2
	mov	rdi,rax				; low half, added to limb 6
	mov	r8,rdx				; high half, added to limb 7 later

	add	r10,r10
	adc	r11,r11
	adc	r12,r12
	adc	r13,r13
	adc	r14,r14
	adc	r15,r15
	adc	r8,0				; doubling carry out of limb 6 rides along with r8

	mov	rax,QWORD PTR[rcx]
	mul	rax				; A[0]^2
	mov	QWORD PTR[520+rsp],rax		; limb 0
	mov	rbx,rdx

	mov	rax,QWORD PTR[8+rcx]
	mul	rax				; A[1]^2

	add	r10,rbx
	adc	r11,rax
	adc	rdx,0

	mov	rbx,rdx
	mov	QWORD PTR[528+rsp],r10
	mov	QWORD PTR[536+rsp],r11

	mov	rax,QWORD PTR[16+rcx]
	mul	rax				; A[2]^2

	add	r12,rbx
	adc	r13,rax
	adc	rdx,0

	mov	rbx,rdx

	mov	QWORD PTR[544+rsp],r12
	mov	QWORD PTR[552+rsp],r13

	xor	rbp,rbp
	add	r14,rbx
	adc	r15,rdi
	adc	rbp,0				; carry into limb 7

	mov	QWORD PTR[560+rsp],r14
	mov	QWORD PTR[568+rsp],r15

	; ---- double limbs 7..14 and add the squares A[4]^2 .. A[7]^2 ----
	mov	r10,QWORD PTR[576+rsp]
	mov	r11,QWORD PTR[584+rsp]
	mov	r12,QWORD PTR[592+rsp]
	mov	r13,QWORD PTR[600+rsp]
	mov	r14,QWORD PTR[608+rsp]
	mov	r15,QWORD PTR[616+rsp]
	mov	rdi,QWORD PTR[624+rsp]
	mov	rsi,QWORD PTR[632+rsp]

	mov	rax,r9
	mul	rax				; A[7]^2
	mov	r9,rax
	mov	rbx,rdx

	add	r10,r10
	adc	r11,r11
	adc	r12,r12
	adc	r13,r13
	adc	r14,r14
	adc	r15,r15
	adc	rdi,rdi
	adc	rsi,rsi
	adc	rbx,0

	; r10 was just doubled, so it is even and adding the 0/1 carry in
	; rbp cannot overflow; no carry propagation is needed here.
	add	r10,rbp

	mov	rax,QWORD PTR[32+rcx]
	mul	rax				; A[4]^2

	add	r10,r8
	adc	r11,rax
	adc	rdx,0

	mov	rbp,rdx

	mov	QWORD PTR[576+rsp],r10
	mov	QWORD PTR[584+rsp],r11

	mov	rax,QWORD PTR[40+rcx]
	mul	rax				; A[5]^2

	add	r12,rbp
	adc	r13,rax
	adc	rdx,0

	mov	rbp,rdx

	mov	QWORD PTR[592+rsp],r12
	mov	QWORD PTR[600+rsp],r13

	mov	rax,QWORD PTR[48+rcx]
	mul	rax				; A[6]^2

	add	r14,rbp
	adc	r15,rax
	adc	rdx,0

	mov	QWORD PTR[608+rsp],r14
	mov	QWORD PTR[616+rsp],r15

	add	rdi,rdx
	adc	rsi,r9
	adc	rbx,0

	mov	QWORD PTR[624+rsp],rdi
	mov	QWORD PTR[632+rsp],rsi
	mov	QWORD PTR[640+rsp],rbx		; limb 15

	; tail call: same frame offsets, returns to our caller
	jmp	mont_reduce

sqr_reduce	ENDP
;-----------------------------------------------------------------------
; mod_exp_512 -- public entry point, Microsoft x64 calling convention.
;
; Presumed C shape (argument types are not visible in this file):
;     void mod_exp_512(u64 result[8], const u64 g[8],
;                      const u64 exp[8], u64 *data);
;
; In:    rcx -> result buffer, 8 qwords (also used as working storage)
;        rdx -> 64-byte base value g
;        r8  -> 64-byte exponent, consumed in 5-bit windows from the top
;        r9  -> data block of constants; see mont_reduce for the offsets
;               it reads.  This routine itself reads 8 qwords at
;               data+512 for the final conditional subtraction.
; Out:   result[0..7]
; Saved: rbp, rbx, r12-r15 are pushed; rdi/rsi are parked in the
;        caller's home space, as required for Win64 non-volatile regs.
;
; The four arguments are first shuffled into rdi, rsi, rdx, rcx, which
; is the register assignment the body was written against.
;
; Frame (64-byte aligned, offsets from rsp inside this routine; the
; private helpers see every slot 8 bytes higher):
;     [0]          rsp value after the pushes (used by epilogue and SEH)
;     [8]          result pointer
;     [16]         g pointer
;     [24]         data pointer
;     [32]         init-loop counter
;     [40]         write cursor into the table
;     [48]         current exponent bit index
;     [64..127]    copy of the exponent; [128] zero padding
;     [136]        destination pointer handed to mont_reduce
;     [384..447]   reduced g
;     [448..511]   running table entry / selected multiplier
;     [512..639]   16-limb input to mont_reduce
;     [640.. ]     32-entry table; 16-bit word w of entry i is stored
;                  at 640 + 64*w + 2*i
;-----------------------------------------------------------------------
PUBLIC	mod_exp_512

mod_exp_512	PROC PUBLIC
	mov	QWORD PTR[8+rsp],rdi	;WIN64 prologue
	mov	QWORD PTR[16+rsp],rsi
	mov	rax,rsp				; the SEH handler relies on rax = entry rsp
$L$SEH_begin_mod_exp_512::
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9

	push	rbp
	push	rbx
	push	r12
	push	r13
	push	r14
	push	r15

	mov	r8,rsp
	sub	rsp,2688
	and	rsp,-64				; 64-byte alignment for the movdqa stores

	mov	QWORD PTR[rsp],r8
	mov	QWORD PTR[8+rsp],rdi
	mov	QWORD PTR[16+rsp],rsi
	mov	QWORD PTR[24+rsp],rcx
$L$body::

	; Build the 16-limb mont_reduce input with g in limbs 4..11 and
	; zeros elsewhere (i.e. g shifted up by 256 bits).
	pxor	xmm4,xmm4
	movdqu	xmm0,XMMWORD PTR[rsi]
	movdqu	xmm1,XMMWORD PTR[16+rsi]
	movdqu	xmm2,XMMWORD PTR[32+rsi]
	movdqu	xmm3,XMMWORD PTR[48+rsi]
	movdqa	XMMWORD PTR[512+rsp],xmm4
	movdqa	XMMWORD PTR[528+rsp],xmm4
	movdqa	XMMWORD PTR[608+rsp],xmm4
	movdqa	XMMWORD PTR[624+rsp],xmm4
	movdqa	XMMWORD PTR[544+rsp],xmm0
	movdqa	XMMWORD PTR[560+rsp],xmm1
	movdqa	XMMWORD PTR[576+rsp],xmm2
	movdqa	XMMWORD PTR[592+rsp],xmm3

	; Keep the exponent in xmm0..xmm3 while the table is built; none of
	; the helper routines in this file touch xmm registers.
	movdqu	xmm0,XMMWORD PTR[rdx]
	movdqu	xmm1,XMMWORD PTR[16+rdx]
	movdqu	xmm2,XMMWORD PTR[32+rdx]
	movdqu	xmm3,XMMWORD PTR[48+rdx]

	lea	rbx,QWORD PTR[384+rsp]
	mov	QWORD PTR[136+rsp],rbx		; reduced g goes to [384+rsp]
	call	mont_reduce

	; Table entry 0: the 8-limb value whose only non-zero limb is
	; limb 2 = 1 (that is, 2^128).
	lea	rcx,QWORD PTR[448+rsp]
	xor	rax,rax
	mov	QWORD PTR[rcx],rax
	mov	QWORD PTR[8+rcx],rax
	mov	QWORD PTR[24+rcx],rax
	mov	QWORD PTR[32+rcx],rax
	mov	QWORD PTR[40+rcx],rax
	mov	QWORD PTR[48+rcx],rax
	mov	QWORD PTR[56+rcx],rax
	mov	QWORD PTR[128+rsp],rax		; zero pad behind the exponent copy
	mov	QWORD PTR[16+rcx],1

	lea	rbp,QWORD PTR[640+rsp]		; rbp -> table base
	mov	rsi,rcx
	mov	rdi,rbp
	mov	rax,8
	; Scatter entry 0: each qword is split into four 16-bit words that
	; land 64 bytes apart.
loop_0::
	mov	rbx,QWORD PTR[rcx]
	mov	WORD PTR[rdi],bx
	shr	rbx,16
	mov	WORD PTR[64+rdi],bx
	shr	rbx,16
	mov	WORD PTR[128+rdi],bx
	shr	rbx,16
	mov	WORD PTR[192+rdi],bx
	lea	rcx,QWORD PTR[8+rcx]
	lea	rdi,QWORD PTR[256+rdi]
	dec	rax
	jnz	loop_0
	mov	rax,31				; 31 further entries to generate
	mov	QWORD PTR[32+rsp],rax
	mov	QWORD PTR[40+rsp],rbp

	; From now on results are written to [448+rsp]; preload its limbs
	; into the registers mont_mul_a3b expects.
	mov	QWORD PTR[136+rsp],rsi
	mov	r10,QWORD PTR[rsi]
	mov	r11,QWORD PTR[8+rsi]
	mov	r12,QWORD PTR[16+rsi]
	mov	r13,QWORD PTR[24+rsi]
	mov	r14,QWORD PTR[32+rsi]
	mov	r15,QWORD PTR[40+rsi]
	mov	r8,QWORD PTR[48+rsi]
	mov	r9,QWORD PTR[56+rsi]
	; entry[i] = reduce(entry[i-1] * reduced g), for i = 1..31.  rsi still
	; points at [448+rsp] on every pass, and r10..r9 carry the previous
	; result as left behind by mont_reduce.
init_loop::
	lea	rdi,QWORD PTR[384+rsp]
	call	mont_mul_a3b
	lea	rsi,QWORD PTR[448+rsp]
	mov	rbp,QWORD PTR[40+rsp]
	add	rbp,2				; next entry starts 2 bytes further on
	mov	QWORD PTR[40+rsp],rbp
	mov	rcx,rsi
	mov	rax,8
loop_1::
	mov	rbx,QWORD PTR[rcx]
	mov	WORD PTR[rbp],bx
	shr	rbx,16
	mov	WORD PTR[64+rbp],bx
	shr	rbx,16
	mov	WORD PTR[128+rbp],bx
	shr	rbx,16
	mov	WORD PTR[192+rbp],bx
	lea	rcx,QWORD PTR[8+rcx]
	lea	rbp,QWORD PTR[256+rbp]
	dec	rax
	jnz	loop_1
	mov	rax,QWORD PTR[32+rsp]
	sub	rax,1
	mov	QWORD PTR[32+rsp],rax
	jne	init_loop

	; Spill the exponent to the frame so it can be indexed by bit.
	movdqa	XMMWORD PTR[64+rsp],xmm0
	movdqa	XMMWORD PTR[80+rsp],xmm1
	movdqa	XMMWORD PTR[96+rsp],xmm2
	movdqa	XMMWORD PTR[112+rsp],xmm3

	; First window: the dword at [126+rsp] covers exponent bits 496..511
	; plus 16 zero pad bits, so bits 11 and up of it are exponent bits
	; 507..511.  Those bits are cleared in the copy after use.
	mov	eax,DWORD PTR[126+rsp]
	mov	rdx,rax
	shr	rax,11
	and	edx,007FFh
	mov	DWORD PTR[126+rsp],edx
	lea	rsi,QWORD PTR[640+rax*2+rsp]	; rsi -> selected table entry
	mov	rdx,QWORD PTR[8+rsp]		; gather it into the result buffer
	mov	rbp,4
	; Gather: rebuild two qwords per pass from eight 16-bit words spaced
	; 64 bytes apart (rbx from offsets 192,128,64,0; rax from
	; 448,384,320,256).
loop_2::
	movzx	rbx,WORD PTR[192+rsi]
	movzx	rax,WORD PTR[448+rsi]
	shl	rbx,16
	shl	rax,16
	mov	bx,WORD PTR[128+rsi]
	mov	ax,WORD PTR[384+rsi]
	shl	rbx,16
	shl	rax,16
	mov	bx,WORD PTR[64+rsi]
	mov	ax,WORD PTR[320+rsi]
	shl	rbx,16
	shl	rax,16
	mov	bx,WORD PTR[rsi]
	mov	ax,WORD PTR[256+rsi]
	mov	QWORD PTR[rdx],rbx
	mov	QWORD PTR[8+rdx],rax
	lea	rsi,QWORD PTR[512+rsi]
	lea	rdx,QWORD PTR[16+rdx]
	sub	rbp,1
	jnz	loop_2
	mov	QWORD PTR[48+rsp],505		; bit index of the next window

	; Switch the working value to the result buffer and load its limbs.
	mov	rcx,QWORD PTR[8+rsp]
	mov	QWORD PTR[136+rsp],rcx
	mov	r10,QWORD PTR[rcx]
	mov	r11,QWORD PTR[8+rcx]
	mov	r12,QWORD PTR[16+rcx]
	mov	r13,QWORD PTR[24+rcx]
	mov	r14,QWORD PTR[32+rcx]
	mov	r15,QWORD PTR[40+rcx]
	mov	r8,QWORD PTR[48+rcx]
	mov	r9,QWORD PTR[56+rcx]
	; Only bits 505..506 remain in the top window (507..511 were cleared
	; above), so the first round does two squarings instead of five.
	jmp	sqr_2

	; ---- main loop: five squarings, then multiply by table[window] ----
main_loop_a3b::
	call	sqr_reduce
	call	sqr_reduce
	call	sqr_reduce
sqr_2::
	call	sqr_reduce
	call	sqr_reduce

	; Extract the 5-bit window starting at bit [48+rsp]: read the dword
	; that begins at 16-bit word (index / 16) and shift by (index mod 16).
	mov	rcx,QWORD PTR[48+rsp]
	mov	rax,rcx
	shr	rax,4
	mov	edx,DWORD PTR[64+rax*2+rsp]
	and	rcx,15
	shr	rdx,cl
	and	rdx,01Fh

	lea	rsi,QWORD PTR[640+rdx*2+rsp]	; rsi -> selected table entry
	lea	rdx,QWORD PTR[448+rsp]		; gather target
	mov	rdi,rdx				; rdi = multiplier pointer for mont_mul_a3b
	mov	rbp,4
loop_3::
	movzx	rbx,WORD PTR[192+rsi]
	movzx	rax,WORD PTR[448+rsi]
	shl	rbx,16
	shl	rax,16
	mov	bx,WORD PTR[128+rsi]
	mov	ax,WORD PTR[384+rsi]
	shl	rbx,16
	shl	rax,16
	mov	bx,WORD PTR[64+rsi]
	mov	ax,WORD PTR[320+rsi]
	shl	rbx,16
	shl	rax,16
	mov	bx,WORD PTR[rsi]
	mov	ax,WORD PTR[256+rsi]
	mov	QWORD PTR[rdx],rbx
	mov	QWORD PTR[8+rdx],rax
	lea	rsi,QWORD PTR[512+rsi]
	lea	rdx,QWORD PTR[16+rdx]
	sub	rbp,1
	jnz	loop_3
	mov	rsi,QWORD PTR[8+rsp]		; multiplicand = working value
	call	mont_mul_a3b

	mov	rcx,QWORD PTR[48+rsp]
	sub	rcx,5
	mov	QWORD PTR[48+rsp],rcx
	jge	main_loop_a3b			; signed: stop once the index drops below 0

end_main_loop_a3b::

	; One more mont_reduce with the working value in limbs 0..7 and
	; zeros in limbs 8..15; the result goes back to the result buffer
	; ([136+rsp] still points there).
	mov	rdx,QWORD PTR[8+rsp]
	pxor	xmm4,xmm4
	movdqu	xmm0,XMMWORD PTR[rdx]
	movdqu	xmm1,XMMWORD PTR[16+rdx]
	movdqu	xmm2,XMMWORD PTR[32+rdx]
	movdqu	xmm3,XMMWORD PTR[48+rdx]
	movdqa	XMMWORD PTR[576+rsp],xmm4
	movdqa	XMMWORD PTR[592+rsp],xmm4
	movdqa	XMMWORD PTR[608+rsp],xmm4
	movdqa	XMMWORD PTR[624+rsp],xmm4
	movdqa	XMMWORD PTR[512+rsp],xmm0
	movdqa	XMMWORD PTR[528+rsp],xmm1
	movdqa	XMMWORD PTR[544+rsp],xmm2
	movdqa	XMMWORD PTR[560+rsp],xmm3
	call	mont_reduce

	; Final correction: compute result - (8 limbs at data+512) and keep
	; the difference only if the subtraction did not borrow.
	mov	rax,QWORD PTR[8+rsp]
	mov	r8,QWORD PTR[rax]
	mov	r9,QWORD PTR[8+rax]
	mov	r10,QWORD PTR[16+rax]
	mov	r11,QWORD PTR[24+rax]
	mov	r12,QWORD PTR[32+rax]
	mov	r13,QWORD PTR[40+rax]
	mov	r14,QWORD PTR[48+rax]
	mov	r15,QWORD PTR[56+rax]

	mov	rbx,QWORD PTR[24+rsp]
	add	rbx,512

	sub	r8,QWORD PTR[rbx]
	sbb	r9,QWORD PTR[8+rbx]
	sbb	r10,QWORD PTR[16+rbx]
	sbb	r11,QWORD PTR[24+rbx]
	sbb	r12,QWORD PTR[32+rbx]
	sbb	r13,QWORD PTR[40+rbx]
	sbb	r14,QWORD PTR[48+rbx]
	sbb	r15,QWORD PTR[56+rbx]

	; mov and cmov leave the flags alone, so CF from the last sbb stays
	; valid through both groups of conditional moves.
	mov	rsi,QWORD PTR[rax]
	mov	rdi,QWORD PTR[8+rax]
	mov	rcx,QWORD PTR[16+rax]
	mov	rdx,QWORD PTR[24+rax]
	cmovnc	rsi,r8
	cmovnc	rdi,r9
	cmovnc	rcx,r10
	cmovnc	rdx,r11
	mov	QWORD PTR[rax],rsi
	mov	QWORD PTR[8+rax],rdi
	mov	QWORD PTR[16+rax],rcx
	mov	QWORD PTR[24+rax],rdx

	mov	rsi,QWORD PTR[32+rax]
	mov	rdi,QWORD PTR[40+rax]
	mov	rcx,QWORD PTR[48+rax]
	mov	rdx,QWORD PTR[56+rax]
	cmovnc	rsi,r12
	cmovnc	rdi,r13
	cmovnc	rcx,r14
	cmovnc	rdx,r15
	mov	QWORD PTR[32+rax],rsi
	mov	QWORD PTR[40+rax],rdi
	mov	QWORD PTR[48+rax],rcx
	mov	QWORD PTR[56+rax],rdx

	; Restore the pushed registers through the rsp value saved at [rsp].
	mov	rsi,QWORD PTR[rsp]
	mov	r15,QWORD PTR[rsi]
	mov	r14,QWORD PTR[8+rsi]
	mov	r13,QWORD PTR[16+rsi]
	mov	r12,QWORD PTR[24+rsi]
	mov	rbx,QWORD PTR[32+rsi]
	mov	rbp,QWORD PTR[40+rsi]
	lea	rsp,QWORD PTR[48+rsi]
$L$epilogue::
	mov	rdi,QWORD PTR[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD PTR[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_mod_exp_512::
mod_exp_512	ENDP
;-----------------------------------------------------------------------
; mod_exp_512_se_handler -- Win64 language-specific exception handler
; registered for mod_exp_512 through the .xdata record.
;
; In:    r8 -> CONTEXT of the faulting frame
;        r9 -> DISPATCHER_CONTEXT
;        (rcx and rdx are not read before being overwritten)
; Out:   eax = 1
;
; CONTEXT offsets used (they match the documented x64 layout):
;        120 Rax  144 Rbx  152 Rsp  160 Rbp  168 Rsi  176 Rdi
;        216 R12  224 R13  232 R14  240 R15  248 Rip
;
; The handler rebuilds the register state the caller of mod_exp_512 had,
; copies the fixed-up CONTEXT into the dispatcher's context record and
; lets RtlVirtualUnwind continue from there.
;-----------------------------------------------------------------------
EXTERN	__imp_RtlVirtualUnwind:NEAR

ALIGN	16
mod_exp_512_se_handler	PROC PRIVATE
	push	rsi
	push	rdi
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	pushfq
	sub	rsp,64				; 32-byte home space + 4 stack arguments

	mov	rax,QWORD PTR[120+r8]		; context Rax: entry rsp while in the prologue
	mov	rbx,QWORD PTR[248+r8]		; context Rip

	lea	r10,QWORD PTR[$L$body]
	cmp	rbx,r10
	jb	$L$in_prologue			; frame not set up yet: rax is the entry rsp

	mov	rax,QWORD PTR[152+r8]		; context Rsp

	lea	r10,QWORD PTR[$L$epilogue]
	cmp	rbx,r10
	jae	$L$in_prologue			; frame already torn down: rsp is the entry rsp

	; Inside the body: [rsp] holds the rsp value taken right after the
	; six pushes, so the saved registers sit at fixed offsets from it.
	mov	rax,QWORD PTR[rax]

	mov	rbx,QWORD PTR[32+rax]
	mov	rbp,QWORD PTR[40+rax]
	mov	r12,QWORD PTR[24+rax]
	mov	r13,QWORD PTR[16+rax]
	mov	r14,QWORD PTR[8+rax]
	mov	r15,QWORD PTR[rax]
	lea	rax,QWORD PTR[48+rax]		; rax = rsp as it was at function entry
	mov	QWORD PTR[144+r8],rbx
	mov	QWORD PTR[160+r8],rbp
	mov	QWORD PTR[216+r8],r12
	mov	QWORD PTR[224+r8],r13
	mov	QWORD PTR[232+r8],r14
	mov	QWORD PTR[240+r8],r15

$L$in_prologue::
	; rdi/rsi were parked in the home space at [8+entry rsp] and
	; [16+entry rsp] by the WIN64 prologue.
	mov	rdi,QWORD PTR[8+rax]
	mov	rsi,QWORD PTR[16+rax]
	mov	QWORD PTR[152+r8],rax
	mov	QWORD PTR[168+r8],rsi
	mov	QWORD PTR[176+r8],rdi

	; Copy the patched CONTEXT (154 qwords) into the dispatcher's
	; context record at [40+r9].
	mov	rdi,QWORD PTR[40+r9]
	mov	rsi,r8
	mov	ecx,154
	DD	0a548f3fch			; bytes FC F3 48 A5 = cld ; rep movsq

	; RtlVirtualUnwind(UNW_FLAG_NHANDLER, ImageBase, ControlPc,
	;                  FunctionEntry, ContextRecord, &HandlerData,
	;                  &EstablisherFrame, NULL)
	mov	rsi,r9
	xor	rcx,rcx
	mov	rdx,QWORD PTR[8+rsi]
	mov	r8,QWORD PTR[rsi]
	mov	r9,QWORD PTR[16+rsi]
	mov	r10,QWORD PTR[40+rsi]
	lea	r11,QWORD PTR[56+rsi]
	lea	r12,QWORD PTR[24+rsi]
	mov	QWORD PTR[32+rsp],r10
	mov	QWORD PTR[40+rsp],r11
	mov	QWORD PTR[48+rsp],r12
	mov	QWORD PTR[56+rsp],rcx
	call	QWORD PTR[__imp_RtlVirtualUnwind]

	mov	eax,1				; ExceptionContinueSearch
	add	rsp,64
	popfq
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbp
	pop	rbx
	pop	rdi
	pop	rsi
	DB	0F3h,0C3h		;repret
mod_exp_512_se_handler	ENDP

.text$	ENDS
; .pdata: one function-table entry (image-relative begin address, end
; address and unwind-info address) covering mod_exp_512.
.pdata	SEGMENT READONLY ALIGN(4)
ALIGN	4
	DD	imagerel $L$SEH_begin_mod_exp_512
	DD	imagerel $L$SEH_end_mod_exp_512
	DD	imagerel $L$SEH_info_mod_exp_512

.pdata	ENDS
; .xdata: unwind info for mod_exp_512.  First byte 9 = version 1 with the
; exception-handler flag set; prolog size, unwind-code count and frame
; register are all 0, so unwinding is delegated entirely to the handler
; whose image-relative address follows.
.xdata	SEGMENT READONLY ALIGN(8)
ALIGN	8
$L$SEH_info_mod_exp_512::
DB	9,0,0,0
	DD	imagerel mod_exp_512_se_handler

.xdata	ENDS
END