1OPTION	DOTNAME
2.text$	SEGMENT ALIGN(256) 'CODE'
3
4EXTERN	OPENSSL_ia32cap_P:NEAR
5
6PUBLIC	bn_mul_mont_gather5
7
8ALIGN	64
; bn_mul_mont_gather5 -- public Win64 entry point.
; NOTE(review): the name suggests a Montgomery multiplication whose multiplier
; words are gathered from a table; the comments below describe only what the
; instructions visibly do -- confirm the intent against the generator script.
; In:  rcx, rdx, r8, r9, [40+rsp], [48+rsp], [56+rsp] (seven arguments).
;      They are remapped to rdi, rsi, rdx, rcx, r8, r9; the seventh is read
;      later as a dword.
; Out: rax = 1.
; rdi/rsi are parked in the caller's home slots and reloaded on exit;
; rbx, rbp, r12-r15, xmm6 and xmm7 are saved and restored on the generic path.
9bn_mul_mont_gather5	PROC PUBLIC
10	mov	QWORD PTR[8+rsp],rdi	;WIN64 prologue
11	mov	QWORD PTR[16+rsp],rsi
12	mov	rax,rsp
13$L$SEH_begin_bn_mul_mont_gather5::
14	mov	rdi,rcx
15	mov	rsi,rdx
16	mov	rdx,r8
17	mov	rcx,r9
18	mov	r8,QWORD PTR[40+rsp]
19	mov	r9,QWORD PTR[48+rsp]
20
21
; A length (r9d) with any of its low three bits set stays on the generic path;
; otherwise control jumps into bn_mul4x_mont_gather5 at $L$mul4x_enter.
22	test	r9d,7
23	jnz	$L$mul_enter
24	jmp	$L$mul4x_enter
25
26ALIGN	16
27$L$mul_enter::
; Zero-extend the length, remember the entry rsp in rax, fetch the seventh
; argument, then save the nonvolatile GPRs and xmm6/xmm7.
28	mov	r9d,r9d
29	mov	rax,rsp
30	mov	r10d,DWORD PTR[56+rsp]
31	push	rbx
32	push	rbp
33	push	r12
34	push	r13
35	push	r14
36	push	r15
37	lea	rsp,QWORD PTR[((-40))+rsp]
38	movaps	XMMWORD PTR[rsp],xmm6
39	movaps	XMMWORD PTR[16+rsp],xmm7
; Reserve (r9+2) qwords below rsp, round rsp down to a 1024-byte boundary and
; store the entry rsp at qword index r9+1 of the new area.
40	lea	r11,QWORD PTR[2+r9]
41	neg	r11
42	lea	rsp,QWORD PTR[r11*8+rsp]
43	and	rsp,-1024
44
45	mov	QWORD PTR[8+r9*8+rsp],rax
46$L$mul_body::
; r11 = low three bits of the seventh argument; r10 = (~(arg >> 3)) & 3 picks
; the first of four consecutive qwords in $L$magic_masks (defined outside this
; block), loaded into xmm4..xmm7.  r12 = rdx + 96 + r11*8.
47	mov	r12,rdx
48	mov	r11,r10
49	shr	r10,3
50	and	r11,7
51	not	r10
52	lea	rax,QWORD PTR[$L$magic_masks]
53	and	r10,3
54	lea	r12,QWORD PTR[96+r11*8+r12]
55	movq	xmm4,QWORD PTR[r10*8+rax]
56	movq	xmm5,QWORD PTR[8+r10*8+rax]
57	movq	xmm6,QWORD PTR[16+r10*8+rax]
58	movq	xmm7,QWORD PTR[24+r10*8+rax]
59
; Masked gather: AND the qwords at r12-96, r12-32, r12+32 and r12+96 with the
; four masks and OR them together; r12 then advances by 256.
60	movq	xmm0,QWORD PTR[(((-96)))+r12]
61	movq	xmm1,QWORD PTR[((-32))+r12]
62	pand	xmm0,xmm4
63	movq	xmm2,QWORD PTR[32+r12]
64	pand	xmm1,xmm5
65	movq	xmm3,QWORD PTR[96+r12]
66	pand	xmm2,xmm6
67	por	xmm0,xmm1
68	pand	xmm3,xmm7
69	por	xmm0,xmm2
70	lea	r12,QWORD PTR[256+r12]
71	por	xmm0,xmm3
72
; The DB bytes encode: movq rbx,xmm0 (gathered word becomes the multiplier).
73DB	102,72,15,126,195
74
; r8 = qword the fifth argument points to; rax = first word at rsi.
75	mov	r8,QWORD PTR[r8]
76	mov	rax,QWORD PTR[rsi]
77
; r14 = outer iteration counter, r15 = inner word index, both start at zero.
78	xor	r14,r14
79	xor	r15,r15
80
; The next gather is interleaved with the first multiply of this pass.
81	movq	xmm0,QWORD PTR[(((-96)))+r12]
82	movq	xmm1,QWORD PTR[((-32))+r12]
83	pand	xmm0,xmm4
84	movq	xmm2,QWORD PTR[32+r12]
85	pand	xmm1,xmm5
86
87	mov	rbp,r8
88	mul	rbx
89	mov	r10,rax
90	mov	rax,QWORD PTR[rcx]
91
92	movq	xmm3,QWORD PTR[96+r12]
93	pand	xmm2,xmm6
94	por	xmm0,xmm1
95	pand	xmm3,xmm7
96
; rbp = r8 * (low word of the first product), truncated to 64 bits.
97	imul	rbp,r10
98	mov	r11,rdx
99
100	por	xmm0,xmm2
101	lea	r12,QWORD PTR[256+r12]
102	por	xmm0,xmm3
103
104	mul	rbp
105	add	r10,rax
106	mov	rax,QWORD PTR[8+rsi]
107	adc	rdx,0
108	mov	r13,rdx
109
110	lea	r15,QWORD PTR[1+r15]
111	jmp	$L$1st_enter
112
113ALIGN	16
; First pass: for each index r15 accumulate [rsi]*rbx and [rcx]*rbp and store
; the running word at [rsp + (r15-2)*8]; loops until r15 == r9.
114$L$1st::
115	add	r13,rax
116	mov	rax,QWORD PTR[r15*8+rsi]
117	adc	rdx,0
118	add	r13,r11
119	mov	r11,r10
120	adc	rdx,0
121	mov	QWORD PTR[((-16))+r15*8+rsp],r13
122	mov	r13,rdx
123
124$L$1st_enter::
125	mul	rbx
126	add	r11,rax
127	mov	rax,QWORD PTR[r15*8+rcx]
128	adc	rdx,0
129	lea	r15,QWORD PTR[1+r15]
130	mov	r10,rdx
131
132	mul	rbp
133	cmp	r15,r9
134	jne	$L$1st
135
; The DB bytes encode: movq rbx,xmm0 (next gathered multiplier word).
136DB	102,72,15,126,195
137
138	add	r13,rax
139	mov	rax,QWORD PTR[rsi]
140	adc	rdx,0
141	add	r13,r11
142	adc	rdx,0
143	mov	QWORD PTR[((-16))+r15*8+rsp],r13
144	mov	r13,rdx
145	mov	r11,r10
146
; Fold the two outstanding high words; the carry-out goes to slot r9.
147	xor	rdx,rdx
148	add	r13,r11
149	adc	rdx,0
150	mov	QWORD PTR[((-8))+r9*8+rsp],r13
151	mov	QWORD PTR[r9*8+rsp],rdx
152
153	lea	r14,QWORD PTR[1+r14]
154	jmp	$L$outer
155ALIGN	16
; Outer loop: one pass per remaining multiplier word (r14 counts up to r9).
; Same shape as the first pass, but the words already stored at [rsp...] are
; added in as well.
156$L$outer::
157	xor	r15,r15
158	mov	rbp,r8
159	mov	r10,QWORD PTR[rsp]
160
161	movq	xmm0,QWORD PTR[(((-96)))+r12]
162	movq	xmm1,QWORD PTR[((-32))+r12]
163	pand	xmm0,xmm4
164	movq	xmm2,QWORD PTR[32+r12]
165	pand	xmm1,xmm5
166
167	mul	rbx
168	add	r10,rax
169	mov	rax,QWORD PTR[rcx]
170	adc	rdx,0
171
172	movq	xmm3,QWORD PTR[96+r12]
173	pand	xmm2,xmm6
174	por	xmm0,xmm1
175	pand	xmm3,xmm7
176
177	imul	rbp,r10
178	mov	r11,rdx
179
180	por	xmm0,xmm2
181	lea	r12,QWORD PTR[256+r12]
182	por	xmm0,xmm3
183
184	mul	rbp
185	add	r10,rax
186	mov	rax,QWORD PTR[8+rsi]
187	adc	rdx,0
188	mov	r10,QWORD PTR[8+rsp]
189	mov	r13,rdx
190
191	lea	r15,QWORD PTR[1+r15]
192	jmp	$L$inner_enter
193
194ALIGN	16
195$L$inner::
196	add	r13,rax
197	mov	rax,QWORD PTR[r15*8+rsi]
198	adc	rdx,0
199	add	r13,r10
200	mov	r10,QWORD PTR[r15*8+rsp]
201	adc	rdx,0
202	mov	QWORD PTR[((-16))+r15*8+rsp],r13
203	mov	r13,rdx
204
205$L$inner_enter::
206	mul	rbx
207	add	r11,rax
208	mov	rax,QWORD PTR[r15*8+rcx]
209	adc	rdx,0
210	add	r10,r11
211	mov	r11,rdx
212	adc	r11,0
213	lea	r15,QWORD PTR[1+r15]
214
215	mul	rbp
216	cmp	r15,r9
217	jne	$L$inner
218
; The DB bytes encode: movq rbx,xmm0 (multiplier word for the next pass).
219DB	102,72,15,126,195
220
221	add	r13,rax
222	mov	rax,QWORD PTR[rsi]
223	adc	rdx,0
224	add	r13,r10
225	mov	r10,QWORD PTR[r15*8+rsp]
226	adc	rdx,0
227	mov	QWORD PTR[((-16))+r15*8+rsp],r13
228	mov	r13,rdx
229
230	xor	rdx,rdx
231	add	r13,r11
232	adc	rdx,0
233	add	r13,r10
234	adc	rdx,0
235	mov	QWORD PTR[((-8))+r9*8+rsp],r13
236	mov	QWORD PTR[r9*8+rsp],rdx
237
238	lea	r14,QWORD PTR[1+r14]
239	cmp	r14,r9
240	jb	$L$outer
241
; Subtraction pass.  The xor clears CF, and mov/lea/jmp/dec leave CF alone, so
; the borrow travels through the whole loop:
;   rdi[i] = stack[i] - rcx[i] - borrow
242	xor	r14,r14
243	mov	rax,QWORD PTR[rsp]
244	lea	rsi,QWORD PTR[rsp]
245	mov	r15,r9
246	jmp	$L$sub
247ALIGN	16
248$L$sub::	sbb	rax,QWORD PTR[r14*8+rcx]
249	mov	QWORD PTR[r14*8+rdi],rax
250	mov	rax,QWORD PTR[8+r14*8+rsi]
251	lea	r14,QWORD PTR[1+r14]
252	dec	r15
253	jnz	$L$sub
254
; rax = top stack word (slot r9) minus the final borrow; it is used as the
; AND mask in the select loop below.
255	sbb	rax,0
256	xor	r14,r14
257	mov	r15,r9
258ALIGN	16
; Branch-free select: rdi[i] = ((stack[i] ^ rdi[i]) & rax) ^ rdi[i].
; Each stack slot is overwritten with the loop index afterwards.
259$L$copy::
260	mov	rsi,QWORD PTR[r14*8+rsp]
261	mov	rcx,QWORD PTR[r14*8+rdi]
262	xor	rsi,rcx
263	and	rsi,rax
264	xor	rsi,rcx
265	mov	QWORD PTR[r14*8+rsp],r14
266	mov	QWORD PTR[r14*8+rdi],rsi
267	lea	r14,QWORD PTR[1+r14]
268	sub	r15,1
269	jnz	$L$copy
270
; Epilogue: reload the entry rsp stored at slot r9+1, return 1, restore
; xmm6/xmm7 and the six pushed registers from below the entry rsp.
271	mov	rsi,QWORD PTR[8+r9*8+rsp]
272	mov	rax,1
273	movaps	xmm6,XMMWORD PTR[((-88))+rsi]
274	movaps	xmm7,XMMWORD PTR[((-72))+rsi]
275	mov	r15,QWORD PTR[((-48))+rsi]
276	mov	r14,QWORD PTR[((-40))+rsi]
277	mov	r13,QWORD PTR[((-32))+rsi]
278	mov	r12,QWORD PTR[((-24))+rsi]
279	mov	rbp,QWORD PTR[((-16))+rsi]
280	mov	rbx,QWORD PTR[((-8))+rsi]
281	lea	rsp,QWORD PTR[rsi]
282$L$mul_epilogue::
283	mov	rdi,QWORD PTR[8+rsp]	;WIN64 epilogue
284	mov	rsi,QWORD PTR[16+rsp]
285	DB	0F3h,0C3h		;repret
286$L$SEH_end_bn_mul_mont_gather5::
287bn_mul_mont_gather5	ENDP
288
289ALIGN	32
; bn_mul4x_mont_gather5 -- private Win64 wrapper around mul4x_internal.
; Takes the same seven arguments as bn_mul_mont_gather5 and remaps them the
; same way.  bn_mul_mont_gather5 also jumps straight to $L$mul4x_enter after
; doing that remapping itself.
; Builds an aligned scratch frame, calls mul4x_internal, then returns 1 in rax
; with rbx, rbp, r12-r15, xmm6, xmm7, rdi and rsi restored.
290bn_mul4x_mont_gather5	PROC PRIVATE
291	mov	QWORD PTR[8+rsp],rdi	;WIN64 prologue
292	mov	QWORD PTR[16+rsp],rsi
293	mov	rax,rsp
294$L$SEH_begin_bn_mul4x_mont_gather5::
295	mov	rdi,rcx
296	mov	rsi,rdx
297	mov	rdx,r8
298	mov	rcx,r9
299	mov	r8,QWORD PTR[40+rsp]
300	mov	r9,QWORD PTR[48+rsp]
301
302
303$L$mul4x_enter::
; DB 067h emits a lone address-size prefix in front of the next instruction.
; NOTE(review): presumably code-size padding for alignment -- confirm.
304DB	067h
; rax keeps the entry rsp; save nonvolatile GPRs and xmm6/xmm7 below it.
305	mov	rax,rsp
306	push	rbx
307	push	rbp
308	push	r12
309	push	r13
310	push	r14
311	push	r15
312	lea	rsp,QWORD PTR[((-40))+rsp]
313	movaps	XMMWORD PTR[rsp],xmm6
314	movaps	XMMWORD PTR[16+rsp],xmm7
315DB	067h
; r10 = 32*length, r9 = -(8*length), both derived from the 32-bit length.
316	mov	r10d,r9d
317	shl	r9d,3
318	shl	r10d,3+2
319	neg	r9
320
321
322
323
324
325
326
327
; r11 = (rsp - 64 + 2*r9 - rsi) mod 4096, i.e. the distance of the prospective
; frame from rsi within a 4 KiB window.  The frame is then moved down by an
; amount that depends on that distance.
; NOTE(review): presumably this avoids page-offset aliasing between the frame
; and the buffer at rsi -- confirm against the generator's comments.
328	lea	r11,QWORD PTR[((-64))+r9*2+rsp]
329	sub	r11,rsi
330	and	r11,4095
331	cmp	r10,r11
332	jb	$L$mul4xsp_alt
333	sub	rsp,r11
334	lea	rsp,QWORD PTR[((-64))+r9*2+rsp]
335	jmp	$L$mul4xsp_done
336
337ALIGN	32
338$L$mul4xsp_alt::
339	lea	r10,QWORD PTR[((4096-64))+r9*2]
340	lea	rsp,QWORD PTR[((-64))+r9*2+rsp]
341	sub	r11,r10
; mov (not xor) is required here: cmovc still needs CF from the sub above.
342	mov	r10,0
343	cmovc	r11,r10
344	sub	rsp,r11
345$L$mul4xsp_done::
; Align the frame to 64 bytes, make r9 positive again (length in bytes) and
; store the entry rsp at [40+rsp] for the epilogue.
346	and	rsp,-64
347	neg	r9
348
349	mov	QWORD PTR[40+rsp],rax
350$L$mul4x_body::
351
352	call	mul4x_internal
353
; Epilogue: reload the entry rsp, return 1, restore xmm6/xmm7 and the GPRs.
354	mov	rsi,QWORD PTR[40+rsp]
355	mov	rax,1
356	movaps	xmm6,XMMWORD PTR[((-88))+rsi]
357	movaps	xmm7,XMMWORD PTR[((-72))+rsi]
358	mov	r15,QWORD PTR[((-48))+rsi]
359	mov	r14,QWORD PTR[((-40))+rsi]
360	mov	r13,QWORD PTR[((-32))+rsi]
361	mov	r12,QWORD PTR[((-24))+rsi]
362	mov	rbp,QWORD PTR[((-16))+rsi]
363	mov	rbx,QWORD PTR[((-8))+rsi]
364	lea	rsp,QWORD PTR[rsi]
365$L$mul4x_epilogue::
366	mov	rdi,QWORD PTR[8+rsp]	;WIN64 epilogue
367	mov	rsi,QWORD PTR[16+rsp]
368	DB	0F3h,0C3h		;repret
369$L$SEH_end_bn_mul4x_mont_gather5::
370bn_mul4x_mont_gather5	ENDP
371
372
373ALIGN	32
; mul4x_internal -- private worker, entered by CALL from
; bn_mul4x_mont_gather5 and bn_power5.
; Register inputs, as read by the code:
;   rax = caller's entry rsp (the seventh Win64 argument is read at [56+rax])
;   rdx = base address for the masked gather
;   rsi, rcx = source word vectors (rcx is read with a 16-byte stride)
;   r8  = pointer to a qword constant (dereferenced here)
;   r9  = length in bytes
;   rdi = destination pointer, spilled to [56+8+rsp]
; Clobbers xmm0-xmm7 (the four gather masks live in xmm4..xmm7) and the
; general-purpose registers it uses.
; It has no ret of its own: it ends with a jump to $L$sqr4x_sub (inside
; bn_sqr8x_internal), whose ret returns to this routine's caller.
374mul4x_internal	PROC PRIVATE
; r13 = rdx + 256 + 32*r9 is the gather end address (loop bound at the
; bottom); r9 is shifted back afterwards.
375	shl	r9,5
376	mov	r10d,DWORD PTR[56+rax]
377	lea	r13,QWORD PTR[256+r9*1+rdx]
378	shr	r9,5
; Split the index: r11 = low three bits; r10 = (~(idx >> 3)) & 3 selects four
; consecutive qwords of $L$magic_masks (defined outside this block).
379	mov	r11,r10
380	shr	r10,3
381	and	r11,7
382	not	r10
383	lea	rax,QWORD PTR[$L$magic_masks]
384	and	r10,3
385	lea	r12,QWORD PTR[96+r11*8+rdx]
386	movq	xmm4,QWORD PTR[r10*8+rax]
387	movq	xmm5,QWORD PTR[8+r10*8+rax]
; r11 = (r11 + 7) & 7 is used below as a qword offset into the scratch frame.
388	add	r11,7
389	movq	xmm6,QWORD PTR[16+r10*8+rax]
390	movq	xmm7,QWORD PTR[24+r10*8+rax]
391	and	r11,7
392
; Two masked gathers are interleaved here: the first (from r12) yields the
; current multiplier word, the second (from r14 = r12+256) is finished further
; down and consumed by the next pass.
393	movq	xmm0,QWORD PTR[(((-96)))+r12]
394	lea	r14,QWORD PTR[256+r12]
395	movq	xmm1,QWORD PTR[((-32))+r12]
396	pand	xmm0,xmm4
397	movq	xmm2,QWORD PTR[32+r12]
398	pand	xmm1,xmm5
399	movq	xmm3,QWORD PTR[96+r12]
400	pand	xmm2,xmm6
401DB	067h
402	por	xmm0,xmm1
403	movq	xmm1,QWORD PTR[((-96))+r14]
404DB	067h
405	pand	xmm3,xmm7
406DB	067h
407	por	xmm0,xmm2
408	movq	xmm2,QWORD PTR[((-32))+r14]
409DB	067h
410	pand	xmm1,xmm4
411DB	067h
412	por	xmm0,xmm3
413	movq	xmm3,QWORD PTR[32+r14]
414
; The DB bytes encode: movq rbx,xmm0.
415DB	102,72,15,126,195
416	movq	xmm0,QWORD PTR[96+r14]
; Spill the gather end address and the destination pointer.
417	mov	QWORD PTR[((16+8))+rsp],r13
418	mov	QWORD PTR[((56+8))+rsp],rdi
419
; rsi is moved to the end of its vector and r9 negated, so [r9+rsi] indexes
; from the start with a negative offset.
420	mov	r8,QWORD PTR[r8]
421	mov	rax,QWORD PTR[rsi]
422	lea	rsi,QWORD PTR[r9*1+rsi]
423	neg	r9
424
425	mov	rbp,r8
426	mul	rbx
427	mov	r10,rax
428	mov	rax,QWORD PTR[rcx]
429
430	pand	xmm2,xmm5
431	pand	xmm3,xmm6
432	por	xmm1,xmm2
433
; rbp = r8 * (low word of the first product), truncated to 64 bits.
434	imul	rbp,r10
435
436
437
438
439
440
441
; r14 = first output slot in the scratch frame.
442	lea	r14,QWORD PTR[((64+8))+r11*8+rsp]
443	mov	r11,rdx
444
445	pand	xmm0,xmm7
446	por	xmm1,xmm3
447	lea	r12,QWORD PTR[512+r12]
448	por	xmm0,xmm1
449
450	mul	rbp
451	add	r10,rax
452	mov	rax,QWORD PTR[8+r9*1+rsi]
453	adc	rdx,0
454	mov	rdi,rdx
455
456	mul	rbx
457	add	r11,rax
458	mov	rax,QWORD PTR[16+rcx]
459	adc	rdx,0
460	mov	r10,rdx
461
462	mul	rbp
463	add	rdi,rax
464	mov	rax,QWORD PTR[16+r9*1+rsi]
465	adc	rdx,0
466	add	rdi,r11
467	lea	r15,QWORD PTR[32+r9]
468	lea	rcx,QWORD PTR[64+rcx]
469	adc	rdx,0
470	mov	QWORD PTR[r14],rdi
471	mov	r13,rdx
472	jmp	$L$1st4x
473
474ALIGN	32
; First pass, four words per iteration.  r15 is a negative byte offset that
; climbs by 32 to zero; rcx advances 64 bytes and r14 32 bytes per iteration.
475$L$1st4x::
476	mul	rbx
477	add	r10,rax
478	mov	rax,QWORD PTR[((-32))+rcx]
479	lea	r14,QWORD PTR[32+r14]
480	adc	rdx,0
481	mov	r11,rdx
482
483	mul	rbp
484	add	r13,rax
485	mov	rax,QWORD PTR[((-8))+r15*1+rsi]
486	adc	rdx,0
487	add	r13,r10
488	adc	rdx,0
489	mov	QWORD PTR[((-24))+r14],r13
490	mov	rdi,rdx
491
492	mul	rbx
493	add	r11,rax
494	mov	rax,QWORD PTR[((-16))+rcx]
495	adc	rdx,0
496	mov	r10,rdx
497
498	mul	rbp
499	add	rdi,rax
500	mov	rax,QWORD PTR[r15*1+rsi]
501	adc	rdx,0
502	add	rdi,r11
503	adc	rdx,0
504	mov	QWORD PTR[((-16))+r14],rdi
505	mov	r13,rdx
506
507	mul	rbx
508	add	r10,rax
509	mov	rax,QWORD PTR[rcx]
510	adc	rdx,0
511	mov	r11,rdx
512
513	mul	rbp
514	add	r13,rax
515	mov	rax,QWORD PTR[8+r15*1+rsi]
516	adc	rdx,0
517	add	r13,r10
518	adc	rdx,0
519	mov	QWORD PTR[((-8))+r14],r13
520	mov	rdi,rdx
521
522	mul	rbx
523	add	r11,rax
524	mov	rax,QWORD PTR[16+rcx]
525	adc	rdx,0
526	mov	r10,rdx
527
528	mul	rbp
529	add	rdi,rax
530	mov	rax,QWORD PTR[16+r15*1+rsi]
531	adc	rdx,0
532	add	rdi,r11
533	lea	rcx,QWORD PTR[64+rcx]
534	adc	rdx,0
535	mov	QWORD PTR[r14],rdi
536	mov	r13,rdx
537
538	add	r15,32
539	jnz	$L$1st4x
540
; Tail of the first pass: the last two words, then rax is reloaded with the
; first source word for the next pass.
541	mul	rbx
542	add	r10,rax
543	mov	rax,QWORD PTR[((-32))+rcx]
544	lea	r14,QWORD PTR[32+r14]
545	adc	rdx,0
546	mov	r11,rdx
547
548	mul	rbp
549	add	r13,rax
550	mov	rax,QWORD PTR[((-8))+rsi]
551	adc	rdx,0
552	add	r13,r10
553	adc	rdx,0
554	mov	QWORD PTR[((-24))+r14],r13
555	mov	rdi,rdx
556
557	mul	rbx
558	add	r11,rax
559	mov	rax,QWORD PTR[((-16))+rcx]
560	adc	rdx,0
561	mov	r10,rdx
562
563	mul	rbp
564	add	rdi,rax
565	mov	rax,QWORD PTR[r9*1+rsi]
566	adc	rdx,0
567	add	rdi,r11
568	adc	rdx,0
569	mov	QWORD PTR[((-16))+r14],rdi
570	mov	r13,rdx
571
; movq rbx,xmm0: next multiplier word.  rcx is rewound by 2*|r9| bytes.
572DB	102,72,15,126,195
573	lea	rcx,QWORD PTR[r9*2+rcx]
574
; rdi collects the carry out of the top word.
575	xor	rdi,rdi
576	add	r13,r10
577	adc	rdi,0
578	mov	QWORD PTR[((-8))+r14],r13
579
580	jmp	$L$outer4x
581
582ALIGN	32
; Outer loop: repeated until the gather pointer r12 reaches the end address
; spilled at [16+8+rsp].  Each pass also adds in the words left in the scratch
; frame by the previous pass.
583$L$outer4x::
584	mov	r10,QWORD PTR[r9*1+r14]
585	mov	rbp,r8
586	mul	rbx
587	add	r10,rax
588	mov	rax,QWORD PTR[rcx]
589	adc	rdx,0
590
591	movq	xmm0,QWORD PTR[(((-96)))+r12]
592	movq	xmm1,QWORD PTR[((-32))+r12]
593	pand	xmm0,xmm4
594	movq	xmm2,QWORD PTR[32+r12]
595	pand	xmm1,xmm5
596	movq	xmm3,QWORD PTR[96+r12]
597
598	imul	rbp,r10
599DB	067h
600	mov	r11,rdx
; Store the previous pass's top carry, then rewind r14 to the frame start.
601	mov	QWORD PTR[r14],rdi
602
603	pand	xmm2,xmm6
604	por	xmm0,xmm1
605	pand	xmm3,xmm7
606	por	xmm0,xmm2
607	lea	r14,QWORD PTR[r9*1+r14]
608	lea	r12,QWORD PTR[256+r12]
609	por	xmm0,xmm3
610
611	mul	rbp
612	add	r10,rax
613	mov	rax,QWORD PTR[8+r9*1+rsi]
614	adc	rdx,0
615	mov	rdi,rdx
616
617	mul	rbx
618	add	r11,rax
619	mov	rax,QWORD PTR[16+rcx]
620	adc	rdx,0
621	add	r11,QWORD PTR[8+r14]
622	adc	rdx,0
623	mov	r10,rdx
624
625	mul	rbp
626	add	rdi,rax
627	mov	rax,QWORD PTR[16+r9*1+rsi]
628	adc	rdx,0
629	add	rdi,r11
630	lea	r15,QWORD PTR[32+r9]
631	lea	rcx,QWORD PTR[64+rcx]
632	adc	rdx,0
633	mov	r13,rdx
634	jmp	$L$inner4x
635
636ALIGN	32
; Inner loop, four words per iteration; stores lag one word behind the
; accumulation (rdi/r13 alternate as the pending word).
637$L$inner4x::
638	mul	rbx
639	add	r10,rax
640	mov	rax,QWORD PTR[((-32))+rcx]
641	adc	rdx,0
642	add	r10,QWORD PTR[16+r14]
643	lea	r14,QWORD PTR[32+r14]
644	adc	rdx,0
645	mov	r11,rdx
646
647	mul	rbp
648	add	r13,rax
649	mov	rax,QWORD PTR[((-8))+r15*1+rsi]
650	adc	rdx,0
651	add	r13,r10
652	adc	rdx,0
653	mov	QWORD PTR[((-32))+r14],rdi
654	mov	rdi,rdx
655
656	mul	rbx
657	add	r11,rax
658	mov	rax,QWORD PTR[((-16))+rcx]
659	adc	rdx,0
660	add	r11,QWORD PTR[((-8))+r14]
661	adc	rdx,0
662	mov	r10,rdx
663
664	mul	rbp
665	add	rdi,rax
666	mov	rax,QWORD PTR[r15*1+rsi]
667	adc	rdx,0
668	add	rdi,r11
669	adc	rdx,0
670	mov	QWORD PTR[((-24))+r14],r13
671	mov	r13,rdx
672
673	mul	rbx
674	add	r10,rax
675	mov	rax,QWORD PTR[rcx]
676	adc	rdx,0
677	add	r10,QWORD PTR[r14]
678	adc	rdx,0
679	mov	r11,rdx
680
681	mul	rbp
682	add	r13,rax
683	mov	rax,QWORD PTR[8+r15*1+rsi]
684	adc	rdx,0
685	add	r13,r10
686	adc	rdx,0
687	mov	QWORD PTR[((-16))+r14],rdi
688	mov	rdi,rdx
689
690	mul	rbx
691	add	r11,rax
692	mov	rax,QWORD PTR[16+rcx]
693	adc	rdx,0
694	add	r11,QWORD PTR[8+r14]
695	adc	rdx,0
696	mov	r10,rdx
697
698	mul	rbp
699	add	rdi,rax
700	mov	rax,QWORD PTR[16+r15*1+rsi]
701	adc	rdx,0
702	add	rdi,r11
703	lea	rcx,QWORD PTR[64+rcx]
704	adc	rdx,0
705	mov	QWORD PTR[((-8))+r14],r13
706	mov	r13,rdx
707
708	add	r15,32
709	jnz	$L$inner4x
710
; Tail of the inner loop.
711	mul	rbx
712	add	r10,rax
713	mov	rax,QWORD PTR[((-32))+rcx]
714	adc	rdx,0
715	add	r10,QWORD PTR[16+r14]
716	lea	r14,QWORD PTR[32+r14]
717	adc	rdx,0
718	mov	r11,rdx
719
720	mul	rbp
721	add	r13,rax
722	mov	rax,QWORD PTR[((-8))+rsi]
723	adc	rdx,0
724	add	r13,r10
725	adc	rdx,0
726	mov	QWORD PTR[((-32))+r14],rdi
727	mov	rdi,rdx
728
; Operands are swapped for the last multiply: rax takes the old rbp and rbp
; is loaded with the word at [rcx-16], so that word is still in rbp for the
; comparison after the loop.
729	mul	rbx
730	add	r11,rax
731	mov	rax,rbp
732	mov	rbp,QWORD PTR[((-16))+rcx]
733	adc	rdx,0
734	add	r11,QWORD PTR[((-8))+r14]
735	adc	rdx,0
736	mov	r10,rdx
737
738	mul	rbp
739	add	rdi,rax
740	mov	rax,QWORD PTR[r9*1+rsi]
741	adc	rdx,0
742	add	rdi,r11
743	adc	rdx,0
744	mov	QWORD PTR[((-24))+r14],r13
745	mov	r13,rdx
746
; movq rbx,xmm0: multiplier word for the next outer pass.
747DB	102,72,15,126,195
748	mov	QWORD PTR[((-16))+r14],rdi
749	lea	rcx,QWORD PTR[r9*2+rcx]
750
751	xor	rdi,rdi
752	add	r13,r10
753	adc	rdi,0
754	add	r13,QWORD PTR[r14]
755	adc	rdi,0
756	mov	QWORD PTR[((-8))+r14],r13
757
758	cmp	r12,QWORD PTR[((16+8))+rsp]
759	jb	$L$outer4x
; Compare rbp (last word read through rcx) with the top result word r13:
; CF = rbp < r13.  r15 is zero here, so adc yields that borrow bit; rdi
; becomes ((top carry | borrow) ^ 1), either 0 or 1, and is scaled by 8 to
; pick the starting word for the subtrahend pointer rbp.
760	sub	rbp,r13
761	adc	r15,r15
762	or	rdi,r15
763	xor	rdi,1
; Set up the registers $L$sqr4x_sub expects: rbx = scratch frame start,
; rbp = subtrahend pointer, rcx = r9 >> 5 (negative group count), rdi =
; destination reloaded from its spill slot.
; NOTE(review): $L$sqr4x_sub starts with sbb, so it relies on CF being clear
; here.  sar leaves CF = last bit shifted out, which is 0 only if the low five
; bits of r9 are zero -- presumably guaranteed by the length constraints;
; confirm.
764	lea	rbx,QWORD PTR[r9*1+r14]
765	lea	rbp,QWORD PTR[rdi*8+rcx]
766	mov	rcx,r9
767	sar	rcx,3+2
768	mov	rdi,QWORD PTR[((56+8))+rsp]
769	jmp	$L$sqr4x_sub
770mul4x_internal	ENDP
771PUBLIC	bn_power5
772
773ALIGN	32
; bn_power5 -- public Win64 entry point.
; Takes seven arguments in rcx, rdx, r8, r9, [40+rsp], [48+rsp], [56+rsp]
; (the seventh is read by mul4x_internal through the saved entry rsp) and
; remaps the first six to rdi, rsi, rdx, rcx, r8, r9.
; Runs __bn_sqr8x_internal five times, then mul4x_internal once.
; NOTE(review): by its name this is "raise to the 32nd power, then multiply by
; a gathered table entry" -- confirm against the generator script.
; Out: rax = 1.
; Preserves rbx, rbp, rsi, rdi, r12-r15 and xmm6/xmm7, as the Microsoft x64
; calling convention requires.
774bn_power5	PROC PUBLIC
775	mov	QWORD PTR[8+rsp],rdi	;WIN64 prologue
776	mov	QWORD PTR[16+rsp],rsi
777	mov	rax,rsp
778$L$SEH_begin_bn_power5::
779	mov	rdi,rcx
780	mov	rsi,rdx
781	mov	rdx,r8
782	mov	rcx,r9
783	mov	r8,QWORD PTR[40+rsp]
784	mov	r9,QWORD PTR[48+rsp]
785
786
; rax keeps the entry rsp.  The saved registers end up at fixed negative
; offsets from it: GPRs at -8..-48, xmm7 at -72, xmm6 at -88.
787	mov	rax,rsp
788	push	rbx
789	push	rbp
790	push	r12
791	push	r13
792	push	r14
793	push	r15
794	lea	rsp,QWORD PTR[((-40))+rsp]
795	movaps	XMMWORD PTR[rsp],xmm6
796	movaps	XMMWORD PTR[16+rsp],xmm7
; r10 = 32*length, r9 = -(8*length); r8 = qword the fifth argument points to.
797	mov	r10d,r9d
798	shl	r9d,3
799	shl	r10d,3+2
800	neg	r9
801	mov	r8,QWORD PTR[r8]
802
803
804
805
806
807
808
; Frame placement, same computation as in bn_mul4x_mont_gather5:
; r11 = (rsp - 64 + 2*r9 - rsi) mod 4096 decides how far to lower the frame.
809	lea	r11,QWORD PTR[((-64))+r9*2+rsp]
810	sub	r11,rsi
811	and	r11,4095
812	cmp	r10,r11
813	jb	$L$pwr_sp_alt
814	sub	rsp,r11
815	lea	rsp,QWORD PTR[((-64))+r9*2+rsp]
816	jmp	$L$pwr_sp_done
817
818ALIGN	32
819$L$pwr_sp_alt::
820	lea	r10,QWORD PTR[((4096-64))+r9*2]
821	lea	rsp,QWORD PTR[((-64))+r9*2+rsp]
822	sub	r11,r10
; mov (not xor) is required here: cmovc still needs CF from the sub above.
823	mov	r10,0
824	cmovc	r11,r10
825	sub	rsp,r11
826$L$pwr_sp_done::
; Align to 64 bytes; r10 = -(8*length), r9 = +(8*length).
827	and	rsp,-64
828	mov	r10,r9
829	neg	r9
830
831
832
833
834
835
836
837
838
839
; Frame slots: [32+rsp] = the loaded qword constant, [40+rsp] = entry rsp.
840	mov	QWORD PTR[32+rsp],r8
841	mov	QWORD PTR[40+rsp],rax
842$L$power5_body::
; The DB bytes encode: movq xmm1,rdi / movq xmm2,rcx / movq xmm3,r10 /
; movq xmm4,rdx -- values parked in xmm registers across the squarings.
843DB	102,72,15,110,207
844DB	102,72,15,110,209
845DB	102,73,15,110,218
846DB	102,72,15,110,226
847
848	call	__bn_sqr8x_internal
849	call	__bn_sqr8x_internal
850	call	__bn_sqr8x_internal
851	call	__bn_sqr8x_internal
852	call	__bn_sqr8x_internal
853
; movq rcx,xmm2 / movq rdx,xmm4: recover the values parked above.  rdi takes
; rsi, rax the entry rsp, and r8 points at the constant stored at [32+rsp].
854DB	102,72,15,126,209
855DB	102,72,15,126,226
856	mov	rdi,rsi
857	mov	rax,QWORD PTR[40+rsp]
858	lea	r8,QWORD PTR[32+rsp]
859
860	call	mul4x_internal
861
862	mov	rsi,QWORD PTR[40+rsp]
863	mov	rax,1
; xmm6/xmm7 are nonvolatile under the Microsoft x64 calling convention and
; mul4x_internal loads its gather masks into them, so put back the copies the
; prologue saved (same offsets as the bn_mul4x_mont_gather5 epilogue).
	movaps	xmm6,XMMWORD PTR[((-88))+rsi]
	movaps	xmm7,XMMWORD PTR[((-72))+rsi]
864	mov	r15,QWORD PTR[((-48))+rsi]
865	mov	r14,QWORD PTR[((-40))+rsi]
866	mov	r13,QWORD PTR[((-32))+rsi]
867	mov	r12,QWORD PTR[((-24))+rsi]
868	mov	rbp,QWORD PTR[((-16))+rsi]
869	mov	rbx,QWORD PTR[((-8))+rsi]
870	lea	rsp,QWORD PTR[rsi]
871$L$power5_epilogue::
872	mov	rdi,QWORD PTR[8+rsp]	;WIN64 epilogue
873	mov	rsi,QWORD PTR[16+rsp]
874	DB	0F3h,0C3h		;repret
875$L$SEH_end_bn_power5::
876bn_power5	ENDP
877
878PUBLIC	bn_sqr8x_internal
879
880
881ALIGN	32
; bn_sqr8x_internal / __bn_sqr8x_internal -- squaring worker called by
; bn_power5.  It contains two further entry points:
;   sqr8x_reduction -- called directly by bn_from_mont8x
;   $L$sqr4x_sub    -- jumped to by mul4x_internal
; Register inputs, as read by the code:
;   rsi = source vector, r9 = length in bytes, r10 = -r9
;   xmm1, xmm2, xmm3 = values parked by the caller; they are moved back into
;   rdi/rsi, rbp and r9 by the DB-encoded movq instructions below
;   [32+8+rsp] = qword multiplier constant used by the reduction
; Scratch words live in the caller's frame starting at [48+8+rsp].
; On return r10 = -(length in bytes) and r9 = +(length in bytes).
; Phases: (1) products of distinct word pairs, (2) double them and add the
; squares of each word, (3) reduction in 8-word blocks, (4) final subtraction.
; NOTE(review): presumably a Montgomery squaring -- confirm against the
; generator script.
882bn_sqr8x_internal	PROC PUBLIC
883__bn_sqr8x_internal::
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
; Phase 1 setup: rbp = 32 - length (a negative running offset), rsi moved to
; the end of the source so [rbp+rsi] and [rcx+rsi] index backwards from it.
957	lea	rbp,QWORD PTR[32+r10]
958	lea	rsi,QWORD PTR[r9*1+rsi]
959
960	mov	rcx,r9
961
962
; r14 and r15 hold the two multiplier words of this round; rdi points at the
; end of the current output window.
963	mov	r14,QWORD PTR[((-32))+rbp*1+rsi]
964	lea	rdi,QWORD PTR[((48+8))+r9*2+rsp]
965	mov	rax,QWORD PTR[((-24))+rbp*1+rsi]
966	lea	rdi,QWORD PTR[((-32))+rbp*1+rdi]
967	mov	rbx,QWORD PTR[((-16))+rbp*1+rsi]
968	mov	r15,rax
969
970	mul	r14
971	mov	r10,rax
972	mov	rax,rbx
973	mov	r11,rdx
974	mov	QWORD PTR[((-24))+rbp*1+rdi],r10
975
976	mul	r14
977	add	r11,rax
978	mov	rax,rbx
979	adc	rdx,0
980	mov	QWORD PTR[((-16))+rbp*1+rdi],r11
981	mov	r10,rdx
982
983
984	mov	rbx,QWORD PTR[((-8))+rbp*1+rsi]
985	mul	r15
986	mov	r12,rax
987	mov	rax,rbx
988	mov	r13,rdx
989
990	lea	rcx,QWORD PTR[rbp]
991	mul	r14
992	add	r10,rax
993	mov	rax,rbx
994	mov	r11,rdx
995	adc	r11,0
996	add	r10,r12
997	adc	r11,0
998	mov	QWORD PTR[((-8))+rcx*1+rdi],r10
999	jmp	$L$sqr4x_1st
1000
1001ALIGN	32
; First round: multiply the remaining source words by r14 and r15 and store
; (not accumulate) the results; rcx climbs by 32 to zero.
1002$L$sqr4x_1st::
1003	mov	rbx,QWORD PTR[rcx*1+rsi]
1004	mul	r15
1005	add	r13,rax
1006	mov	rax,rbx
1007	mov	r12,rdx
1008	adc	r12,0
1009
1010	mul	r14
1011	add	r11,rax
1012	mov	rax,rbx
1013	mov	rbx,QWORD PTR[8+rcx*1+rsi]
1014	mov	r10,rdx
1015	adc	r10,0
1016	add	r11,r13
1017	adc	r10,0
1018
1019
1020	mul	r15
1021	add	r12,rax
1022	mov	rax,rbx
1023	mov	QWORD PTR[rcx*1+rdi],r11
1024	mov	r13,rdx
1025	adc	r13,0
1026
1027	mul	r14
1028	add	r10,rax
1029	mov	rax,rbx
1030	mov	rbx,QWORD PTR[16+rcx*1+rsi]
1031	mov	r11,rdx
1032	adc	r11,0
1033	add	r10,r12
1034	adc	r11,0
1035
1036	mul	r15
1037	add	r13,rax
1038	mov	rax,rbx
1039	mov	QWORD PTR[8+rcx*1+rdi],r10
1040	mov	r12,rdx
1041	adc	r12,0
1042
1043	mul	r14
1044	add	r11,rax
1045	mov	rax,rbx
1046	mov	rbx,QWORD PTR[24+rcx*1+rsi]
1047	mov	r10,rdx
1048	adc	r10,0
1049	add	r11,r13
1050	adc	r10,0
1051
1052
1053	mul	r15
1054	add	r12,rax
1055	mov	rax,rbx
1056	mov	QWORD PTR[16+rcx*1+rdi],r11
1057	mov	r13,rdx
1058	adc	r13,0
1059	lea	rcx,QWORD PTR[32+rcx]
1060
1061	mul	r14
1062	add	r10,rax
1063	mov	rax,rbx
1064	mov	r11,rdx
1065	adc	r11,0
1066	add	r10,r12
1067	adc	r11,0
1068	mov	QWORD PTR[((-8))+rcx*1+rdi],r10
1069
1070	cmp	rcx,0
1071	jne	$L$sqr4x_1st
1072
1073	mul	r15
1074	add	r13,rax
1075	lea	rbp,QWORD PTR[16+rbp]
1076	adc	rdx,0
1077	add	r13,r11
1078	adc	rdx,0
1079
1080	mov	QWORD PTR[rdi],r13
1081	mov	r12,rdx
1082	mov	QWORD PTR[8+rdi],rdx
1083	jmp	$L$sqr4x_outer
1084
1085ALIGN	32
; Later rounds: same shape, but each product is added to the word already
; stored in the output window.  rbp advances by 16 per round until zero.
1086$L$sqr4x_outer::
1087	mov	r14,QWORD PTR[((-32))+rbp*1+rsi]
1088	lea	rdi,QWORD PTR[((48+8))+r9*2+rsp]
1089	mov	rax,QWORD PTR[((-24))+rbp*1+rsi]
1090	lea	rdi,QWORD PTR[((-32))+rbp*1+rdi]
1091	mov	rbx,QWORD PTR[((-16))+rbp*1+rsi]
1092	mov	r15,rax
1093
1094	mul	r14
1095	mov	r10,QWORD PTR[((-24))+rbp*1+rdi]
1096	add	r10,rax
1097	mov	rax,rbx
1098	adc	rdx,0
1099	mov	QWORD PTR[((-24))+rbp*1+rdi],r10
1100	mov	r11,rdx
1101
1102	mul	r14
1103	add	r11,rax
1104	mov	rax,rbx
1105	adc	rdx,0
1106	add	r11,QWORD PTR[((-16))+rbp*1+rdi]
1107	mov	r10,rdx
1108	adc	r10,0
1109	mov	QWORD PTR[((-16))+rbp*1+rdi],r11
1110
1111	xor	r12,r12
1112
1113	mov	rbx,QWORD PTR[((-8))+rbp*1+rsi]
1114	mul	r15
1115	add	r12,rax
1116	mov	rax,rbx
1117	adc	rdx,0
1118	add	r12,QWORD PTR[((-8))+rbp*1+rdi]
1119	mov	r13,rdx
1120	adc	r13,0
1121
1122	mul	r14
1123	add	r10,rax
1124	mov	rax,rbx
1125	adc	rdx,0
1126	add	r10,r12
1127	mov	r11,rdx
1128	adc	r11,0
1129	mov	QWORD PTR[((-8))+rbp*1+rdi],r10
1130
1131	lea	rcx,QWORD PTR[rbp]
1132	jmp	$L$sqr4x_inner
1133
1134ALIGN	32
; Inner loop of a later round: two source words per iteration, rcx climbs by
; 16 to zero.
1135$L$sqr4x_inner::
1136	mov	rbx,QWORD PTR[rcx*1+rsi]
1137	mul	r15
1138	add	r13,rax
1139	mov	rax,rbx
1140	mov	r12,rdx
1141	adc	r12,0
1142	add	r13,QWORD PTR[rcx*1+rdi]
1143	adc	r12,0
1144
1145DB	067h
1146	mul	r14
1147	add	r11,rax
1148	mov	rax,rbx
1149	mov	rbx,QWORD PTR[8+rcx*1+rsi]
1150	mov	r10,rdx
1151	adc	r10,0
1152	add	r11,r13
1153	adc	r10,0
1154
1155	mul	r15
1156	add	r12,rax
1157	mov	QWORD PTR[rcx*1+rdi],r11
1158	mov	rax,rbx
1159	mov	r13,rdx
1160	adc	r13,0
1161	add	r12,QWORD PTR[8+rcx*1+rdi]
1162	lea	rcx,QWORD PTR[16+rcx]
1163	adc	r13,0
1164
1165	mul	r14
1166	add	r10,rax
1167	mov	rax,rbx
1168	adc	rdx,0
1169	add	r10,r12
1170	mov	r11,rdx
1171	adc	r11,0
1172	mov	QWORD PTR[((-8))+rcx*1+rdi],r10
1173
1174	cmp	rcx,0
1175	jne	$L$sqr4x_inner
1176
1177DB	067h
1178	mul	r15
1179	add	r13,rax
1180	adc	rdx,0
1181	add	r13,r11
1182	adc	rdx,0
1183
1184	mov	QWORD PTR[rdi],r13
1185	mov	r12,rdx
1186	mov	QWORD PTR[8+rdi],rdx
1187
1188	add	rbp,16
1189	jnz	$L$sqr4x_outer
1190
1191
; Final round on the last four source words (fixed offsets from the end of
; the source; rbp is zero here).  r10, r13 and r12 still carry words from the
; round just finished.
1192	mov	r14,QWORD PTR[((-32))+rsi]
1193	lea	rdi,QWORD PTR[((48+8))+r9*2+rsp]
1194	mov	rax,QWORD PTR[((-24))+rsi]
1195	lea	rdi,QWORD PTR[((-32))+rbp*1+rdi]
1196	mov	rbx,QWORD PTR[((-16))+rsi]
1197	mov	r15,rax
1198
1199	mul	r14
1200	add	r10,rax
1201	mov	rax,rbx
1202	mov	r11,rdx
1203	adc	r11,0
1204
1205	mul	r14
1206	add	r11,rax
1207	mov	rax,rbx
1208	mov	QWORD PTR[((-24))+rdi],r10
1209	mov	r10,rdx
1210	adc	r10,0
1211	add	r11,r13
1212	mov	rbx,QWORD PTR[((-8))+rsi]
1213	adc	r10,0
1214
1215	mul	r15
1216	add	r12,rax
1217	mov	rax,rbx
1218	mov	QWORD PTR[((-16))+rdi],r11
1219	mov	r13,rdx
1220	adc	r13,0
1221
1222	mul	r14
1223	add	r10,rax
1224	mov	rax,rbx
1225	mov	r11,rdx
1226	adc	r11,0
1227	add	r10,r12
1228	adc	r11,0
1229	mov	QWORD PTR[((-8))+rdi],r10
1230
1231	mul	r15
1232	add	r13,rax
1233	mov	rax,QWORD PTR[((-16))+rsi]
1234	adc	rdx,0
1235	add	r13,r11
1236	adc	rdx,0
1237
1238	mov	QWORD PTR[rdi],r13
1239	mov	r12,rdx
1240	mov	QWORD PTR[8+rdi],rdx
1241
; Last cross product; rbp becomes 16 - length for phase 2, and r14/r15 are
; cleared (r14 = shifted-in bit, r15 = saved carry).
1242	mul	rbx
1243	add	rbp,16
1244	xor	r14,r14
1245	sub	rbp,r9
1246	xor	r15,r15
1247
1248	add	rax,r12
1249	adc	rdx,0
1250	mov	QWORD PTR[8+rdi],rax
1251	mov	QWORD PTR[16+rdi],rdx
1252	mov	QWORD PTR[24+rdi],r15
1253
; Phase 2: walk the scratch words from the bottom.  Each pair is doubled
; (lea x*2 plus the bit shifted out of the previous word) and the 128-bit
; square of one source word (mul rax) is added.  mul destroys CF, so the
; running carry is parked in r15: "sbb r15,r15" saves it as 0 / -1 and
; "neg r15" turns it back into CF.
1254	mov	rax,QWORD PTR[((-16))+rbp*1+rsi]
1255	lea	rdi,QWORD PTR[((48+8))+rsp]
1256	xor	r10,r10
1257	mov	r11,QWORD PTR[8+rdi]
1258
1259	lea	r12,QWORD PTR[r10*2+r14]
1260	shr	r10,63
1261	lea	r13,QWORD PTR[r11*2+rcx]
1262	shr	r11,63
1263	or	r13,r10
1264	mov	r10,QWORD PTR[16+rdi]
1265	mov	r14,r11
1266	mul	rax
1267	neg	r15
1268	mov	r11,QWORD PTR[24+rdi]
1269	adc	r12,rax
1270	mov	rax,QWORD PTR[((-8))+rbp*1+rsi]
1271	mov	QWORD PTR[rdi],r12
1272	adc	r13,rdx
1273
1274	lea	rbx,QWORD PTR[r10*2+r14]
1275	mov	QWORD PTR[8+rdi],r13
1276	sbb	r15,r15
1277	shr	r10,63
1278	lea	r8,QWORD PTR[r11*2+rcx]
1279	shr	r11,63
1280	or	r8,r10
1281	mov	r10,QWORD PTR[32+rdi]
1282	mov	r14,r11
1283	mul	rax
1284	neg	r15
1285	mov	r11,QWORD PTR[40+rdi]
1286	adc	rbx,rax
1287	mov	rax,QWORD PTR[rbp*1+rsi]
1288	mov	QWORD PTR[16+rdi],rbx
1289	adc	r8,rdx
1290	lea	rbp,QWORD PTR[16+rbp]
1291	mov	QWORD PTR[24+rdi],r8
1292	sbb	r15,r15
1293	lea	rdi,QWORD PTR[64+rdi]
1294	jmp	$L$sqr4x_shift_n_add
1295
1296ALIGN	32
; Main shift-and-add loop: four source words (eight scratch words) per
; iteration; rbp climbs by 32 to zero, rdi advances by 64.
1297$L$sqr4x_shift_n_add::
1298	lea	r12,QWORD PTR[r10*2+r14]
1299	shr	r10,63
1300	lea	r13,QWORD PTR[r11*2+rcx]
1301	shr	r11,63
1302	or	r13,r10
1303	mov	r10,QWORD PTR[((-16))+rdi]
1304	mov	r14,r11
1305	mul	rax
1306	neg	r15
1307	mov	r11,QWORD PTR[((-8))+rdi]
1308	adc	r12,rax
1309	mov	rax,QWORD PTR[((-8))+rbp*1+rsi]
1310	mov	QWORD PTR[((-32))+rdi],r12
1311	adc	r13,rdx
1312
1313	lea	rbx,QWORD PTR[r10*2+r14]
1314	mov	QWORD PTR[((-24))+rdi],r13
1315	sbb	r15,r15
1316	shr	r10,63
1317	lea	r8,QWORD PTR[r11*2+rcx]
1318	shr	r11,63
1319	or	r8,r10
1320	mov	r10,QWORD PTR[rdi]
1321	mov	r14,r11
1322	mul	rax
1323	neg	r15
1324	mov	r11,QWORD PTR[8+rdi]
1325	adc	rbx,rax
1326	mov	rax,QWORD PTR[rbp*1+rsi]
1327	mov	QWORD PTR[((-16))+rdi],rbx
1328	adc	r8,rdx
1329
1330	lea	r12,QWORD PTR[r10*2+r14]
1331	mov	QWORD PTR[((-8))+rdi],r8
1332	sbb	r15,r15
1333	shr	r10,63
1334	lea	r13,QWORD PTR[r11*2+rcx]
1335	shr	r11,63
1336	or	r13,r10
1337	mov	r10,QWORD PTR[16+rdi]
1338	mov	r14,r11
1339	mul	rax
1340	neg	r15
1341	mov	r11,QWORD PTR[24+rdi]
1342	adc	r12,rax
1343	mov	rax,QWORD PTR[8+rbp*1+rsi]
1344	mov	QWORD PTR[rdi],r12
1345	adc	r13,rdx
1346
1347	lea	rbx,QWORD PTR[r10*2+r14]
1348	mov	QWORD PTR[8+rdi],r13
1349	sbb	r15,r15
1350	shr	r10,63
1351	lea	r8,QWORD PTR[r11*2+rcx]
1352	shr	r11,63
1353	or	r8,r10
1354	mov	r10,QWORD PTR[32+rdi]
1355	mov	r14,r11
1356	mul	rax
1357	neg	r15
1358	mov	r11,QWORD PTR[40+rdi]
1359	adc	rbx,rax
1360	mov	rax,QWORD PTR[16+rbp*1+rsi]
1361	mov	QWORD PTR[16+rdi],rbx
1362	adc	r8,rdx
1363	mov	QWORD PTR[24+rdi],r8
1364	sbb	r15,r15
1365	lea	rdi,QWORD PTR[64+rdi]
1366	add	rbp,32
1367	jnz	$L$sqr4x_shift_n_add
1368
; Tail of phase 2: the last two source words.
1369	lea	r12,QWORD PTR[r10*2+r14]
1370DB	067h
1371	shr	r10,63
1372	lea	r13,QWORD PTR[r11*2+rcx]
1373	shr	r11,63
1374	or	r13,r10
1375	mov	r10,QWORD PTR[((-16))+rdi]
1376	mov	r14,r11
1377	mul	rax
1378	neg	r15
1379	mov	r11,QWORD PTR[((-8))+rdi]
1380	adc	r12,rax
1381	mov	rax,QWORD PTR[((-8))+rsi]
1382	mov	QWORD PTR[((-32))+rdi],r12
1383	adc	r13,rdx
1384
1385	lea	rbx,QWORD PTR[r10*2+r14]
1386	mov	QWORD PTR[((-24))+rdi],r13
1387	sbb	r15,r15
1388	shr	r10,63
1389	lea	r8,QWORD PTR[r11*2+rcx]
1390	shr	r11,63
1391	or	r8,r10
1392	mul	rax
1393	neg	r15
1394	adc	rbx,rax
1395	adc	r8,rdx
1396	mov	QWORD PTR[((-16))+rdi],rbx
1397	mov	QWORD PTR[((-8))+rdi],r8
; The DB bytes encode: movq rbp,xmm2 (pointer parked by the caller; read
; below with a 16-byte stride).
1398DB	102,72,15,126,213
; sqr8x_reduction -- phase 3; also a CALL target of bn_from_mont8x.
; In: rbp = strided vector, r9 = length in bytes, scratch words at [48+8+rsp],
; multiplier constant at [32+8+rsp].
; Frame slots written here: [0+8+rsp] = end of the rbp vector (rbp + 2*r9),
; [8+8+rsp] = end of the scratch words.  r9 is negated for the loop.
1399sqr8x_reduction::
1400	xor	rax,rax
1401	lea	rcx,QWORD PTR[r9*2+rbp]
1402	lea	rdx,QWORD PTR[((48+8))+r9*2+rsp]
1403	mov	QWORD PTR[((0+8))+rsp],rcx
1404	lea	rdi,QWORD PTR[((48+8))+r9*1+rsp]
1405	mov	QWORD PTR[((8+8))+rsp],rdx
1406	neg	r9
1407	jmp	$L$8x_reduction_loop
1408
1409ALIGN	32
; One reduction block: load eight scratch words into rbx, r9..r15 (r9 is
; reused as data here and reloaded from xmm3 at the bottom), store the carry
; from the previous block at [rdx], and derive the first multiplier
; rbx = word0 * [32+8+rsp].
1410$L$8x_reduction_loop::
1411	lea	rdi,QWORD PTR[r9*1+rdi]
1412DB	066h
1413	mov	rbx,QWORD PTR[rdi]
1414	mov	r9,QWORD PTR[8+rdi]
1415	mov	r10,QWORD PTR[16+rdi]
1416	mov	r11,QWORD PTR[24+rdi]
1417	mov	r12,QWORD PTR[32+rdi]
1418	mov	r13,QWORD PTR[40+rdi]
1419	mov	r14,QWORD PTR[48+rdi]
1420	mov	r15,QWORD PTR[56+rdi]
1421	mov	QWORD PTR[rdx],rax
1422	lea	rdi,QWORD PTR[64+rdi]
1423
1424DB	067h
1425	mov	r8,rbx
1426	imul	rbx,QWORD PTR[((32+8))+rsp]
1427	mov	rax,QWORD PTR[rbp]
1428	mov	ecx,8
1429	jmp	$L$8x_reduce
1430
1431ALIGN	32
; Eight iterations (ecx counts down).  Each multiplies the first eight words
; at rbp (stride 16) by rbx and folds the products into r8..r15, shifting the
; window down one word.  The multiplier is saved on the stack for $L$8x_tail,
; and the next multiplier is prepared in rsi.
1432$L$8x_reduce::
1433	mul	rbx
1434	mov	rax,QWORD PTR[16+rbp]
; neg sets CF = (r8 != 0); the low product word itself is not added, only
; that carry is folded into the new r8.
1435	neg	r8
1436	mov	r8,rdx
1437	adc	r8,0
1438
1439	mul	rbx
1440	add	r9,rax
1441	mov	rax,QWORD PTR[32+rbp]
1442	adc	rdx,0
1443	add	r8,r9
1444	mov	QWORD PTR[((48-8+8))+rcx*8+rsp],rbx
1445	mov	r9,rdx
1446	adc	r9,0
1447
1448	mul	rbx
1449	add	r10,rax
1450	mov	rax,QWORD PTR[48+rbp]
1451	adc	rdx,0
1452	add	r9,r10
1453	mov	rsi,QWORD PTR[((32+8))+rsp]
1454	mov	r10,rdx
1455	adc	r10,0
1456
1457	mul	rbx
1458	add	r11,rax
1459	mov	rax,QWORD PTR[64+rbp]
1460	adc	rdx,0
1461	imul	rsi,r8
1462	add	r10,r11
1463	mov	r11,rdx
1464	adc	r11,0
1465
1466	mul	rbx
1467	add	r12,rax
1468	mov	rax,QWORD PTR[80+rbp]
1469	adc	rdx,0
1470	add	r11,r12
1471	mov	r12,rdx
1472	adc	r12,0
1473
1474	mul	rbx
1475	add	r13,rax
1476	mov	rax,QWORD PTR[96+rbp]
1477	adc	rdx,0
1478	add	r12,r13
1479	mov	r13,rdx
1480	adc	r13,0
1481
1482	mul	rbx
1483	add	r14,rax
1484	mov	rax,QWORD PTR[112+rbp]
1485	adc	rdx,0
1486	add	r13,r14
1487	mov	r14,rdx
1488	adc	r14,0
1489
1490	mul	rbx
1491	mov	rbx,rsi
1492	add	r15,rax
1493	mov	rax,QWORD PTR[rbp]
1494	adc	rdx,0
1495	add	r14,r15
1496	mov	r15,rdx
1497	adc	r15,0
1498
1499	dec	ecx
1500	jnz	$L$8x_reduce
1501
; Advance rbp by one 8-word group (128 bytes at stride 16).  If that was the
; whole vector, skip the tail.
1502	lea	rbp,QWORD PTR[128+rbp]
1503	xor	rax,rax
1504	mov	rdx,QWORD PTR[((8+8))+rsp]
1505	cmp	rbp,QWORD PTR[((0+8))+rsp]
1506	jae	$L$8x_no_tail
1507
; Add the next eight scratch words; the carry out is kept in rsi as 0 / -1.
1508DB	066h
1509	add	r8,QWORD PTR[rdi]
1510	adc	r9,QWORD PTR[8+rdi]
1511	adc	r10,QWORD PTR[16+rdi]
1512	adc	r11,QWORD PTR[24+rdi]
1513	adc	r12,QWORD PTR[32+rdi]
1514	adc	r13,QWORD PTR[40+rdi]
1515	adc	r14,QWORD PTR[48+rdi]
1516	adc	r15,QWORD PTR[56+rdi]
1517	sbb	rsi,rsi
1518
1519	mov	rbx,QWORD PTR[((48+56+8))+rsp]
1520	mov	ecx,8
1521	mov	rax,QWORD PTR[rbp]
1522	jmp	$L$8x_tail
1523
1524ALIGN	32
; Tail: replay the eight saved multipliers against the following 8-word
; groups at rbp, writing one finished word to [rdi] per iteration.
1525$L$8x_tail::
1526	mul	rbx
1527	add	r8,rax
1528	mov	rax,QWORD PTR[16+rbp]
1529	mov	QWORD PTR[rdi],r8
1530	mov	r8,rdx
1531	adc	r8,0
1532
1533	mul	rbx
1534	add	r9,rax
1535	mov	rax,QWORD PTR[32+rbp]
1536	adc	rdx,0
1537	add	r8,r9
1538	lea	rdi,QWORD PTR[8+rdi]
1539	mov	r9,rdx
1540	adc	r9,0
1541
1542	mul	rbx
1543	add	r10,rax
1544	mov	rax,QWORD PTR[48+rbp]
1545	adc	rdx,0
1546	add	r9,r10
1547	mov	r10,rdx
1548	adc	r10,0
1549
1550	mul	rbx
1551	add	r11,rax
1552	mov	rax,QWORD PTR[64+rbp]
1553	adc	rdx,0
1554	add	r10,r11
1555	mov	r11,rdx
1556	adc	r11,0
1557
1558	mul	rbx
1559	add	r12,rax
1560	mov	rax,QWORD PTR[80+rbp]
1561	adc	rdx,0
1562	add	r11,r12
1563	mov	r12,rdx
1564	adc	r12,0
1565
1566	mul	rbx
1567	add	r13,rax
1568	mov	rax,QWORD PTR[96+rbp]
1569	adc	rdx,0
1570	add	r12,r13
1571	mov	r13,rdx
1572	adc	r13,0
1573
1574	mul	rbx
1575	add	r14,rax
1576	mov	rax,QWORD PTR[112+rbp]
1577	adc	rdx,0
1578	add	r13,r14
1579	mov	r14,rdx
1580	adc	r14,0
1581
1582	mul	rbx
1583	mov	rbx,QWORD PTR[((48-16+8))+rcx*8+rsp]
1584	add	r15,rax
1585	adc	rdx,0
1586	add	r14,r15
1587	mov	rax,QWORD PTR[rbp]
1588	mov	r15,rdx
1589	adc	r15,0
1590
1591	dec	ecx
1592	jnz	$L$8x_tail
1593
1594	lea	rbp,QWORD PTR[128+rbp]
1595	mov	rdx,QWORD PTR[((8+8))+rsp]
1596	cmp	rbp,QWORD PTR[((0+8))+rsp]
1597	jae	$L$8x_tail_done
1598
; More groups remain: turn the saved carry back into CF (neg rsi), add the
; next eight scratch words and run the tail again.
1599	mov	rbx,QWORD PTR[((48+56+8))+rsp]
1600	neg	rsi
1601	mov	rax,QWORD PTR[rbp]
1602	adc	r8,QWORD PTR[rdi]
1603	adc	r9,QWORD PTR[8+rdi]
1604	adc	r10,QWORD PTR[16+rdi]
1605	adc	r11,QWORD PTR[24+rdi]
1606	adc	r12,QWORD PTR[32+rdi]
1607	adc	r13,QWORD PTR[40+rdi]
1608	adc	r14,QWORD PTR[48+rdi]
1609	adc	r15,QWORD PTR[56+rdi]
1610	sbb	rsi,rsi
1611
1612	mov	ecx,8
1613	jmp	$L$8x_tail
1614
1615ALIGN	32
; End of the tail: add the carry word stored at [rdx] by the block prologue,
; then restore CF from rsi for the final add below.
1616$L$8x_tail_done::
1617	add	r8,QWORD PTR[rdx]
1618	xor	rax,rax
1619
1620	neg	rsi
; Add the top eight scratch words; rax receives the carry out.  rcx = word at
; [rbp-16], i.e. the last word of the strided vector.
1621$L$8x_no_tail::
1622	adc	r8,QWORD PTR[rdi]
1623	adc	r9,QWORD PTR[8+rdi]
1624	adc	r10,QWORD PTR[16+rdi]
1625	adc	r11,QWORD PTR[24+rdi]
1626	adc	r12,QWORD PTR[32+rdi]
1627	adc	r13,QWORD PTR[40+rdi]
1628	adc	r14,QWORD PTR[48+rdi]
1629	adc	r15,QWORD PTR[56+rdi]
1630	adc	rax,0
1631	mov	rcx,QWORD PTR[((-16))+rbp]
1632	xor	rsi,rsi
1633
; movq rbp,xmm2: rewind rbp to the start of the strided vector.
1634DB	102,72,15,126,213
1635
1636	mov	QWORD PTR[rdi],r8
1637	mov	QWORD PTR[8+rdi],r9
; movq r9,xmm3: reload r9 with the value parked by the caller.  NOTE(review):
; bn_power5 parks -(length in bytes) there; confirm for other callers.
1638DB	102,73,15,126,217
1639	mov	QWORD PTR[16+rdi],r10
1640	mov	QWORD PTR[24+rdi],r11
1641	mov	QWORD PTR[32+rdi],r12
1642	mov	QWORD PTR[40+rdi],r13
1643	mov	QWORD PTR[48+rdi],r14
1644	mov	QWORD PTR[56+rdi],r15
1645	lea	rdi,QWORD PTR[64+rdi]
1646
1647	cmp	rdi,rdx
1648	jb	$L$8x_reduction_loop
1649
; Compare the last vector word (rcx) with the top result word (r15):
; CF = rcx < r15.  rax = ((carry | borrow) ^ 1), either 0 or 1, is scaled by 8
; to pick the starting word for the subtrahend pointer rbp.  rdi and rsi are
; both reloaded from xmm1.
1650	sub	rcx,r15
1651	lea	rbx,QWORD PTR[r9*1+rdi]
1652	adc	rsi,rsi
1653	mov	rcx,r9
1654	or	rax,rsi
1655DB	102,72,15,126,207
1656	xor	rax,1
1657DB	102,72,15,126,206
1658	lea	rbp,QWORD PTR[rax*8+rbp]
; NOTE(review): the sbb chain below relies on CF being clear on entry.  sar
; leaves CF = last bit shifted out, which is 0 only if the low five bits of
; rcx are zero -- presumably guaranteed by the length constraints; confirm.
1659	sar	rcx,3+2
1660	jmp	$L$sqr4x_sub
1661
1662ALIGN	32
; $L$sqr4x_sub -- phase 4; also the tail-jump target of mul4x_internal.
; In: rbx = minuend words (consecutive), rbp = subtrahend words (stride 16),
;     rdi = destination, rcx = negative count of 4-word groups, r9 = negative
;     length in bytes, CF = incoming borrow.
; Four words per iteration; lea/mov/inc leave CF untouched, so the borrow
; carries from one iteration to the next.
1663$L$sqr4x_sub::
1664DB	066h
1665	mov	r12,QWORD PTR[rbx]
1666	mov	r13,QWORD PTR[8+rbx]
1667	sbb	r12,QWORD PTR[rbp]
1668	mov	r14,QWORD PTR[16+rbx]
1669	sbb	r13,QWORD PTR[16+rbp]
1670	mov	r15,QWORD PTR[24+rbx]
1671	lea	rbx,QWORD PTR[32+rbx]
1672	sbb	r14,QWORD PTR[32+rbp]
1673	mov	QWORD PTR[rdi],r12
1674	sbb	r15,QWORD PTR[48+rbp]
1675	lea	rbp,QWORD PTR[64+rbp]
1676	mov	QWORD PTR[8+rdi],r13
1677	mov	QWORD PTR[16+rdi],r14
1678	mov	QWORD PTR[24+rdi],r15
1679	lea	rdi,QWORD PTR[32+rdi]
1680
1681	inc	rcx
1682	jnz	$L$sqr4x_sub
; Leave r10 = -(length in bytes) and r9 = +(length in bytes) for the caller
; (bn_power5 calls this routine back-to-back and relies on both).
1683	mov	r10,r9
1684	neg	r9
1685	DB	0F3h,0C3h		;repret
1686bn_sqr8x_internal	ENDP
1687PUBLIC	bn_from_montgomery
1688
1689ALIGN	32
; bn_from_montgomery -- public Win64 entry point.
; Tests the low three bits of the dword at [48+rsp] (the sixth stack-passed
; argument slot).  If they are all clear it tail-jumps to bn_from_mont8x with
; the argument registers and stack untouched; otherwise it returns 0 in eax
; without doing any work.
; NOTE(review): presumably the 0 return tells the caller to fall back to
; another implementation -- confirm at the call sites.
1690bn_from_montgomery	PROC PUBLIC
1691	test	DWORD PTR[48+rsp],7
1692	jz	bn_from_mont8x
1693	xor	eax,eax
1694	DB	0F3h,0C3h		;repret
1695bn_from_montgomery	ENDP
1696
1697
1698ALIGN	32
; bn_from_mont8x -- worker reached by tail jump from bn_from_montgomery.
;
; Entry (Win64): rcx, rdx, r8, r9, [rsp+40], [rsp+48] carry the six arguments.
; They are remapped to rdi, rsi, rdx, rcx, r8, r9 below. As used here:
;   rsi = input vector that is copied into the stack work area
;   rcx = pointer handed to sqr8x_reduction in rbp
;   r8  = pointer; the qword it points to is loaded and kept at [rsp+32]
;   r9d = length in 64-bit words (the dispatcher only lets multiples of 8 in)
;   rdi = parked in xmm1 for the reduction code
;         (presumably the result pointer -- confirm against the C prototype)
; Returns 1 in rax.
;
; Frame built below (offsets from the final, 64-byte aligned rsp):
;   [rsp+32] value loaded from *r8
;   [rsp+40] rsp as it was on entry (used by the epilogue and by mul_handler)
;   [rsp+48] start of the work area; see the $L$mul_by_1 loop for its extent
1699bn_from_mont8x	PROC PRIVATE
1700	mov	QWORD PTR[8+rsp],rdi	;WIN64 prologue
1701	mov	QWORD PTR[16+rsp],rsi	; rdi/rsi are parked in the caller-provided slots above the return address
1702	mov	rax,rsp
1703$L$SEH_begin_bn_from_mont8x::
1704	mov	rdi,rcx			; arg1
1705	mov	rsi,rdx			; arg2
1706	mov	rdx,r8			; arg3
1707	mov	rcx,r9			; arg4
1708	mov	r8,QWORD PTR[40+rsp]	; arg5
1709	mov	r9,QWORD PTR[48+rsp]	; arg6
1710
1711
1712DB	067h				; lone prefix byte ahead of the next instruction (presumably padding)
1713	mov	rax,rsp			; rax = entry rsp, kept until it is stored at [rsp+40]
1714	push	rbx			; saved at [rax-8]
1715	push	rbp			; saved at [rax-16]
1716	push	r12			; saved at [rax-24]
1717	push	r13			; saved at [rax-32]
1718	push	r14			; saved at [rax-40]
1719	push	r15			; saved at [rax-48]
1720	lea	rsp,QWORD PTR[((-40))+rsp]	; room for two XMM saves (32 bytes) plus 8 bytes
1721	movaps	XMMWORD PTR[rsp],xmm6		; xmm6 at [rax-88]
1722	movaps	XMMWORD PTR[16+rsp],xmm7	; xmm7 at [rax-72]
1723DB	067h				; prefix byte, as above
1724	mov	r10d,r9d
1725	shl	r9d,3			; r9  = length in bytes (num*8)
1726	shl	r10d,3+2		; r10 = num*32
1727	neg	r9			; r9  = -bytes
1728	mov	r8,QWORD PTR[r8]	; r8  = qword pointed to by arg5
1729
1730
1731
1732
1733
1734
1735
; Choose where the frame goes. r11 becomes the distance, modulo 4096, between
; the candidate frame bottom (rsp - 64 - 2*bytes) and the input at rsi; rsp is
; then lowered by an amount derived from it.
; NOTE(review): presumably this keeps the work area from aliasing the input
; modulo the 4 KiB page size -- confirm against the perlasm source.
1736	lea	r11,QWORD PTR[((-64))+r9*2+rsp]	; candidate bottom = rsp - 64 - 2*bytes
1737	sub	r11,rsi
1738	and	r11,4095		; r11 = (candidate - rsi) mod 4096
1739	cmp	r10,r11
1740	jb	$L$from_sp_alt		; num*32 < r11: use the alternative placement
1741	sub	rsp,r11			; drop rsp by the full modular distance ...
1742	lea	rsp,QWORD PTR[((-64))+r9*2+rsp]	; ... then reserve 64 + 2*bytes
1743	jmp	$L$from_sp_done
1744
1745ALIGN	32
1746$L$from_sp_alt::
1747	lea	r10,QWORD PTR[((4096-64))+r9*2]	; r10 = 4096 - 64 - 2*bytes
1748	lea	rsp,QWORD PTR[((-64))+r9*2+rsp]	; reserve 64 + 2*bytes
1749	sub	r11,r10
1750	mov	r10,0			; mov leaves the borrow from the sub intact for cmovc
1751	cmovc	r11,r10			; r11 = max(r11 - r10, 0) in unsigned terms
1752	sub	rsp,r11
1753$L$from_sp_done::
1754	and	rsp,-64			; 64-byte align the frame (the movdqa stores below need 16)
1755	mov	r10,r9			; r10 = -bytes
1756	neg	r9			; r9  = +bytes
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767	mov	QWORD PTR[32+rsp],r8	; keep the value loaded from *arg5
1768	mov	QWORD PTR[40+rsp],rax	; keep the entry rsp
1769$L$from_body::
1770	mov	r11,r9			; r11 = bytes left to copy
1771	lea	rax,QWORD PTR[48+rsp]	; rax = start of the work area
1772	pxor	xmm0,xmm0		; xmm0 = 0, used for the zero fill
1773	jmp	$L$mul_by_1
1774
1775ALIGN	32
; Each pass copies 64 bytes from [rsi] to [rax] and writes 64 zero bytes at
; [rax+r9]. Over the whole loop the work area therefore holds the input in
; its first r9 bytes and zeros in the following r9 bytes.
1776$L$mul_by_1::
1777	movdqu	xmm1,XMMWORD PTR[rsi]		; input is read with unaligned loads
1778	movdqu	xmm2,XMMWORD PTR[16+rsi]
1779	movdqu	xmm3,XMMWORD PTR[32+rsi]
1780	movdqa	XMMWORD PTR[r9*1+rax],xmm0
1781	movdqu	xmm4,XMMWORD PTR[48+rsi]
1782	movdqa	XMMWORD PTR[16+r9*1+rax],xmm0
1783DB	048h,08dh,0b6h,040h,000h,000h,000h	; hand-encoded: lea rsi,[rsi+64]
1784	movdqa	XMMWORD PTR[rax],xmm1
1785	movdqa	XMMWORD PTR[32+r9*1+rax],xmm0
1786	movdqa	XMMWORD PTR[16+rax],xmm2
1787	movdqa	XMMWORD PTR[48+r9*1+rax],xmm0
1788	movdqa	XMMWORD PTR[32+rax],xmm3
1789	movdqa	XMMWORD PTR[48+rax],xmm4
1790	lea	rax,QWORD PTR[64+rax]
1791	sub	r11,64
1792	jnz	$L$mul_by_1		; exits only when r11 hits exactly 0, so bytes must be a multiple of 64
1793
; Park three values in XMM registers before the call; the reduction code
; earlier in this file moves xmm1/xmm2/xmm3 back into general registers.
1794DB	102,72,15,110,207		; hand-encoded: movq xmm1,rdi
1795DB	102,72,15,110,209		; hand-encoded: movq xmm2,rcx
1796DB	067h				; prefix byte (presumably padding)
1797	mov	rbp,rcx			; rbp = arg4 pointer
1798DB	102,73,15,110,218		; hand-encoded: movq xmm3,r10 (r10 = -bytes)
1799	call	sqr8x_reduction		; defined elsewhere in this file
1800
1801	pxor	xmm0,xmm0
1802	lea	rax,QWORD PTR[48+rsp]	; back to the start of the work area
1803	mov	rsi,QWORD PTR[40+rsp]	; rsi = entry rsp saved above
1804	jmp	$L$from_mont_zero
1805
1806ALIGN	32
; Wipe the work area: 64 bytes are cleared per pass while r9 drops by 32.
; NOTE(review): r9 here is whatever sqr8x_reduction left in it; presumably
; still the byte length, which would make the loop clear 2*bytes -- confirm.
1807$L$from_mont_zero::
1808	movdqa	XMMWORD PTR[rax],xmm0
1809	movdqa	XMMWORD PTR[16+rax],xmm0
1810	movdqa	XMMWORD PTR[32+rax],xmm0
1811	movdqa	XMMWORD PTR[48+rax],xmm0
1812	lea	rax,QWORD PTR[64+rax]
1813	sub	r9,32
1814	jnz	$L$from_mont_zero
1815
1816	mov	rax,1			; return value
1817	mov	r15,QWORD PTR[((-48))+rsi]	; reload the six pushed registers relative to the entry rsp
1818	mov	r14,QWORD PTR[((-40))+rsi]
1819	mov	r13,QWORD PTR[((-32))+rsi]
1820	mov	r12,QWORD PTR[((-24))+rsi]
1821	mov	rbp,QWORD PTR[((-16))+rsi]
1822	mov	rbx,QWORD PTR[((-8))+rsi]
1823	lea	rsp,QWORD PTR[rsi]	; rsp = entry rsp
; NOTE(review): xmm6/xmm7 were saved in the prologue but are not reloaded
; here; nothing in this procedure's own body writes them -- confirm that
; sqr8x_reduction does not either.
1824$L$from_epilogue::
1825	mov	rdi,QWORD PTR[8+rsp]	;WIN64 epilogue
1826	mov	rsi,QWORD PTR[16+rsp]
1827	DB	0F3h,0C3h		;repret
1828$L$SEH_end_bn_from_mont8x::
1829bn_from_mont8x	ENDP
1830PUBLIC	bn_scatter5
1831
1832ALIGN	16
; bn_scatter5 -- store a vector of qwords into a strided table.
;
; Entry (Win64 registers, used directly; no frame is set up):
;   rcx = source pointer (qwords are read consecutively)
;   edx = number of qwords to store; 0 returns immediately
;   r8  = table base
;   r9  = slot index; the first store goes to r8 + r9*8
; Source word i lands at r8 + r9*8 + i*256, so successive words of one
; vector are 256 bytes apart. bn_gather5 reads the same layout.
; Writes rax, rcx, edx, r8 and flags only. No return value is set.
1833bn_scatter5	PROC PUBLIC
1834	cmp	edx,0
1835	jz	$L$scatter_epilogue	; nothing to store
1836	lea	r8,QWORD PTR[r9*8+r8]	; r8 = address of the selected slot in row 0
1837$L$scatter::
1838	mov	rax,QWORD PTR[rcx]	; next source word
1839	lea	rcx,QWORD PTR[8+rcx]
1840	mov	QWORD PTR[r8],rax	; store it in the current row
1841	lea	r8,QWORD PTR[256+r8]	; advance to the same slot in the next row
1842	sub	edx,1
1843	jnz	$L$scatter
1844$L$scatter_epilogue::
1845	DB	0F3h,0C3h		;repret
1846bn_scatter5	ENDP
1847
1848PUBLIC	bn_gather5
1849
1850ALIGN	16
; bn_gather5 -- read one vector back out of the table written by bn_scatter5.
;
; Entry (Win64 registers, used directly):
;   rcx = destination pointer (qwords are written consecutively)
;   edx = number of qwords to fetch
;   r8  = table base
;   r9d = slot index
; For every 256-byte row the loop loads four qwords, one from each 64-byte
; group at byte offset (idx & 7)*8, ANDs each with a mask and ORs the results.
; Exactly one mask is all-ones, chosen by (idx >> 3) & 3, so the value kept is
; the one at row + idx*8 for idx in 0..31, while all four groups are read
; whatever the index is.
; NOTE(review): the byte offset inside each 64-byte group still depends on
; idx & 7.
; NOTE(review): unlike bn_scatter5 there is no check for edx == 0; the loop
; decrements before testing, so a zero count would wrap -- callers presumably
; never pass 0.
;
; The first three DB lines are the hand-encoded prologue (13 bytes in total);
; its byte offsets 4, 8 and 13 are the ones listed in the unwind codes under
; $L$SEH_info_bn_gather5.
1851bn_gather5	PROC PUBLIC
1852$L$SEH_begin_bn_gather5::
1853
1854DB	048h,083h,0ech,028h		; sub rsp,40
1855DB	00fh,029h,034h,024h		; movaps [rsp],xmm6
1856DB	00fh,029h,07ch,024h,010h	; movaps [rsp+16],xmm7
1857	mov	r11d,r9d
1858	shr	r9d,3
1859	and	r11,7			; r11 = idx & 7   (qword within a 64-byte group)
1860	not	r9d
1861	lea	rax,QWORD PTR[$L$magic_masks]
1862	and	r9d,3			; r9  = 3 - ((idx >> 3) & 3)
1863	lea	r8,QWORD PTR[128+r11*8+r8]	; bias by 128 so the four loads use -128/-64/0/+64
1864	movq	xmm4,QWORD PTR[r9*8+rax]	; mask for group 0: all-ones iff (idx>>3)&3 == 0
1865	movq	xmm5,QWORD PTR[8+r9*8+rax]	; mask for group 1
1866	movq	xmm6,QWORD PTR[16+r9*8+rax]	; mask for group 2
1867	movq	xmm7,QWORD PTR[24+r9*8+rax]	; mask for group 3
1868	jmp	$L$gather
1869ALIGN	16
1870$L$gather::
1871	movq	xmm0,QWORD PTR[(((-128)))+r8]	; candidate from group 0
1872	movq	xmm1,QWORD PTR[((-64))+r8]	; candidate from group 1
1873	pand	xmm0,xmm4
1874	movq	xmm2,QWORD PTR[r8]		; candidate from group 2
1875	pand	xmm1,xmm5
1876	movq	xmm3,QWORD PTR[64+r8]		; candidate from group 3
1877	pand	xmm2,xmm6
1878	por	xmm0,xmm1
1879	pand	xmm3,xmm7
1880DB	067h,067h			; two prefix bytes ahead of the next instruction (presumably padding)
1881	por	xmm0,xmm2
1882	lea	r8,QWORD PTR[256+r8]	; next row
1883	por	xmm0,xmm3		; xmm0 = the single unmasked candidate
1884
1885	movq	QWORD PTR[rcx],xmm0
1886	lea	rcx,QWORD PTR[8+rcx]
1887	sub	edx,1
1888	jnz	$L$gather
1889	movaps	xmm6,XMMWORD PTR[rsp]	; restore the callee-saved XMM registers
1890	movaps	xmm7,XMMWORD PTR[16+rsp]
1891	lea	rsp,QWORD PTR[40+rsp]
1892	DB	0F3h,0C3h		;repret
1893$L$SEH_end_bn_gather5::
1894bn_gather5	ENDP
1895ALIGN	64
; Selection masks for the gather code: eight qwords, of which only qword 3 is
; all-ones and the rest are zero. The readers load four consecutive qwords
; starting at index k (0..3), so exactly one of the four loaded masks is
; all-ones: the one at position 3-k.
1896$L$magic_masks::
1897	DD	0,0,0,0,0,0,-1,-1	; qwords 0..2 = 0, qword 3 = all-ones
1898	DD	0,0,0,0,0,0,0,0		; qwords 4..7 = 0
; NUL-terminated ASCII identification string:
; "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>"
1899DB	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
1900DB	112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115
1901DB	99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111
1902DB	114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79
1903DB	71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111
1904DB	112,101,110,115,115,108,46,111,114,103,62,0
1905EXTERN	__imp_RtlVirtualUnwind:NEAR
1906
1907ALIGN	16
; mul_handler -- Win64 language-specific exception handler shared by every
; routine whose .xdata entry names it (bn_mul_mont_gather5,
; bn_mul4x_mont_gather5, bn_power5, bn_from_mont8x).
;
; Entry (standard handler signature): rcx = exception record,
; rdx = establisher frame, r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*.
; Field names in the comments below follow the Windows x64 CONTEXT and
; DISPATCHER_CONTEXT layouts -- confirm offsets against winnt.h.
;
; HandlerData (the two RVAs after the handler RVA in .xdata) delimits the
; part of the routine where its frame is fully set up:
;   Rip <  HandlerData[0] (body label)     -> frame not built yet; only
;                                             rdi/rsi are fetched, relative
;                                             to context->Rax
;   Rip >= HandlerData[1] (epilogue label) -> frame already torn down; only
;                                             rdi/rsi are fetched, relative
;                                             to context->Rsp
;   otherwise                              -> recover the caller's rsp from
;                                             the frame, then restore rbx,
;                                             rbp, r12-r15, xmm6, xmm7 too
; The updated CONTEXT is copied to disp->ContextRecord, RtlVirtualUnwind is
; called, and 1 (ExceptionContinueSearch) is returned.
1908mul_handler	PROC PRIVATE
1909	push	rsi
1910	push	rdi
1911	push	rbx
1912	push	rbp
1913	push	r12
1914	push	r13
1915	push	r14
1916	push	r15
1917	pushfq
1918	sub	rsp,64			; 32-byte home area plus four stack arguments for the call below
1919
1920	mov	rax,QWORD PTR[120+r8]	; context->Rax (holds the entry rsp while in a prologue)
1921	mov	rbx,QWORD PTR[248+r8]	; context->Rip
1922
1923	mov	rsi,QWORD PTR[8+r9]	; disp->ImageBase
1924	mov	r11,QWORD PTR[56+r9]	; disp->HandlerData
1925
1926	mov	r10d,DWORD PTR[r11]	; HandlerData[0]: RVA of the body label
1927	lea	r10,QWORD PTR[r10*1+rsi]
1928	cmp	rbx,r10
1929	jb	$L$common_seh_tail	; still in the prologue
1930
1931	mov	rax,QWORD PTR[152+r8]	; context->Rsp
1932
1933	mov	r10d,DWORD PTR[4+r11]	; HandlerData[1]: RVA of the epilogue label
1934	lea	r10,QWORD PTR[r10*1+rsi]
1935	cmp	rbx,r10
1936	jae	$L$common_seh_tail	; already in the epilogue
1937
; Two frame layouts exist. bn_mul_mont_gather5, the only routine that ends at
; $L$mul_epilogue, stores the caller's rsp at [rsp+8+num*8]; the routines
; placed after it in this file (e.g. bn_from_mont8x) store it at [rsp+40].
; The fixed-offset path must therefore be taken when Rip lies ABOVE
; $L$mul_epilogue. The previous `jb` had the two cases swapped, so every
; in-body fault read the saved rsp from the wrong slot.
1938	lea	r10,QWORD PTR[$L$mul_epilogue]
1939	cmp	rbx,r10
1940	ja	$L$body_40
1941
1942	mov	r10,QWORD PTR[192+r8]	; context->R9 (num in bn_mul_mont_gather5)
1943	mov	rax,QWORD PTR[8+r10*8+rax]	; caller's rsp from the num-indexed slot
1944	jmp	$L$body_proceed
1945
1946$L$body_40::
1947	mov	rax,QWORD PTR[40+rax]	; caller's rsp from the fixed slot
1948$L$body_proceed::
1949
1950	movaps	xmm0,XMMWORD PTR[((-88))+rax]	; saved xmm6
1951	movaps	xmm1,XMMWORD PTR[((-72))+rax]	; saved xmm7
1952
1953	mov	rbx,QWORD PTR[((-8))+rax]	; registers pushed by the routine's prologue
1954	mov	rbp,QWORD PTR[((-16))+rax]
1955	mov	r12,QWORD PTR[((-24))+rax]
1956	mov	r13,QWORD PTR[((-32))+rax]
1957	mov	r14,QWORD PTR[((-40))+rax]
1958	mov	r15,QWORD PTR[((-48))+rax]
1959	mov	QWORD PTR[144+r8],rbx	; context->Rbx
1960	mov	QWORD PTR[160+r8],rbp	; context->Rbp
1961	mov	QWORD PTR[216+r8],r12	; context->R12
1962	mov	QWORD PTR[224+r8],r13	; context->R13
1963	mov	QWORD PTR[232+r8],r14	; context->R14
1964	mov	QWORD PTR[240+r8],r15	; context->R15
1965	movups	XMMWORD PTR[512+r8],xmm0	; context->Xmm6
1966	movups	XMMWORD PTR[528+r8],xmm1	; context->Xmm7
1967
1968$L$common_seh_tail::
1969	mov	rdi,QWORD PTR[8+rax]	; rdi/rsi as parked by the WIN64 prologue
1970	mov	rsi,QWORD PTR[16+rax]
1971	mov	QWORD PTR[152+r8],rax	; context->Rsp
1972	mov	QWORD PTR[168+r8],rsi	; context->Rsi
1973	mov	QWORD PTR[176+r8],rdi	; context->Rdi
1974
1975	mov	rdi,QWORD PTR[40+r9]	; destination: disp->ContextRecord
1976	mov	rsi,r8			; source: the CONTEXT just patched
1977	mov	ecx,154			; qword count (154*8 = 1232 bytes)
1978	DD	0a548f3fch		; hand-encoded: cld / rep movsq
1979
1980	mov	rsi,r9			; rsi = DISPATCHER_CONTEXT*
1981	xor	rcx,rcx			; arg1: handler type 0 (UNW_FLAG_NHANDLER)
1982	mov	rdx,QWORD PTR[8+rsi]	; arg2: disp->ImageBase
1983	mov	r8,QWORD PTR[rsi]	; arg3: disp->ControlPc
1984	mov	r9,QWORD PTR[16+rsi]	; arg4: disp->FunctionEntry
1985	mov	r10,QWORD PTR[40+rsi]	; disp->ContextRecord
1986	lea	r11,QWORD PTR[56+rsi]	; &disp->HandlerData
1987	lea	r12,QWORD PTR[24+rsi]	; &disp->EstablisherFrame
1988	mov	QWORD PTR[32+rsp],r10	; arg5
1989	mov	QWORD PTR[40+rsp],r11	; arg6
1990	mov	QWORD PTR[48+rsp],r12	; arg7
1991	mov	QWORD PTR[56+rsp],rcx	; arg8: NULL
1992	call	QWORD PTR[__imp_RtlVirtualUnwind]
1993
1994	mov	eax,1			; ExceptionContinueSearch
1995	add	rsp,64
1996	popfq
1997	pop	r15
1998	pop	r14
1999	pop	r13
2000	pop	r12
2001	pop	rbp
2002	pop	rbx
2003	pop	rdi
2004	pop	rsi
2005	DB	0F3h,0C3h		;repret
2006mul_handler	ENDP
2007
2008.text$	ENDS
; Function table. Each entry is three image-relative addresses:
; start of the routine, end of the routine, and its unwind-info record in
; .xdata. The first four routines use mul_handler; bn_gather5 is described
; by plain unwind codes instead.
2009.pdata	SEGMENT READONLY ALIGN(4)
2010ALIGN	4
2011	DD	imagerel $L$SEH_begin_bn_mul_mont_gather5
2012	DD	imagerel $L$SEH_end_bn_mul_mont_gather5
2013	DD	imagerel $L$SEH_info_bn_mul_mont_gather5
2014
2015	DD	imagerel $L$SEH_begin_bn_mul4x_mont_gather5
2016	DD	imagerel $L$SEH_end_bn_mul4x_mont_gather5
2017	DD	imagerel $L$SEH_info_bn_mul4x_mont_gather5
2018
2019	DD	imagerel $L$SEH_begin_bn_power5
2020	DD	imagerel $L$SEH_end_bn_power5
2021	DD	imagerel $L$SEH_info_bn_power5
2022
2023	DD	imagerel $L$SEH_begin_bn_from_mont8x
2024	DD	imagerel $L$SEH_end_bn_from_mont8x
2025	DD	imagerel $L$SEH_info_bn_from_mont8x
2026	DD	imagerel $L$SEH_begin_bn_gather5
2027	DD	imagerel $L$SEH_end_bn_gather5
2028	DD	imagerel $L$SEH_info_bn_gather5
2029
2030.pdata	ENDS
; Unwind-info records referenced from .pdata.
;
; The first four records share one shape:
;   DB 9,0,0,0  header: first byte 9 = version 1 in the low three bits and
;               flag value 1 in the upper five (UNW_FLAG_EHANDLER in the
;               documented UNWIND_INFO layout); prologue size 0, no unwind
;               codes, no frame register
;   DD handler  image-relative address of mul_handler
;   DD a,b      handler data: image-relative addresses of the routine's body
;               and epilogue labels, read by mul_handler as HandlerData[0]
;               and HandlerData[1]
2031.xdata	SEGMENT READONLY ALIGN(8)
2032ALIGN	8
2033$L$SEH_info_bn_mul_mont_gather5::
2034DB	9,0,0,0
2035	DD	imagerel mul_handler
2036	DD	imagerel $L$mul_body,imagerel $L$mul_epilogue
2037ALIGN	8
2038$L$SEH_info_bn_mul4x_mont_gather5::
2039DB	9,0,0,0
2040	DD	imagerel mul_handler
2041	DD	imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue
2042ALIGN	8
2043$L$SEH_info_bn_power5::
2044DB	9,0,0,0
2045	DD	imagerel mul_handler
2046	DD	imagerel $L$power5_body,imagerel $L$power5_epilogue
2047ALIGN	8
2048$L$SEH_info_bn_from_mont8x::
2049DB	9,0,0,0
2050	DD	imagerel mul_handler
2051	DD	imagerel $L$from_body,imagerel $L$from_epilogue
2052ALIGN	8
; bn_gather5 has no handler; its hand-encoded 13-byte prologue is described
; by unwind codes (listed from the last prologue instruction to the first).
; Operation names follow the documented x64 unwind-code encoding.
2053$L$SEH_info_bn_gather5::
2054DB	001h,00dh,005h,000h		; version 1, no flags; prologue 13 bytes; 5 code slots; no frame register
2055DB	00dh,078h,001h,000h		; at offset 13: save xmm7 at [rsp+1*16]  (movaps [rsp+16],xmm7)
2056DB	008h,068h,000h,000h		; at offset 8:  save xmm6 at [rsp+0*16]  (movaps [rsp],xmm6)
2057DB	004h,042h,000h,000h		; at offset 4:  small alloc, 4*8+8 = 40 bytes (sub rsp,40); last two bytes pad the slot count to even
2058ALIGN	8
2059
2060.xdata	ENDS
2061END
2062