; Section setup: pick the code-section declaration that matches the
; assembler's __OUTPUT_FORMAT__ (obj, win32, or anything else).
; NOTE(review): every line in this copy starts with a decimal number fused
; onto the statement (e.g. "1%ifidn"). That looks like a line-number
; artifact and would presumably have to be stripped before assembling --
; confirm against the upstream generated file.
1%ifidn __OUTPUT_FORMAT__,obj
2section	code	use32 class=code align=64
3%elifidn __OUTPUT_FORMAT__,win32
4%ifdef __YASM_VERSION_ID__
5%if __YASM_VERSION_ID__ < 01010000h
6%error yasm version 1.1.0 or later needed.
7%endif
8; Yasm automatically includes @feat.00 and complains about redefining it.
9; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
10%else
; Not Yasm: define the @feat.00 symbol explicitly with value 1.
11$@feat.00 equ 1
12%endif
13section	.text	code align=64
14%else
15section	.text	code
16%endif
; The capability-vector import is commented out; nothing in the visible
; code references it.
17;extern	_OPENSSL_ia32cap_P
;-----------------------------------------------------------------------
; _aesni_encrypt: encrypt a single 16-byte block.
; Arguments are read from the stack:
;   [4+esp]  pointer to the 16-byte input block
;   [8+esp]  pointer to the 16-byte output block
;   [12+esp] pointer to the key schedule (round keys from offset 0,
;            loop count in the dword at offset 240)
; Clobbers eax, ecx, edx and flags; xmm0-xmm2 are zeroed before return.
; db 102,15,56,220,209 is the encoding of aesenc     xmm2,xmm1
; db 102,15,56,221,209 is the encoding of aesenclast xmm2,xmm1
;-----------------------------------------------------------------------
18global	_aesni_encrypt
19align	16
20_aesni_encrypt:
21L$_aesni_encrypt_begin:
22	mov	eax,DWORD [4+esp]
23	mov	edx,DWORD [12+esp]
24	movups	xmm2,[eax]
25	mov	ecx,DWORD [240+edx]
; eax is reused for the output pointer now that the input is in xmm2.
26	mov	eax,DWORD [8+esp]
27	movups	xmm0,[edx]
28	movups	xmm1,[16+edx]
29	lea	edx,[32+edx]
; Initial whitening: block ^= round key 0.
30	xorps	xmm2,xmm0
; One round per iteration. "dec ecx" sets ZF; the movups/lea that follow
; do not touch flags, so the jnz still tests the decrement.
31L$000enc1_loop_1:
32db	102,15,56,220,209
33	dec	ecx
34	movups	xmm1,[edx]
35	lea	edx,[16+edx]
36	jnz	NEAR L$000enc1_loop_1
; Last round, using the key fetched by the final loop iteration.
37db	102,15,56,221,209
; Clear the round keys from xmm0/xmm1, store the result, then clear it too.
38	pxor	xmm0,xmm0
39	pxor	xmm1,xmm1
40	movups	[eax],xmm2
41	pxor	xmm2,xmm2
42	ret
;-----------------------------------------------------------------------
; _aesni_decrypt: decrypt a single 16-byte block.
; Arguments are read from the stack:
;   [4+esp]  pointer to the 16-byte input block
;   [8+esp]  pointer to the 16-byte output block
;   [12+esp] pointer to the key schedule (round keys from offset 0,
;            loop count in the dword at offset 240)
; Clobbers eax, ecx, edx and flags; xmm0-xmm2 are zeroed before return.
; db 102,15,56,222,209 is the encoding of aesdec     xmm2,xmm1
; db 102,15,56,223,209 is the encoding of aesdeclast xmm2,xmm1
;-----------------------------------------------------------------------
43global	_aesni_decrypt
44align	16
45_aesni_decrypt:
46L$_aesni_decrypt_begin:
47	mov	eax,DWORD [4+esp]
48	mov	edx,DWORD [12+esp]
49	movups	xmm2,[eax]
50	mov	ecx,DWORD [240+edx]
; eax is reused for the output pointer now that the input is in xmm2.
51	mov	eax,DWORD [8+esp]
52	movups	xmm0,[edx]
53	movups	xmm1,[16+edx]
54	lea	edx,[32+edx]
; Initial whitening: block ^= first key in the schedule.
55	xorps	xmm2,xmm0
; One round per iteration; jnz tests the "dec ecx" result because the
; intervening movups/lea leave the flags alone.
56L$001dec1_loop_2:
57db	102,15,56,222,209
58	dec	ecx
59	movups	xmm1,[edx]
60	lea	edx,[16+edx]
61	jnz	NEAR L$001dec1_loop_2
; Last round, using the key fetched by the final loop iteration.
62db	102,15,56,223,209
; Clear the round keys from xmm0/xmm1, store the result, then clear it too.
63	pxor	xmm0,xmm0
64	pxor	xmm1,xmm1
65	movups	[eax],xmm2
66	pxor	xmm2,xmm2
67	ret
;-----------------------------------------------------------------------
; __aesni_encrypt2: internal helper, encrypts two blocks in parallel.
; In:   edx = key schedule pointer, ecx = loop count
;       xmm2, xmm3 = input blocks
; Out:  xmm2, xmm3 = results
; Clobbers ecx, edx, xmm0, xmm1 and flags.
; Round keys are fetched two per iteration through a negative index in
; ecx that counts up by 32 until it reaches zero, so the loop only ends
; exactly when 16*count-16 is a multiple of 32.
; db 102,15,56,220,209/217 = aesenc     xmm2/xmm3,xmm1
; db 102,15,56,220,208/216 = aesenc     xmm2/xmm3,xmm0
; db 102,15,56,221,208/216 = aesenclast xmm2/xmm3,xmm0
;-----------------------------------------------------------------------
68align	16
69__aesni_encrypt2:
70	movups	xmm0,[edx]
71	shl	ecx,4
72	movups	xmm1,[16+edx]
; Whitening with the first key for both blocks.
73	xorps	xmm2,xmm0
74	pxor	xmm3,xmm0
75	movups	xmm0,[32+edx]
; edx -> 32 + 16*count bytes into the schedule; ecx = 16 - 16*count.
76	lea	edx,[32+ecx*1+edx]
77	neg	ecx
78	add	ecx,16
79L$002enc2_loop:
80db	102,15,56,220,209
81db	102,15,56,220,217
82	movups	xmm1,[ecx*1+edx]
; This add sets the flags tested by the jnz below (aesenc/movups do not).
83	add	ecx,32
84db	102,15,56,220,208
85db	102,15,56,220,216
86	movups	xmm0,[ecx*1+edx-16]
87	jnz	NEAR L$002enc2_loop
; Two remaining rounds: one with xmm1, the final one with xmm0.
88db	102,15,56,220,209
89db	102,15,56,220,217
90db	102,15,56,221,208
91db	102,15,56,221,216
92	ret
;-----------------------------------------------------------------------
; __aesni_decrypt2: internal helper, decrypts two blocks in parallel.
; In:   edx = key schedule pointer, ecx = loop count
;       xmm2, xmm3 = input blocks
; Out:  xmm2, xmm3 = results
; Clobbers ecx, edx, xmm0, xmm1 and flags.
; Round keys are fetched two per iteration through a negative index in
; ecx that counts up by 32 until it reaches zero, so the loop only ends
; exactly when 16*count-16 is a multiple of 32.
; db 102,15,56,222,209/217 = aesdec     xmm2/xmm3,xmm1
; db 102,15,56,222,208/216 = aesdec     xmm2/xmm3,xmm0
; db 102,15,56,223,208/216 = aesdeclast xmm2/xmm3,xmm0
;-----------------------------------------------------------------------
93align	16
94__aesni_decrypt2:
95	movups	xmm0,[edx]
96	shl	ecx,4
97	movups	xmm1,[16+edx]
; Whitening with the first key for both blocks.
98	xorps	xmm2,xmm0
99	pxor	xmm3,xmm0
100	movups	xmm0,[32+edx]
; edx -> 32 + 16*count bytes into the schedule; ecx = 16 - 16*count.
101	lea	edx,[32+ecx*1+edx]
102	neg	ecx
103	add	ecx,16
104L$003dec2_loop:
105db	102,15,56,222,209
106db	102,15,56,222,217
107	movups	xmm1,[ecx*1+edx]
; This add sets the flags tested by the jnz below.
108	add	ecx,32
109db	102,15,56,222,208
110db	102,15,56,222,216
111	movups	xmm0,[ecx*1+edx-16]
112	jnz	NEAR L$003dec2_loop
; Two remaining rounds: one with xmm1, the final one with xmm0.
113db	102,15,56,222,209
114db	102,15,56,222,217
115db	102,15,56,223,208
116db	102,15,56,223,216
117	ret
;-----------------------------------------------------------------------
; __aesni_encrypt3: internal helper, encrypts three blocks in parallel.
; In:   edx = key schedule pointer, ecx = loop count
;       xmm2, xmm3, xmm4 = input blocks
; Out:  xmm2, xmm3, xmm4 = results
; Clobbers ecx, edx, xmm0, xmm1 and flags.
; Same key-walk as the two-block helper: ecx is a negative index that
; steps by 32 up to zero (needs 16*count-16 to be a multiple of 32).
; db 102,15,56,220,209/217/225 = aesenc     xmm2/xmm3/xmm4,xmm1
; db 102,15,56,220,208/216/224 = aesenc     xmm2/xmm3/xmm4,xmm0
; db 102,15,56,221,208/216/224 = aesenclast xmm2/xmm3/xmm4,xmm0
;-----------------------------------------------------------------------
118align	16
119__aesni_encrypt3:
120	movups	xmm0,[edx]
121	shl	ecx,4
122	movups	xmm1,[16+edx]
; Whitening with the first key for all three blocks.
123	xorps	xmm2,xmm0
124	pxor	xmm3,xmm0
125	pxor	xmm4,xmm0
126	movups	xmm0,[32+edx]
; edx -> 32 + 16*count bytes into the schedule; ecx = 16 - 16*count.
127	lea	edx,[32+ecx*1+edx]
128	neg	ecx
129	add	ecx,16
130L$004enc3_loop:
131db	102,15,56,220,209
132db	102,15,56,220,217
133db	102,15,56,220,225
134	movups	xmm1,[ecx*1+edx]
; This add sets the flags tested by the jnz below.
135	add	ecx,32
136db	102,15,56,220,208
137db	102,15,56,220,216
138db	102,15,56,220,224
139	movups	xmm0,[ecx*1+edx-16]
140	jnz	NEAR L$004enc3_loop
; Two remaining rounds: one with xmm1, the final one with xmm0.
141db	102,15,56,220,209
142db	102,15,56,220,217
143db	102,15,56,220,225
144db	102,15,56,221,208
145db	102,15,56,221,216
146db	102,15,56,221,224
147	ret
;-----------------------------------------------------------------------
; __aesni_decrypt3: internal helper, decrypts three blocks in parallel.
; In:   edx = key schedule pointer, ecx = loop count
;       xmm2, xmm3, xmm4 = input blocks
; Out:  xmm2, xmm3, xmm4 = results
; Clobbers ecx, edx, xmm0, xmm1 and flags.
; Same key-walk as the two-block helper: ecx is a negative index that
; steps by 32 up to zero (needs 16*count-16 to be a multiple of 32).
; db 102,15,56,222,209/217/225 = aesdec     xmm2/xmm3/xmm4,xmm1
; db 102,15,56,222,208/216/224 = aesdec     xmm2/xmm3/xmm4,xmm0
; db 102,15,56,223,208/216/224 = aesdeclast xmm2/xmm3/xmm4,xmm0
;-----------------------------------------------------------------------
148align	16
149__aesni_decrypt3:
150	movups	xmm0,[edx]
151	shl	ecx,4
152	movups	xmm1,[16+edx]
; Whitening with the first key for all three blocks.
153	xorps	xmm2,xmm0
154	pxor	xmm3,xmm0
155	pxor	xmm4,xmm0
156	movups	xmm0,[32+edx]
; edx -> 32 + 16*count bytes into the schedule; ecx = 16 - 16*count.
157	lea	edx,[32+ecx*1+edx]
158	neg	ecx
159	add	ecx,16
160L$005dec3_loop:
161db	102,15,56,222,209
162db	102,15,56,222,217
163db	102,15,56,222,225
164	movups	xmm1,[ecx*1+edx]
; This add sets the flags tested by the jnz below.
165	add	ecx,32
166db	102,15,56,222,208
167db	102,15,56,222,216
168db	102,15,56,222,224
169	movups	xmm0,[ecx*1+edx-16]
170	jnz	NEAR L$005dec3_loop
; Two remaining rounds: one with xmm1, the final one with xmm0.
171db	102,15,56,222,209
172db	102,15,56,222,217
173db	102,15,56,222,225
174db	102,15,56,223,208
175db	102,15,56,223,216
176db	102,15,56,223,224
177	ret
;-----------------------------------------------------------------------
; __aesni_encrypt4: internal helper, encrypts four blocks in parallel.
; In:   edx = key schedule pointer, ecx = loop count
;       xmm2..xmm5 = input blocks
; Out:  xmm2..xmm5 = results
; Clobbers ecx, edx, xmm0, xmm1 and flags.
; ecx is turned into a negative index that steps by 32 up to zero
; (needs 16*count-16 to be a multiple of 32).
; db 102,15,56,220,209/217/225/233 = aesenc     xmm2..xmm5,xmm1
; db 102,15,56,220,208/216/224/232 = aesenc     xmm2..xmm5,xmm0
; db 102,15,56,221,208/216/224/232 = aesenclast xmm2..xmm5,xmm0
; db 15,31,64,0 is a 4-byte NOP (0F 1F 40 00), i.e. padding only.
;-----------------------------------------------------------------------
178align	16
179__aesni_encrypt4:
180	movups	xmm0,[edx]
181	movups	xmm1,[16+edx]
182	shl	ecx,4
; Whitening with the first key for all four blocks.
183	xorps	xmm2,xmm0
184	pxor	xmm3,xmm0
185	pxor	xmm4,xmm0
186	pxor	xmm5,xmm0
187	movups	xmm0,[32+edx]
; edx -> 32 + 16*count bytes into the schedule; ecx = 16 - 16*count.
188	lea	edx,[32+ecx*1+edx]
189	neg	ecx
190db	15,31,64,0
191	add	ecx,16
192L$006enc4_loop:
193db	102,15,56,220,209
194db	102,15,56,220,217
195db	102,15,56,220,225
196db	102,15,56,220,233
197	movups	xmm1,[ecx*1+edx]
; This add sets the flags tested by the jnz below.
198	add	ecx,32
199db	102,15,56,220,208
200db	102,15,56,220,216
201db	102,15,56,220,224
202db	102,15,56,220,232
203	movups	xmm0,[ecx*1+edx-16]
204	jnz	NEAR L$006enc4_loop
; Two remaining rounds: one with xmm1, the final one with xmm0.
205db	102,15,56,220,209
206db	102,15,56,220,217
207db	102,15,56,220,225
208db	102,15,56,220,233
209db	102,15,56,221,208
210db	102,15,56,221,216
211db	102,15,56,221,224
212db	102,15,56,221,232
213	ret
;-----------------------------------------------------------------------
; __aesni_decrypt4: internal helper, decrypts four blocks in parallel.
; In:   edx = key schedule pointer, ecx = loop count
;       xmm2..xmm5 = input blocks
; Out:  xmm2..xmm5 = results
; Clobbers ecx, edx, xmm0, xmm1 and flags.
; ecx is turned into a negative index that steps by 32 up to zero
; (needs 16*count-16 to be a multiple of 32).
; db 102,15,56,222,209/217/225/233 = aesdec     xmm2..xmm5,xmm1
; db 102,15,56,222,208/216/224/232 = aesdec     xmm2..xmm5,xmm0
; db 102,15,56,223,208/216/224/232 = aesdeclast xmm2..xmm5,xmm0
; db 15,31,64,0 is a 4-byte NOP (0F 1F 40 00), i.e. padding only.
;-----------------------------------------------------------------------
214align	16
215__aesni_decrypt4:
216	movups	xmm0,[edx]
217	movups	xmm1,[16+edx]
218	shl	ecx,4
; Whitening with the first key for all four blocks.
219	xorps	xmm2,xmm0
220	pxor	xmm3,xmm0
221	pxor	xmm4,xmm0
222	pxor	xmm5,xmm0
223	movups	xmm0,[32+edx]
; edx -> 32 + 16*count bytes into the schedule; ecx = 16 - 16*count.
224	lea	edx,[32+ecx*1+edx]
225	neg	ecx
226db	15,31,64,0
227	add	ecx,16
228L$007dec4_loop:
229db	102,15,56,222,209
230db	102,15,56,222,217
231db	102,15,56,222,225
232db	102,15,56,222,233
233	movups	xmm1,[ecx*1+edx]
; This add sets the flags tested by the jnz below.
234	add	ecx,32
235db	102,15,56,222,208
236db	102,15,56,222,216
237db	102,15,56,222,224
238db	102,15,56,222,232
239	movups	xmm0,[ecx*1+edx-16]
240	jnz	NEAR L$007dec4_loop
; Two remaining rounds: one with xmm1, the final one with xmm0.
241db	102,15,56,222,209
242db	102,15,56,222,217
243db	102,15,56,222,225
244db	102,15,56,222,233
245db	102,15,56,223,208
246db	102,15,56,223,216
247db	102,15,56,223,224
248db	102,15,56,223,232
249	ret
;-----------------------------------------------------------------------
; __aesni_encrypt6: internal helper, encrypts six blocks in parallel.
; In:   edx = key schedule pointer, ecx = loop count
;       xmm2..xmm7 = input blocks
; Out:  xmm2..xmm7 = results
; Clobbers ecx, edx, xmm0, xmm1 and flags.
; The whitening and the first aesenc round are interleaved in the
; prologue, which then jumps into the middle of the loop body.
; Secondary entry L$_aesni_encrypt6_enter is also the target of direct
; "call" instructions elsewhere in this file; callers using it have
; already applied the first aesenc round (with xmm1) to all six blocks
; and must arrive with xmm0 = next round key, edx = end-of-schedule
; pointer and ecx = negative index, exactly as the prologue leaves them.
; db 102,15,56,220,209/217/225/233/241/249 = aesenc     xmm2..xmm7,xmm1
; db 102,15,56,220,208/216/224/232/240/248 = aesenc     xmm2..xmm7,xmm0
; db 102,15,56,221,208/216/224/232/240/248 = aesenclast xmm2..xmm7,xmm0
;-----------------------------------------------------------------------
250align	16
251__aesni_encrypt6:
252	movups	xmm0,[edx]
253	shl	ecx,4
254	movups	xmm1,[16+edx]
255	xorps	xmm2,xmm0
256	pxor	xmm3,xmm0
257	pxor	xmm4,xmm0
; First round starts on xmm2 while the remaining blocks are still being
; whitened.
258db	102,15,56,220,209
259	pxor	xmm5,xmm0
260	pxor	xmm6,xmm0
261db	102,15,56,220,217
; edx -> 32 + 16*count bytes into the schedule; ecx = -16*count.
262	lea	edx,[32+ecx*1+edx]
263	neg	ecx
264db	102,15,56,220,225
265	pxor	xmm7,xmm0
; With ecx = -16*count this reads the key at offset 32 of the schedule.
266	movups	xmm0,[ecx*1+edx]
267	add	ecx,16
268	jmp	NEAR L$008_aesni_encrypt6_inner
269align	16
270L$009enc6_loop:
271db	102,15,56,220,209
272db	102,15,56,220,217
273db	102,15,56,220,225
; Prologue joins here: xmm2..xmm4 already had this round applied.
274L$008_aesni_encrypt6_inner:
275db	102,15,56,220,233
276db	102,15,56,220,241
277db	102,15,56,220,249
; External entry point (see header).
278L$_aesni_encrypt6_enter:
279	movups	xmm1,[ecx*1+edx]
; This add sets the flags tested by the jnz below.
280	add	ecx,32
281db	102,15,56,220,208
282db	102,15,56,220,216
283db	102,15,56,220,224
284db	102,15,56,220,232
285db	102,15,56,220,240
286db	102,15,56,220,248
287	movups	xmm0,[ecx*1+edx-16]
288	jnz	NEAR L$009enc6_loop
; Two remaining rounds: one with xmm1, the final one with xmm0.
289db	102,15,56,220,209
290db	102,15,56,220,217
291db	102,15,56,220,225
292db	102,15,56,220,233
293db	102,15,56,220,241
294db	102,15,56,220,249
295db	102,15,56,221,208
296db	102,15,56,221,216
297db	102,15,56,221,224
298db	102,15,56,221,232
299db	102,15,56,221,240
300db	102,15,56,221,248
301	ret
;-----------------------------------------------------------------------
; __aesni_decrypt6: internal helper, decrypts six blocks in parallel.
; In:   edx = key schedule pointer, ecx = loop count
;       xmm2..xmm7 = input blocks
; Out:  xmm2..xmm7 = results
; Clobbers ecx, edx, xmm0, xmm1 and flags.
; The whitening and the first aesdec round are interleaved in the
; prologue, which then jumps into the middle of the loop body.
; Secondary entry L$_aesni_decrypt6_enter is also the target of a direct
; "call" elsewhere in this file; a caller using it has already applied
; the first aesdec round (with xmm1) to all six blocks and must arrive
; with xmm0 = next round key, edx = end-of-schedule pointer and
; ecx = negative index, exactly as the prologue leaves them.
; db 102,15,56,222,209/217/225/233/241/249 = aesdec     xmm2..xmm7,xmm1
; db 102,15,56,222,208/216/224/232/240/248 = aesdec     xmm2..xmm7,xmm0
; db 102,15,56,223,208/216/224/232/240/248 = aesdeclast xmm2..xmm7,xmm0
;-----------------------------------------------------------------------
302align	16
303__aesni_decrypt6:
304	movups	xmm0,[edx]
305	shl	ecx,4
306	movups	xmm1,[16+edx]
307	xorps	xmm2,xmm0
308	pxor	xmm3,xmm0
309	pxor	xmm4,xmm0
; First round starts on xmm2 while the remaining blocks are still being
; whitened.
310db	102,15,56,222,209
311	pxor	xmm5,xmm0
312	pxor	xmm6,xmm0
313db	102,15,56,222,217
; edx -> 32 + 16*count bytes into the schedule; ecx = -16*count.
314	lea	edx,[32+ecx*1+edx]
315	neg	ecx
316db	102,15,56,222,225
317	pxor	xmm7,xmm0
; With ecx = -16*count this reads the key at offset 32 of the schedule.
318	movups	xmm0,[ecx*1+edx]
319	add	ecx,16
320	jmp	NEAR L$010_aesni_decrypt6_inner
321align	16
322L$011dec6_loop:
323db	102,15,56,222,209
324db	102,15,56,222,217
325db	102,15,56,222,225
; Prologue joins here: xmm2..xmm4 already had this round applied.
326L$010_aesni_decrypt6_inner:
327db	102,15,56,222,233
328db	102,15,56,222,241
329db	102,15,56,222,249
; External entry point (see header).
330L$_aesni_decrypt6_enter:
331	movups	xmm1,[ecx*1+edx]
; This add sets the flags tested by the jnz below.
332	add	ecx,32
333db	102,15,56,222,208
334db	102,15,56,222,216
335db	102,15,56,222,224
336db	102,15,56,222,232
337db	102,15,56,222,240
338db	102,15,56,222,248
339	movups	xmm0,[ecx*1+edx-16]
340	jnz	NEAR L$011dec6_loop
; Two remaining rounds: one with xmm1, the final one with xmm0.
341db	102,15,56,222,209
342db	102,15,56,222,217
343db	102,15,56,222,225
344db	102,15,56,222,233
345db	102,15,56,222,241
346db	102,15,56,222,249
347db	102,15,56,223,208
348db	102,15,56,223,216
349db	102,15,56,223,224
350db	102,15,56,223,232
351db	102,15,56,223,240
352db	102,15,56,223,248
353	ret
;-----------------------------------------------------------------------
; _aesni_ecb_encrypt: encrypt or decrypt a run of independent 16-byte
; blocks.
; Stack arguments (offsets are after the four pushes below):
;   [20+esp] input pointer        -> esi
;   [24+esp] output pointer       -> edi
;   [28+esp] length in bytes      -> eax (rounded down to a multiple of
;                                    16; nothing is done if that is 0)
;   [32+esp] key schedule pointer -> edx, copy kept in ebp
;   [36+esp] direction flag       -> ebx (zero selects the decrypt path)
; After the direction test ebx is reused to hold the loop count from
; [240+edx], so edx/ecx can be reloaded after each helper call (the
; helpers modify both).
; Saves and restores ebp, ebx, esi, edi; xmm0-xmm7 are zeroed on exit.
; db 102,15,56,220,209 / 221,209 = aesenc / aesenclast xmm2,xmm1
; db 102,15,56,222,209 / 223,209 = aesdec / aesdeclast xmm2,xmm1
;-----------------------------------------------------------------------
354global	_aesni_ecb_encrypt
355align	16
356_aesni_ecb_encrypt:
357L$_aesni_ecb_encrypt_begin:
358	push	ebp
359	push	ebx
360	push	esi
361	push	edi
362	mov	esi,DWORD [20+esp]
363	mov	edi,DWORD [24+esp]
364	mov	eax,DWORD [28+esp]
365	mov	edx,DWORD [32+esp]
366	mov	ebx,DWORD [36+esp]
; Drop any partial trailing block; bail out if no whole block remains.
367	and	eax,-16
368	jz	NEAR L$012ecb_ret
369	mov	ecx,DWORD [240+edx]
370	test	ebx,ebx
371	jz	NEAR L$013ecb_decrypt
; ---- encrypt path ----
372	mov	ebp,edx
373	mov	ebx,ecx
374	cmp	eax,96
375	jb	NEAR L$014ecb_enc_tail
; At least six blocks: preload the first six and enter the loop at the
; helper call.
376	movdqu	xmm2,[esi]
377	movdqu	xmm3,[16+esi]
378	movdqu	xmm4,[32+esi]
379	movdqu	xmm5,[48+esi]
380	movdqu	xmm6,[64+esi]
381	movdqu	xmm7,[80+esi]
382	lea	esi,[96+esi]
383	sub	eax,96
384	jmp	NEAR L$015ecb_enc_loop6_enter
385align	16
; Store the six results from the previous pass while loading the next
; six inputs.
386L$016ecb_enc_loop6:
387	movups	[edi],xmm2
388	movdqu	xmm2,[esi]
389	movups	[16+edi],xmm3
390	movdqu	xmm3,[16+esi]
391	movups	[32+edi],xmm4
392	movdqu	xmm4,[32+esi]
393	movups	[48+edi],xmm5
394	movdqu	xmm5,[48+esi]
395	movups	[64+edi],xmm6
396	movdqu	xmm6,[64+esi]
397	movups	[80+edi],xmm7
398	lea	edi,[96+edi]
399	movdqu	xmm7,[80+esi]
400	lea	esi,[96+esi]
401L$015ecb_enc_loop6_enter:
402	call	__aesni_encrypt6
; Restore key pointer and loop count clobbered by the helper.
403	mov	edx,ebp
404	mov	ecx,ebx
; Borrow (CF) means fewer than 96 bytes are left after this batch.
405	sub	eax,96
406	jnc	NEAR L$016ecb_enc_loop6
407	movups	[edi],xmm2
408	movups	[16+edi],xmm3
409	movups	[32+edi],xmm4
410	movups	[48+edi],xmm5
411	movups	[64+edi],xmm6
412	movups	[80+edi],xmm7
413	lea	edi,[96+edi]
; Undo the over-subtraction; eax = remaining bytes (0..80).
414	add	eax,96
415	jz	NEAR L$012ecb_ret
; One to five blocks left. Each conditional jump below still sees the
; flags of the preceding cmp because movups does not modify flags.
416L$014ecb_enc_tail:
417	movups	xmm2,[esi]
418	cmp	eax,32
419	jb	NEAR L$017ecb_enc_one
420	movups	xmm3,[16+esi]
421	je	NEAR L$018ecb_enc_two
422	movups	xmm4,[32+esi]
423	cmp	eax,64
424	jb	NEAR L$019ecb_enc_three
425	movups	xmm5,[48+esi]
426	je	NEAR L$020ecb_enc_four
; Five blocks: run the six-block helper with a zeroed sixth lane and
; store only the first five results.
427	movups	xmm6,[64+esi]
428	xorps	xmm7,xmm7
429	call	__aesni_encrypt6
430	movups	[edi],xmm2
431	movups	[16+edi],xmm3
432	movups	[32+edi],xmm4
433	movups	[48+edi],xmm5
434	movups	[64+edi],xmm6
435	jmp	NEAR L$012ecb_ret
436align	16
; Single block: inline one-block round loop (edx = key, ecx = count).
437L$017ecb_enc_one:
438	movups	xmm0,[edx]
439	movups	xmm1,[16+edx]
440	lea	edx,[32+edx]
441	xorps	xmm2,xmm0
442L$021enc1_loop_3:
443db	102,15,56,220,209
444	dec	ecx
445	movups	xmm1,[edx]
446	lea	edx,[16+edx]
447	jnz	NEAR L$021enc1_loop_3
448db	102,15,56,221,209
449	movups	[edi],xmm2
450	jmp	NEAR L$012ecb_ret
451align	16
452L$018ecb_enc_two:
453	call	__aesni_encrypt2
454	movups	[edi],xmm2
455	movups	[16+edi],xmm3
456	jmp	NEAR L$012ecb_ret
457align	16
458L$019ecb_enc_three:
459	call	__aesni_encrypt3
460	movups	[edi],xmm2
461	movups	[16+edi],xmm3
462	movups	[32+edi],xmm4
463	jmp	NEAR L$012ecb_ret
464align	16
465L$020ecb_enc_four:
466	call	__aesni_encrypt4
467	movups	[edi],xmm2
468	movups	[16+edi],xmm3
469	movups	[32+edi],xmm4
470	movups	[48+edi],xmm5
471	jmp	NEAR L$012ecb_ret
472align	16
; ---- decrypt path: same structure as the encrypt path, using the
; decrypt helpers ----
473L$013ecb_decrypt:
474	mov	ebp,edx
475	mov	ebx,ecx
476	cmp	eax,96
477	jb	NEAR L$022ecb_dec_tail
478	movdqu	xmm2,[esi]
479	movdqu	xmm3,[16+esi]
480	movdqu	xmm4,[32+esi]
481	movdqu	xmm5,[48+esi]
482	movdqu	xmm6,[64+esi]
483	movdqu	xmm7,[80+esi]
484	lea	esi,[96+esi]
485	sub	eax,96
486	jmp	NEAR L$023ecb_dec_loop6_enter
487align	16
; Store the six results from the previous pass while loading the next
; six inputs.
488L$024ecb_dec_loop6:
489	movups	[edi],xmm2
490	movdqu	xmm2,[esi]
491	movups	[16+edi],xmm3
492	movdqu	xmm3,[16+esi]
493	movups	[32+edi],xmm4
494	movdqu	xmm4,[32+esi]
495	movups	[48+edi],xmm5
496	movdqu	xmm5,[48+esi]
497	movups	[64+edi],xmm6
498	movdqu	xmm6,[64+esi]
499	movups	[80+edi],xmm7
500	lea	edi,[96+edi]
501	movdqu	xmm7,[80+esi]
502	lea	esi,[96+esi]
503L$023ecb_dec_loop6_enter:
504	call	__aesni_decrypt6
; Restore key pointer and loop count clobbered by the helper.
505	mov	edx,ebp
506	mov	ecx,ebx
507	sub	eax,96
508	jnc	NEAR L$024ecb_dec_loop6
509	movups	[edi],xmm2
510	movups	[16+edi],xmm3
511	movups	[32+edi],xmm4
512	movups	[48+edi],xmm5
513	movups	[64+edi],xmm6
514	movups	[80+edi],xmm7
515	lea	edi,[96+edi]
; Undo the over-subtraction; eax = remaining bytes (0..80).
516	add	eax,96
517	jz	NEAR L$012ecb_ret
; One to five blocks left; the je/jb pairs reuse the cmp flags across
; the intervening movups loads.
518L$022ecb_dec_tail:
519	movups	xmm2,[esi]
520	cmp	eax,32
521	jb	NEAR L$025ecb_dec_one
522	movups	xmm3,[16+esi]
523	je	NEAR L$026ecb_dec_two
524	movups	xmm4,[32+esi]
525	cmp	eax,64
526	jb	NEAR L$027ecb_dec_three
527	movups	xmm5,[48+esi]
528	je	NEAR L$028ecb_dec_four
; Five blocks: six-block helper with a zeroed sixth lane.
529	movups	xmm6,[64+esi]
530	xorps	xmm7,xmm7
531	call	__aesni_decrypt6
532	movups	[edi],xmm2
533	movups	[16+edi],xmm3
534	movups	[32+edi],xmm4
535	movups	[48+edi],xmm5
536	movups	[64+edi],xmm6
537	jmp	NEAR L$012ecb_ret
538align	16
; Single block: inline one-block round loop (edx = key, ecx = count).
539L$025ecb_dec_one:
540	movups	xmm0,[edx]
541	movups	xmm1,[16+edx]
542	lea	edx,[32+edx]
543	xorps	xmm2,xmm0
544L$029dec1_loop_4:
545db	102,15,56,222,209
546	dec	ecx
547	movups	xmm1,[edx]
548	lea	edx,[16+edx]
549	jnz	NEAR L$029dec1_loop_4
550db	102,15,56,223,209
551	movups	[edi],xmm2
552	jmp	NEAR L$012ecb_ret
553align	16
554L$026ecb_dec_two:
555	call	__aesni_decrypt2
556	movups	[edi],xmm2
557	movups	[16+edi],xmm3
558	jmp	NEAR L$012ecb_ret
559align	16
560L$027ecb_dec_three:
561	call	__aesni_decrypt3
562	movups	[edi],xmm2
563	movups	[16+edi],xmm3
564	movups	[32+edi],xmm4
565	jmp	NEAR L$012ecb_ret
566align	16
; Four blocks; falls through into the common exit.
567L$028ecb_dec_four:
568	call	__aesni_decrypt4
569	movups	[edi],xmm2
570	movups	[16+edi],xmm3
571	movups	[32+edi],xmm4
572	movups	[48+edi],xmm5
; Common exit: zero all eight xmm registers, restore callee-saved
; registers.
573L$012ecb_ret:
574	pxor	xmm0,xmm0
575	pxor	xmm1,xmm1
576	pxor	xmm2,xmm2
577	pxor	xmm3,xmm3
578	pxor	xmm4,xmm4
579	pxor	xmm5,xmm5
580	pxor	xmm6,xmm6
581	pxor	xmm7,xmm7
582	pop	edi
583	pop	esi
584	pop	ebx
585	pop	ebp
586	ret
;-----------------------------------------------------------------------
; _aesni_ccm64_encrypt_blocks: for each 16-byte input block, runs two
; AES encryptions in parallel -- one over a counter block (its output is
; XORed with the input to produce the output block) and one over a
; running 16-byte value that absorbs the input block and is written
; back at the end.
; Stack arguments (offsets are after the four pushes below):
;   [20+esp] input pointer          -> esi
;   [24+esp] output pointer         -> edi
;   [28+esp] number of 16-byte blocks -> eax
;            NOTE(review): the loop is do/while shaped (dec eax / jnz at
;            the bottom), so a count of 0 is not handled -- presumably
;            callers guarantee at least one block; confirm.
;   [32+esp] key schedule pointer   -> edx, copy kept in ebp
;   [36+esp] pointer to the 16-byte counter block (read only here)
;   [40+esp] pointer to the 16-byte running value (read, then updated)
; Local frame (esp realigned to 16 bytes):
;   [0+esp]  16-byte pshufb mask; the dwords 0x0C0D0E0F, 0x08090A0B,
;            0x04050607, 0x00010203 reverse the byte order of a register
;   [16+esp] dwords {1,0,0,0}: the 64-bit increment added with paddq
;   [48+esp] caller's esp
; db 102,15,56,0,253 = pshufb xmm7,xmm5    db 102,15,56,0,213 = pshufb xmm2,xmm5
; db 102,15,56,220,209/217 = aesenc     xmm2/xmm3,xmm1
; db 102,15,56,220,208/216 = aesenc     xmm2/xmm3,xmm0
; db 102,15,56,221,208/216 = aesenclast xmm2/xmm3,xmm0
; Saves and restores ebp, ebx, esi, edi; xmm0-xmm7 are zeroed on exit.
;-----------------------------------------------------------------------
587global	_aesni_ccm64_encrypt_blocks
588align	16
589_aesni_ccm64_encrypt_blocks:
590L$_aesni_ccm64_encrypt_blocks_begin:
591	push	ebp
592	push	ebx
593	push	esi
594	push	edi
595	mov	esi,DWORD [20+esp]
596	mov	edi,DWORD [24+esp]
597	mov	eax,DWORD [28+esp]
598	mov	edx,DWORD [32+esp]
599	mov	ebx,DWORD [36+esp]
600	mov	ecx,DWORD [40+esp]
; Build the aligned local frame and remember the original esp in it.
601	mov	ebp,esp
602	sub	esp,60
603	and	esp,-16
604	mov	DWORD [48+esp],ebp
; xmm7 = counter block, xmm3 = running value.
605	movdqu	xmm7,[ebx]
606	movdqu	xmm3,[ecx]
607	mov	ecx,DWORD [240+edx]
; Byte-reversal mask for pshufb.
608	mov	DWORD [esp],202182159
609	mov	DWORD [4+esp],134810123
610	mov	DWORD [8+esp],67438087
611	mov	DWORD [12+esp],66051
; 64-bit increment constant {1,0,0,0}.
612	mov	ebx,1
613	xor	ebp,ebp
614	mov	DWORD [16+esp],ebx
615	mov	DWORD [20+esp],ebp
616	mov	DWORD [24+esp],ebp
617	mov	DWORD [28+esp],ebp
; Key walk setup: ebp = start of schedule, edx = 32 + 16*count bytes in,
; ebx = 16 - 16*count (negative index reloaded into ecx per block).
618	shl	ecx,4
619	mov	ebx,16
620	lea	ebp,[edx]
621	movdqa	xmm5,[esp]
; xmm2 keeps the counter block in its original byte order; xmm7 is
; byte-reversed so that paddq can increment it.
622	movdqa	xmm2,xmm7
623	lea	edx,[32+ecx*1+edx]
624	sub	ebx,ecx
625db	102,15,56,0,253
; Per-block loop.
626L$030ccm64_enc_outer:
627	movups	xmm0,[ebp]
628	mov	ecx,ebx
629	movups	xmm6,[esi]
; xmm2 = counter ^ key0; xmm3 ^= input ^ key0 (whitening plus absorbing
; the input block in one step).
630	xorps	xmm2,xmm0
631	movups	xmm1,[16+ebp]
632	xorps	xmm0,xmm6
633	xorps	xmm3,xmm0
634	movups	xmm0,[32+ebp]
; Two-lane round loop; the add sets the flags tested by jnz.
635L$031ccm64_enc2_loop:
636db	102,15,56,220,209
637db	102,15,56,220,217
638	movups	xmm1,[ecx*1+edx]
639	add	ecx,32
640db	102,15,56,220,208
641db	102,15,56,220,216
642	movups	xmm0,[ecx*1+edx-16]
643	jnz	NEAR L$031ccm64_enc2_loop
644db	102,15,56,220,209
645db	102,15,56,220,217
; Advance the byte-reversed counter by one.
646	paddq	xmm7,[16+esp]
; Block countdown; none of the instructions up to the jnz at the bottom
; of the loop modify flags, so it tests this decrement.
647	dec	eax
648db	102,15,56,221,208
649db	102,15,56,221,216
650	lea	esi,[16+esi]
; output = input ^ encrypted counter block.
651	xorps	xmm6,xmm2
; Next counter block, converted back to its original byte order.
652	movdqa	xmm2,xmm7
653	movups	[edi],xmm6
654db	102,15,56,0,213
655	lea	edi,[16+edi]
656	jnz	NEAR L$030ccm64_enc_outer
; Restore the caller's esp, then write the running value back through
; the pointer passed at [40+esp].
657	mov	esp,DWORD [48+esp]
658	mov	edi,DWORD [40+esp]
659	movups	[edi],xmm3
660	pxor	xmm0,xmm0
661	pxor	xmm1,xmm1
662	pxor	xmm2,xmm2
663	pxor	xmm3,xmm3
664	pxor	xmm4,xmm4
665	pxor	xmm5,xmm5
666	pxor	xmm6,xmm6
667	pxor	xmm7,xmm7
668	pop	edi
669	pop	esi
670	pop	ebx
671	pop	ebp
672	ret
;-----------------------------------------------------------------------
; _aesni_ccm64_decrypt_blocks: counterpart of the ccm64 encrypt routine.
; Each output block is input ^ AES-encrypt(counter block); a running
; 16-byte value absorbs each *output* block and is encrypted, then
; written back at the end. Only aesenc/aesenclast are used here.
; Stack arguments (offsets are after the four pushes below):
;   [20+esp] input pointer          -> esi
;   [24+esp] output pointer         -> edi
;   [28+esp] number of 16-byte blocks -> eax
;            NOTE(review): the first block is processed before the count
;            is tested (sub eax,1 / jz), so a count of 0 is not handled
;            -- presumably callers guarantee at least one block; confirm.
;   [32+esp] key schedule pointer   -> edx, copy kept in ebp
;   [36+esp] pointer to the 16-byte counter block (read only here)
;   [40+esp] pointer to the 16-byte running value (read, then updated)
; Local frame (esp realigned to 16 bytes):
;   [0+esp]  16-byte pshufb mask that reverses byte order
;            (dwords 0x0C0D0E0F, 0x08090A0B, 0x04050607, 0x00010203)
;   [16+esp] dwords {1,0,0,0}: the 64-bit increment added with paddq
;   [48+esp] caller's esp
; db 102,15,56,0,253 = pshufb xmm7,xmm5    db 102,15,56,0,213 = pshufb xmm2,xmm5
; db 102,15,56,220,209/217 = aesenc     xmm2/xmm3,xmm1
; db 102,15,56,220,208/216 = aesenc     xmm2/xmm3,xmm0
; db 102,15,56,221,209/217 = aesenclast xmm2/xmm3,xmm1
; db 102,15,56,221,208/216 = aesenclast xmm2/xmm3,xmm0
; Saves and restores ebp, ebx, esi, edi; xmm0-xmm7 are zeroed on exit.
;-----------------------------------------------------------------------
673global	_aesni_ccm64_decrypt_blocks
674align	16
675_aesni_ccm64_decrypt_blocks:
676L$_aesni_ccm64_decrypt_blocks_begin:
677	push	ebp
678	push	ebx
679	push	esi
680	push	edi
681	mov	esi,DWORD [20+esp]
682	mov	edi,DWORD [24+esp]
683	mov	eax,DWORD [28+esp]
684	mov	edx,DWORD [32+esp]
685	mov	ebx,DWORD [36+esp]
686	mov	ecx,DWORD [40+esp]
; Build the aligned local frame and remember the original esp in it.
687	mov	ebp,esp
688	sub	esp,60
689	and	esp,-16
690	mov	DWORD [48+esp],ebp
; xmm7 = counter block, xmm3 = running value.
691	movdqu	xmm7,[ebx]
692	movdqu	xmm3,[ecx]
693	mov	ecx,DWORD [240+edx]
; Byte-reversal mask for pshufb.
694	mov	DWORD [esp],202182159
695	mov	DWORD [4+esp],134810123
696	mov	DWORD [8+esp],67438087
697	mov	DWORD [12+esp],66051
; 64-bit increment constant {1,0,0,0}.
698	mov	ebx,1
699	xor	ebp,ebp
700	mov	DWORD [16+esp],ebx
701	mov	DWORD [20+esp],ebp
702	mov	DWORD [24+esp],ebp
703	mov	DWORD [28+esp],ebp
704	movdqa	xmm5,[esp]
; xmm2 = counter block in original byte order; xmm7 becomes the
; byte-reversed copy used for incrementing.
705	movdqa	xmm2,xmm7
706	mov	ebp,edx
707	mov	ebx,ecx
708db	102,15,56,0,253
; Encrypt the first counter block on its own (one-block round loop).
709	movups	xmm0,[edx]
710	movups	xmm1,[16+edx]
711	lea	edx,[32+edx]
712	xorps	xmm2,xmm0
713L$032enc1_loop_5:
714db	102,15,56,220,209
715	dec	ecx
716	movups	xmm1,[edx]
717	lea	edx,[16+edx]
718	jnz	NEAR L$032enc1_loop_5
719db	102,15,56,221,209
; Key walk setup for the two-lane loop: edx = 32 + 16*count bytes into
; the schedule, ebx = 16 - 16*count. Also load the first input block and
; advance the counter.
720	shl	ebx,4
721	mov	ecx,16
722	movups	xmm6,[esi]
723	paddq	xmm7,[16+esp]
724	lea	esi,[16+esi]
725	sub	ecx,ebx
726	lea	edx,[32+ebx*1+ebp]
727	mov	ebx,ecx
728	jmp	NEAR L$033ccm64_dec_outer
729align	16
; Per-block loop: xmm2 = encrypted counter block, xmm6 = input block.
730L$033ccm64_dec_outer:
; output = input ^ encrypted counter block; xmm6 now holds the output.
731	xorps	xmm6,xmm2
; Next counter block, converted back to original byte order below.
732	movdqa	xmm2,xmm7
733	movups	[edi],xmm6
734	lea	edi,[16+edi]
735db	102,15,56,0,213
736	sub	eax,1
737	jz	NEAR L$034ccm64_dec_break
738	movups	xmm0,[ebp]
739	mov	ecx,ebx
740	movups	xmm1,[16+ebp]
; xmm3 ^= output ^ key0; xmm2 = next counter ^ key0.
741	xorps	xmm6,xmm0
742	xorps	xmm2,xmm0
743	xorps	xmm3,xmm6
744	movups	xmm0,[32+ebp]
; Two-lane round loop; the add sets the flags tested by jnz.
745L$035ccm64_dec2_loop:
746db	102,15,56,220,209
747db	102,15,56,220,217
748	movups	xmm1,[ecx*1+edx]
749	add	ecx,32
750db	102,15,56,220,208
751db	102,15,56,220,216
752	movups	xmm0,[ecx*1+edx-16]
753	jnz	NEAR L$035ccm64_dec2_loop
; Fetch the next input block and bump the counter while the last two
; rounds complete.
754	movups	xmm6,[esi]
755	paddq	xmm7,[16+esp]
756db	102,15,56,220,209
757db	102,15,56,220,217
758db	102,15,56,221,208
759db	102,15,56,221,216
760	lea	esi,[16+esi]
761	jmp	NEAR L$033ccm64_dec_outer
762align	16
; Last block already written: absorb it into the running value and
; encrypt that value once more with a one-block round loop on xmm3.
763L$034ccm64_dec_break:
764	mov	ecx,DWORD [240+ebp]
765	mov	edx,ebp
766	movups	xmm0,[edx]
767	movups	xmm1,[16+edx]
768	xorps	xmm6,xmm0
769	lea	edx,[32+edx]
770	xorps	xmm3,xmm6
771L$036enc1_loop_6:
772db	102,15,56,220,217
773	dec	ecx
774	movups	xmm1,[edx]
775	lea	edx,[16+edx]
776	jnz	NEAR L$036enc1_loop_6
777db	102,15,56,221,217
; Restore the caller's esp, then write the running value back through
; the pointer passed at [40+esp].
778	mov	esp,DWORD [48+esp]
779	mov	edi,DWORD [40+esp]
780	movups	[edi],xmm3
781	pxor	xmm0,xmm0
782	pxor	xmm1,xmm1
783	pxor	xmm2,xmm2
784	pxor	xmm3,xmm3
785	pxor	xmm4,xmm4
786	pxor	xmm5,xmm5
787	pxor	xmm6,xmm6
788	pxor	xmm7,xmm7
789	pop	edi
790	pop	esi
791	pop	ebx
792	pop	ebp
793	ret
;-----------------------------------------------------------------------
; _aesni_ctr32_encrypt_blocks: XOR the input with AES-encrypted counter
; blocks. Only the last dword of the 16-byte counter block is treated
; as the counter: it is extracted, byte-swapped with bswap, incremented
; as a 32-bit value and re-inserted per block.
; Stack arguments (offsets are after the four pushes below):
;   [20+esp] input pointer          -> esi
;   [24+esp] output pointer         -> edi
;   [28+esp] number of 16-byte blocks -> eax
;   [32+esp] key schedule pointer   -> edx
;   [36+esp] pointer to the 16-byte initial counter block -> ebx
; Local frame (esp realigned to 16 bytes):
;   [0+esp]  pshufb mask reversing all 16 bytes
;            (dwords 0x0C0D0E0F, 0x08090A0B, 0x04050607, 0x00010203)
;   [16+esp] dwords {6,6,6,0}: per-pass counter increment
;   [32+esp] (counter block with its last dword cleared) ^ round key 0
;   [48+esp] counters {c, c+1, c+2, 0} in bswapped (host) order
;   [64+esp] counters {c+3, c+4, c+5, 0} in bswapped (host) order
;   [80+esp] caller's esp
; Raw encodings used:
;   db 102,15,58,22,251,3   pextrd ebx,xmm7,3
;   db 102,15,58,34,253,3   pinsrd xmm7,ebp,3
;   db 102,15,58,34,195,n   pinsrd xmm0,ebx,n
;   db 102,15,58,34,205,n   pinsrd xmm1,ebp,n
;   db 102,15,56,0,194/202  pshufb xmm0/xmm1,xmm2
;   db 102,15,56,220,209..249  aesenc xmm2..xmm7,xmm1
;   db 102,15,56,221,209       aesenclast xmm2,xmm1
; Saves and restores ebp, ebx, esi, edi; xmm0-xmm7 and frame slots
; 32..79 are zeroed on exit.
;-----------------------------------------------------------------------
794global	_aesni_ctr32_encrypt_blocks
795align	16
796_aesni_ctr32_encrypt_blocks:
797L$_aesni_ctr32_encrypt_blocks_begin:
798	push	ebp
799	push	ebx
800	push	esi
801	push	edi
802	mov	esi,DWORD [20+esp]
803	mov	edi,DWORD [24+esp]
804	mov	eax,DWORD [28+esp]
805	mov	edx,DWORD [32+esp]
806	mov	ebx,DWORD [36+esp]
; Build the aligned local frame and remember the original esp in it.
807	mov	ebp,esp
808	sub	esp,88
809	and	esp,-16
810	mov	DWORD [80+esp],ebp
; Exactly one block: skip all the counter-vector setup.
811	cmp	eax,1
812	je	NEAR L$037ctr32_one_shortcut
813	movdqu	xmm7,[ebx]
; Byte-reversal mask for pshufb.
814	mov	DWORD [esp],202182159
815	mov	DWORD [4+esp],134810123
816	mov	DWORD [8+esp],67438087
817	mov	DWORD [12+esp],66051
; Increment vector {6,6,6,0}.
818	mov	ecx,6
819	xor	ebp,ebp
820	mov	DWORD [16+esp],ecx
821	mov	DWORD [20+esp],ecx
822	mov	DWORD [24+esp],ecx
823	mov	DWORD [28+esp],ebp
; ebx = last dword of the counter block; that dword is then cleared in
; xmm7 (ebp is 0 here).
824db	102,15,58,22,251,3
825db	102,15,58,34,253,3
826	mov	ecx,DWORD [240+edx]
827	bswap	ebx
828	pxor	xmm0,xmm0
829	pxor	xmm1,xmm1
830	movdqa	xmm2,[esp]
; Fill xmm0 = {c, c+1, c+2, 0} and xmm1 = {c+3, c+4, c+5, 0}.
831db	102,15,58,34,195,0
832	lea	ebp,[3+ebx]
833db	102,15,58,34,205,0
834	inc	ebx
835db	102,15,58,34,195,1
836	inc	ebp
837db	102,15,58,34,205,1
838	inc	ebx
839db	102,15,58,34,195,2
840	inc	ebp
841db	102,15,58,34,205,2
; Keep the host-order counters in the frame, then byte-reverse the
; register copies so each counter lands byte-swapped in a high lane.
842	movdqa	[48+esp],xmm0
843db	102,15,56,0,194
844	movdqu	xmm6,[edx]
845	movdqa	[64+esp],xmm1
846db	102,15,56,0,202
; pshufd 192/128/64 put one of the swapped counters into the top dword
; and the (zero) dword 0 into the other three lanes.
847	pshufd	xmm2,xmm0,192
848	pshufd	xmm3,xmm0,128
849	cmp	eax,6
850	jb	NEAR L$038ctr32_tail
; Six or more blocks: fold round key 0 into the fixed part of the
; counter block once, and set up the key walk (ebp = schedule start,
; edx = 32 + 16*count bytes in, ebx = 16 - 16*count).
851	pxor	xmm7,xmm6
852	shl	ecx,4
853	mov	ebx,16
854	movdqa	[32+esp],xmm7
855	mov	ebp,edx
856	sub	ebx,ecx
857	lea	edx,[32+ecx*1+edx]
858	sub	eax,6
859	jmp	NEAR L$039ctr32_loop6
860align	16
; Six-block pass. The first aesenc round is interleaved with building
; the six whitened counter blocks, then the shared six-block loop is
; entered at L$_aesni_encrypt6_enter.
861L$039ctr32_loop6:
862	pshufd	xmm4,xmm0,64
863	movdqa	xmm0,[32+esp]
864	pshufd	xmm5,xmm1,192
865	pxor	xmm2,xmm0
866	pshufd	xmm6,xmm1,128
867	pxor	xmm3,xmm0
868	pshufd	xmm7,xmm1,64
869	movups	xmm1,[16+ebp]
870	pxor	xmm4,xmm0
871	pxor	xmm5,xmm0
872db	102,15,56,220,209
873	pxor	xmm6,xmm0
874	pxor	xmm7,xmm0
875db	102,15,56,220,217
876	movups	xmm0,[32+ebp]
877	mov	ecx,ebx
878db	102,15,56,220,225
879db	102,15,56,220,233
880db	102,15,56,220,241
881db	102,15,56,220,249
882	call	L$_aesni_encrypt6_enter
; XOR the six keystream blocks with the input and store them, while
; advancing both counter triples by 6 and rebuilding xmm0..xmm3 for the
; next pass.
883	movups	xmm1,[esi]
884	movups	xmm0,[16+esi]
885	xorps	xmm2,xmm1
886	movups	xmm1,[32+esi]
887	xorps	xmm3,xmm0
888	movups	[edi],xmm2
889	movdqa	xmm0,[16+esp]
890	xorps	xmm4,xmm1
891	movdqa	xmm1,[64+esp]
892	movups	[16+edi],xmm3
893	movups	[32+edi],xmm4
894	paddd	xmm1,xmm0
895	paddd	xmm0,[48+esp]
896	movdqa	xmm2,[esp]
897	movups	xmm3,[48+esi]
898	movups	xmm4,[64+esi]
899	xorps	xmm5,xmm3
900	movups	xmm3,[80+esi]
901	lea	esi,[96+esi]
902	movdqa	[48+esp],xmm0
903db	102,15,56,0,194
904	xorps	xmm6,xmm4
905	movups	[48+edi],xmm5
906	xorps	xmm7,xmm3
907	movdqa	[64+esp],xmm1
908db	102,15,56,0,202
909	movups	[64+edi],xmm6
910	pshufd	xmm2,xmm0,192
911	movups	[80+edi],xmm7
912	lea	edi,[96+edi]
913	pshufd	xmm3,xmm0,128
; Borrow (CF) means fewer than six blocks remain.
914	sub	eax,6
915	jnc	NEAR L$039ctr32_loop6
916	add	eax,6
917	jz	NEAR L$040ctr32_ret
; Leftover blocks: recover the un-whitened fixed part of the counter
; block (XOR with round key 0 again) and restore edx/ecx for the helpers.
918	movdqu	xmm7,[ebp]
919	mov	edx,ebp
920	pxor	xmm7,[32+esp]
921	mov	ecx,DWORD [240+ebp]
; One to five blocks. Counter blocks are formed by OR-ing the swapped
; counter lane into the fixed part held in xmm7. The je/jb pairs reuse
; the cmp flags because pshufd/por do not modify flags.
922L$038ctr32_tail:
923	por	xmm2,xmm7
924	cmp	eax,2
925	jb	NEAR L$041ctr32_one
926	pshufd	xmm4,xmm0,64
927	por	xmm3,xmm7
928	je	NEAR L$042ctr32_two
929	pshufd	xmm5,xmm1,192
930	por	xmm4,xmm7
931	cmp	eax,4
932	jb	NEAR L$043ctr32_three
933	pshufd	xmm6,xmm1,128
934	por	xmm5,xmm7
935	je	NEAR L$044ctr32_four
; Five blocks: the six-block helper is used; the sixth lane (xmm7) is
; encrypted too but its result is not stored.
936	por	xmm6,xmm7
937	call	__aesni_encrypt6
938	movups	xmm1,[esi]
939	movups	xmm0,[16+esi]
940	xorps	xmm2,xmm1
941	movups	xmm1,[32+esi]
942	xorps	xmm3,xmm0
943	movups	xmm0,[48+esi]
944	xorps	xmm4,xmm1
945	movups	xmm1,[64+esi]
946	xorps	xmm5,xmm0
947	movups	[edi],xmm2
948	xorps	xmm6,xmm1
949	movups	[16+edi],xmm3
950	movups	[32+edi],xmm4
951	movups	[48+edi],xmm5
952	movups	[64+edi],xmm6
953	jmp	NEAR L$040ctr32_ret
954align	16
; Block count was exactly 1 on entry: use the counter block as given.
955L$037ctr32_one_shortcut:
956	movups	xmm2,[ebx]
957	mov	ecx,DWORD [240+edx]
; Single block: inline one-block round loop, then XOR with the input.
958L$041ctr32_one:
959	movups	xmm0,[edx]
960	movups	xmm1,[16+edx]
961	lea	edx,[32+edx]
962	xorps	xmm2,xmm0
963L$045enc1_loop_7:
964db	102,15,56,220,209
965	dec	ecx
966	movups	xmm1,[edx]
967	lea	edx,[16+edx]
968	jnz	NEAR L$045enc1_loop_7
969db	102,15,56,221,209
970	movups	xmm6,[esi]
971	xorps	xmm6,xmm2
972	movups	[edi],xmm6
973	jmp	NEAR L$040ctr32_ret
974align	16
975L$042ctr32_two:
976	call	__aesni_encrypt2
977	movups	xmm5,[esi]
978	movups	xmm6,[16+esi]
979	xorps	xmm2,xmm5
980	xorps	xmm3,xmm6
981	movups	[edi],xmm2
982	movups	[16+edi],xmm3
983	jmp	NEAR L$040ctr32_ret
984align	16
985L$043ctr32_three:
986	call	__aesni_encrypt3
987	movups	xmm5,[esi]
988	movups	xmm6,[16+esi]
989	xorps	xmm2,xmm5
990	movups	xmm7,[32+esi]
991	xorps	xmm3,xmm6
992	movups	[edi],xmm2
993	xorps	xmm4,xmm7
994	movups	[16+edi],xmm3
995	movups	[32+edi],xmm4
996	jmp	NEAR L$040ctr32_ret
997align	16
; Four blocks; falls through into the common exit.
998L$044ctr32_four:
999	call	__aesni_encrypt4
1000	movups	xmm6,[esi]
1001	movups	xmm7,[16+esi]
1002	movups	xmm1,[32+esi]
1003	xorps	xmm2,xmm6
1004	movups	xmm0,[48+esi]
1005	xorps	xmm3,xmm7
1006	movups	[edi],xmm2
1007	xorps	xmm4,xmm1
1008	movups	[16+edi],xmm3
1009	xorps	xmm5,xmm0
1010	movups	[32+edi],xmm4
1011	movups	[48+edi],xmm5
; Common exit: zero the xmm registers and frame slots 32..79 (the
; whitened counter block and both counter triples), then restore esp
; and the callee-saved registers.
1012L$040ctr32_ret:
1013	pxor	xmm0,xmm0
1014	pxor	xmm1,xmm1
1015	pxor	xmm2,xmm2
1016	pxor	xmm3,xmm3
1017	pxor	xmm4,xmm4
1018	movdqa	[32+esp],xmm0
1019	pxor	xmm5,xmm5
1020	movdqa	[48+esp],xmm0
1021	pxor	xmm6,xmm6
1022	movdqa	[64+esp],xmm0
1023	pxor	xmm7,xmm7
1024	mov	esp,DWORD [80+esp]
1025	pop	edi
1026	pop	esi
1027	pop	ebx
1028	pop	ebp
1029	ret
;-----------------------------------------------------------------------
; _aesni_xts_encrypt: tweaked block encryption. Every block is XORed
; with a 128-bit tweak before and after AES encryption; the tweak for
; the next block is derived by doubling the current one with 0x87
; feedback. A trailing partial block (length not a multiple of 16) is
; handled by swapping bytes with the last full output block and
; re-encrypting that block.
; Stack arguments (offsets are after the four pushes below):
;   [20+esp] input pointer   -> esi
;   [24+esp] output pointer  -> edi
;   [28+esp] length in bytes -> eax
;            NOTE(review): the partial-block path reads and writes
;            [edi-16], so it presumably requires length >= 16; confirm
;            against callers.
;   [32+esp] key schedule used for the data blocks -> edx, copy in ebp
;   [36+esp] key schedule used to encrypt the initial tweak
;   [40+esp] pointer to the 16-byte value encrypted into the first tweak
; Local frame (esp realigned to 16 bytes):
;   [0..95+esp] six 16-byte tweak slots
;   [96+esp]    dwords {135,0,1,0} (135 = 0x87): carry mask for doubling
;   [112+esp]   byte length (later replaced by length & 15)
;   [116+esp]   caller's esp
; Tweak-doubling idiom used throughout: xmm0 holds the per-dword sign
; mask of the tweak (pcmpgtd of zero against it); "pshufd ..,xmm0,19"
; plus "pand" with the mask above select 0x87 in dword 0 when bit 127
; was set and 1 in dword 2 when bit 63 was set; "paddq xmm1,xmm1"
; doubles both 64-bit halves; "pxor" merges the two carries.
; db 102,15,56,220,209..249 = aesenc xmm2..xmm7,xmm1
; db 102,15,56,221,209      = aesenclast xmm2,xmm1
; Saves and restores ebp, ebx, esi, edi; xmm0-xmm7 and the six tweak
; slots are zeroed on exit.
;-----------------------------------------------------------------------
1030global	_aesni_xts_encrypt
1031align	16
1032_aesni_xts_encrypt:
1033L$_aesni_xts_encrypt_begin:
1034	push	ebp
1035	push	ebx
1036	push	esi
1037	push	edi
; Encrypt the 16 bytes at [40+esp] with the key schedule at [36+esp]
; (one-block round loop); the result in xmm2 is the first tweak.
1038	mov	edx,DWORD [36+esp]
1039	mov	esi,DWORD [40+esp]
1040	mov	ecx,DWORD [240+edx]
1041	movups	xmm2,[esi]
1042	movups	xmm0,[edx]
1043	movups	xmm1,[16+edx]
1044	lea	edx,[32+edx]
1045	xorps	xmm2,xmm0
1046L$046enc1_loop_8:
1047db	102,15,56,220,209
1048	dec	ecx
1049	movups	xmm1,[edx]
1050	lea	edx,[16+edx]
1051	jnz	NEAR L$046enc1_loop_8
1052db	102,15,56,221,209
; Load the data arguments and build the aligned local frame.
1053	mov	esi,DWORD [20+esp]
1054	mov	edi,DWORD [24+esp]
1055	mov	eax,DWORD [28+esp]
1056	mov	edx,DWORD [32+esp]
1057	mov	ebp,esp
1058	sub	esp,120
1059	mov	ecx,DWORD [240+edx]
1060	and	esp,-16
1061	mov	DWORD [96+esp],135
1062	mov	DWORD [100+esp],0
1063	mov	DWORD [104+esp],1
1064	mov	DWORD [108+esp],0
1065	mov	DWORD [112+esp],eax
1066	mov	DWORD [116+esp],ebp
; xmm1 = current tweak, xmm0 = its sign mask, xmm3 = carry mask.
1067	movdqa	xmm1,xmm2
1068	pxor	xmm0,xmm0
1069	movdqa	xmm3,[96+esp]
1070	pcmpgtd	xmm0,xmm1
; eax = whole-block byte count; ebp/ebx keep key pointer and loop count.
1071	and	eax,-16
1072	mov	ebp,edx
1073	mov	ebx,ecx
1074	sub	eax,96
1075	jc	NEAR L$047xts_enc_short
; Key walk setup for the six-block loop: edx = 32 + 16*count bytes into
; the schedule, ebx = 16 - 16*count.
1076	shl	ecx,4
1077	mov	ebx,16
1078	sub	ebx,ecx
1079	lea	edx,[32+ecx*1+edx]
1080	jmp	NEAR L$048xts_enc_loop6
1081align	16
; Six-block pass: store tweaks 1..5 in [esp]..[64+esp], doubling after
; each one.
1082L$048xts_enc_loop6:
1083	pshufd	xmm2,xmm0,19
1084	pxor	xmm0,xmm0
1085	movdqa	[esp],xmm1
1086	paddq	xmm1,xmm1
1087	pand	xmm2,xmm3
1088	pcmpgtd	xmm0,xmm1
1089	pxor	xmm1,xmm2
1090	pshufd	xmm2,xmm0,19
1091	pxor	xmm0,xmm0
1092	movdqa	[16+esp],xmm1
1093	paddq	xmm1,xmm1
1094	pand	xmm2,xmm3
1095	pcmpgtd	xmm0,xmm1
1096	pxor	xmm1,xmm2
1097	pshufd	xmm2,xmm0,19
1098	pxor	xmm0,xmm0
1099	movdqa	[32+esp],xmm1
1100	paddq	xmm1,xmm1
1101	pand	xmm2,xmm3
1102	pcmpgtd	xmm0,xmm1
1103	pxor	xmm1,xmm2
1104	pshufd	xmm2,xmm0,19
1105	pxor	xmm0,xmm0
1106	movdqa	[48+esp],xmm1
1107	paddq	xmm1,xmm1
1108	pand	xmm2,xmm3
1109	pcmpgtd	xmm0,xmm1
1110	pxor	xmm1,xmm2
; Sixth tweak is built in xmm7 (stored to [80+esp] below) while the
; inputs are loaded and XORed with round key 0 and their tweaks.
1111	pshufd	xmm7,xmm0,19
1112	movdqa	[64+esp],xmm1
1113	paddq	xmm1,xmm1
1114	movups	xmm0,[ebp]
1115	pand	xmm7,xmm3
1116	movups	xmm2,[esi]
1117	pxor	xmm7,xmm1
1118	mov	ecx,ebx
1119	movdqu	xmm3,[16+esi]
1120	xorps	xmm2,xmm0
1121	movdqu	xmm4,[32+esi]
1122	pxor	xmm3,xmm0
1123	movdqu	xmm5,[48+esi]
1124	pxor	xmm4,xmm0
1125	movdqu	xmm6,[64+esi]
1126	pxor	xmm5,xmm0
1127	movdqu	xmm1,[80+esi]
1128	pxor	xmm6,xmm0
1129	lea	esi,[96+esi]
1130	pxor	xmm2,[esp]
1131	movdqa	[80+esp],xmm7
1132	pxor	xmm7,xmm1
1133	movups	xmm1,[16+ebp]
1134	pxor	xmm3,[16+esp]
1135	pxor	xmm4,[32+esp]
; First aesenc round is applied inline, then the shared six-block loop
; is entered at L$_aesni_encrypt6_enter.
1136db	102,15,56,220,209
1137	pxor	xmm5,[48+esp]
1138	pxor	xmm6,[64+esp]
1139db	102,15,56,220,217
1140	pxor	xmm7,xmm0
1141	movups	xmm0,[32+ebp]
1142db	102,15,56,220,225
1143db	102,15,56,220,233
1144db	102,15,56,220,241
1145db	102,15,56,220,249
1146	call	L$_aesni_encrypt6_enter
; Post-whitening with the saved tweaks and store; xmm1 = sixth tweak,
; which is then doubled to become the first tweak of the next pass.
1147	movdqa	xmm1,[80+esp]
1148	pxor	xmm0,xmm0
1149	xorps	xmm2,[esp]
1150	pcmpgtd	xmm0,xmm1
1151	xorps	xmm3,[16+esp]
1152	movups	[edi],xmm2
1153	xorps	xmm4,[32+esp]
1154	movups	[16+edi],xmm3
1155	xorps	xmm5,[48+esp]
1156	movups	[32+edi],xmm4
1157	xorps	xmm6,[64+esp]
1158	movups	[48+edi],xmm5
1159	xorps	xmm7,xmm1
1160	movups	[64+edi],xmm6
1161	pshufd	xmm2,xmm0,19
1162	movups	[80+edi],xmm7
1163	lea	edi,[96+edi]
1164	movdqa	xmm3,[96+esp]
1165	pxor	xmm0,xmm0
1166	paddq	xmm1,xmm1
1167	pand	xmm2,xmm3
1168	pcmpgtd	xmm0,xmm1
1169	pxor	xmm1,xmm2
; Borrow (CF) means fewer than 96 bytes of whole blocks remain.
1170	sub	eax,96
1171	jnc	NEAR L$048xts_enc_loop6
; Restore key pointer and loop count for the short-path helpers.
1172	mov	ecx,DWORD [240+ebp]
1173	mov	edx,ebp
1174	mov	ebx,ecx
; Zero to five whole blocks left; xmm1 = next tweak, xmm0 = its sign
; mask.
1175L$047xts_enc_short:
1176	add	eax,96
1177	jz	NEAR L$049xts_enc_done6x
; xmm5 = tweak for block 1. The je/jb after each cmp still see its
; flags because the SSE instructions in between do not modify flags.
1178	movdqa	xmm5,xmm1
1179	cmp	eax,32
1180	jb	NEAR L$050xts_enc_one
1181	pshufd	xmm2,xmm0,19
1182	pxor	xmm0,xmm0
1183	paddq	xmm1,xmm1
1184	pand	xmm2,xmm3
1185	pcmpgtd	xmm0,xmm1
1186	pxor	xmm1,xmm2
1187	je	NEAR L$051xts_enc_two
; xmm6 = tweak for block 2.
1188	pshufd	xmm2,xmm0,19
1189	pxor	xmm0,xmm0
1190	movdqa	xmm6,xmm1
1191	paddq	xmm1,xmm1
1192	pand	xmm2,xmm3
1193	pcmpgtd	xmm0,xmm1
1194	pxor	xmm1,xmm2
1195	cmp	eax,64
1196	jb	NEAR L$052xts_enc_three
; xmm7 = tweak for block 3; xmm1 becomes the tweak for block 4.
1197	pshufd	xmm2,xmm0,19
1198	pxor	xmm0,xmm0
1199	movdqa	xmm7,xmm1
1200	paddq	xmm1,xmm1
1201	pand	xmm2,xmm3
1202	pcmpgtd	xmm0,xmm1
1203	pxor	xmm1,xmm2
1204	movdqa	[esp],xmm5
1205	movdqa	[16+esp],xmm6
1206	je	NEAR L$053xts_enc_four
; Five blocks: tweaks 1..4 in [esp]..[48+esp], tweak 5 in xmm7 and
; [64+esp]; the six-block helper runs with xmm7 as an unused sixth lane.
1207	movdqa	[32+esp],xmm7
1208	pshufd	xmm7,xmm0,19
1209	movdqa	[48+esp],xmm1
1210	paddq	xmm1,xmm1
1211	pand	xmm7,xmm3
1212	pxor	xmm7,xmm1
1213	movdqu	xmm2,[esi]
1214	movdqu	xmm3,[16+esi]
1215	movdqu	xmm4,[32+esi]
1216	pxor	xmm2,[esp]
1217	movdqu	xmm5,[48+esi]
1218	pxor	xmm3,[16+esp]
1219	movdqu	xmm6,[64+esi]
1220	pxor	xmm4,[32+esp]
1221	lea	esi,[80+esi]
1222	pxor	xmm5,[48+esp]
1223	movdqa	[64+esp],xmm7
1224	pxor	xmm6,xmm7
1225	call	__aesni_encrypt6
; xmm1 = last tweak used (needed at L$054xts_enc_done).
1226	movaps	xmm1,[64+esp]
1227	xorps	xmm2,[esp]
1228	xorps	xmm3,[16+esp]
1229	xorps	xmm4,[32+esp]
1230	movups	[edi],xmm2
1231	xorps	xmm5,[48+esp]
1232	movups	[16+edi],xmm3
1233	xorps	xmm6,xmm1
1234	movups	[32+edi],xmm4
1235	movups	[48+edi],xmm5
1236	movups	[64+edi],xmm6
1237	lea	edi,[80+edi]
1238	jmp	NEAR L$054xts_enc_done
1239align	16
; One block: tweak in xmm5, inline one-block round loop.
1240L$050xts_enc_one:
1241	movups	xmm2,[esi]
1242	lea	esi,[16+esi]
1243	xorps	xmm2,xmm5
1244	movups	xmm0,[edx]
1245	movups	xmm1,[16+edx]
1246	lea	edx,[32+edx]
1247	xorps	xmm2,xmm0
1248L$055enc1_loop_9:
1249db	102,15,56,220,209
1250	dec	ecx
1251	movups	xmm1,[edx]
1252	lea	edx,[16+edx]
1253	jnz	NEAR L$055enc1_loop_9
1254db	102,15,56,221,209
1255	xorps	xmm2,xmm5
1256	movups	[edi],xmm2
1257	lea	edi,[16+edi]
; xmm1 = last tweak used.
1258	movdqa	xmm1,xmm5
1259	jmp	NEAR L$054xts_enc_done
1260align	16
; Two blocks: tweaks in xmm5 and xmm6 (xmm6 copied from xmm1 here).
1261L$051xts_enc_two:
1262	movaps	xmm6,xmm1
1263	movups	xmm2,[esi]
1264	movups	xmm3,[16+esi]
1265	lea	esi,[32+esi]
1266	xorps	xmm2,xmm5
1267	xorps	xmm3,xmm6
1268	call	__aesni_encrypt2
1269	xorps	xmm2,xmm5
1270	xorps	xmm3,xmm6
1271	movups	[edi],xmm2
1272	movups	[16+edi],xmm3
1273	lea	edi,[32+edi]
; xmm1 = last tweak used.
1274	movdqa	xmm1,xmm6
1275	jmp	NEAR L$054xts_enc_done
1276align	16
; Three blocks: tweaks in xmm5, xmm6, xmm7 (xmm7 copied from xmm1 here).
1277L$052xts_enc_three:
1278	movaps	xmm7,xmm1
1279	movups	xmm2,[esi]
1280	movups	xmm3,[16+esi]
1281	movups	xmm4,[32+esi]
1282	lea	esi,[48+esi]
1283	xorps	xmm2,xmm5
1284	xorps	xmm3,xmm6
1285	xorps	xmm4,xmm7
1286	call	__aesni_encrypt3
1287	xorps	xmm2,xmm5
1288	xorps	xmm3,xmm6
1289	xorps	xmm4,xmm7
1290	movups	[edi],xmm2
1291	movups	[16+edi],xmm3
1292	movups	[32+edi],xmm4
1293	lea	edi,[48+edi]
; xmm1 = last tweak used.
1294	movdqa	xmm1,xmm7
1295	jmp	NEAR L$054xts_enc_done
1296align	16
; Four blocks: tweaks 1 and 2 are in [esp] and [16+esp], tweak 3 in
; xmm7, tweak 4 in xmm6 (copied from xmm1 here). xmm5 is free to hold
; the fourth input block.
1297L$053xts_enc_four:
1298	movaps	xmm6,xmm1
1299	movups	xmm2,[esi]
1300	movups	xmm3,[16+esi]
1301	movups	xmm4,[32+esi]
1302	xorps	xmm2,[esp]
1303	movups	xmm5,[48+esi]
1304	lea	esi,[64+esi]
1305	xorps	xmm3,[16+esp]
1306	xorps	xmm4,xmm7
1307	xorps	xmm5,xmm6
1308	call	__aesni_encrypt4
1309	xorps	xmm2,[esp]
1310	xorps	xmm3,[16+esp]
1311	xorps	xmm4,xmm7
1312	movups	[edi],xmm2
1313	xorps	xmm5,xmm6
1314	movups	[16+edi],xmm3
1315	movups	[32+edi],xmm4
1316	movups	[48+edi],xmm5
1317	lea	edi,[64+edi]
; xmm1 = last tweak used.
1318	movdqa	xmm1,xmm6
1319	jmp	NEAR L$054xts_enc_done
1320align	16
; Reached when the whole-block count was a multiple of six blocks
; (including zero): xmm1 already holds the *next* unused tweak, so it
; is used directly for the partial-block step.
1321L$049xts_enc_done6x:
1322	mov	eax,DWORD [112+esp]
1323	and	eax,15
1324	jz	NEAR L$056xts_enc_ret
1325	movdqa	xmm5,xmm1
1326	mov	DWORD [112+esp],eax
1327	jmp	NEAR L$057xts_enc_steal
1328align	16
; Reached from the 1..5 block paths: xmm1 holds the last tweak *used*,
; so it is doubled once more into xmm5 if a partial block remains.
1329L$054xts_enc_done:
1330	mov	eax,DWORD [112+esp]
1331	pxor	xmm0,xmm0
1332	and	eax,15
1333	jz	NEAR L$056xts_enc_ret
1334	pcmpgtd	xmm0,xmm1
1335	mov	DWORD [112+esp],eax
1336	pshufd	xmm5,xmm0,19
1337	paddq	xmm1,xmm1
1338	pand	xmm5,[96+esp]
1339	pxor	xmm5,xmm1
; Partial block of eax bytes. Byte by byte: the trailing input byte
; replaces a byte of the last full output block at [edi-16], and the
; byte it replaced is written to the trailing output position.
1340L$057xts_enc_steal:
1341	movzx	ecx,BYTE [esi]
1342	movzx	edx,BYTE [edi-16]
1343	lea	esi,[1+esi]
1344	mov	BYTE [edi-16],cl
1345	mov	BYTE [edi],dl
1346	lea	edi,[1+edi]
1347	sub	eax,1
1348	jnz	NEAR L$057xts_enc_steal
; Rewind edi by the partial length, then re-encrypt the patched block
; at [edi-16] in place with tweak xmm5 (one-block round loop).
1349	sub	edi,DWORD [112+esp]
1350	mov	edx,ebp
1351	mov	ecx,ebx
1352	movups	xmm2,[edi-16]
1353	xorps	xmm2,xmm5
1354	movups	xmm0,[edx]
1355	movups	xmm1,[16+edx]
1356	lea	edx,[32+edx]
1357	xorps	xmm2,xmm0
1358L$058enc1_loop_10:
1359db	102,15,56,220,209
1360	dec	ecx
1361	movups	xmm1,[edx]
1362	lea	edx,[16+edx]
1363	jnz	NEAR L$058enc1_loop_10
1364db	102,15,56,221,209
1365	xorps	xmm2,xmm5
1366	movups	[edi-16],xmm2
; Common exit: zero xmm0-xmm7 and the six tweak slots, restore esp and
; the callee-saved registers.
1367L$056xts_enc_ret:
1368	pxor	xmm0,xmm0
1369	pxor	xmm1,xmm1
1370	pxor	xmm2,xmm2
1371	movdqa	[esp],xmm0
1372	pxor	xmm3,xmm3
1373	movdqa	[16+esp],xmm0
1374	pxor	xmm4,xmm4
1375	movdqa	[32+esp],xmm0
1376	pxor	xmm5,xmm5
1377	movdqa	[48+esp],xmm0
1378	pxor	xmm6,xmm6
1379	movdqa	[64+esp],xmm0
1380	pxor	xmm7,xmm7
1381	movdqa	[80+esp],xmm0
1382	mov	esp,DWORD [116+esp]
1383	pop	edi
1384	pop	esi
1385	pop	ebx
1386	pop	ebp
1387	ret
; ----------------------------------------------------------------------
; _aesni_xts_decrypt -- exported XTS-mode decryption entry point.
;
; Stack arguments as read below (offsets are taken after the four pushes,
; so [20+esp] is the first argument):
;   [20+esp] -> esi  input pointer
;   [24+esp] -> edi  output pointer
;   [28+esp] -> eax  length in bytes
;   [32+esp] -> edx  key schedule applied to the data blocks
;   [36+esp]         key schedule used to encrypt the initial tweak
;   [40+esp]         pointer to the 16 bytes the tweak is derived from
; Both key schedules are read with their round counter at offset 240.
;
; Local frame (16-byte aligned, built with sub esp,120 / and esp,-16):
;   [0..95+esp]    six 16-byte slots holding per-block tweaks
;   [96..111+esp]  tweak-doubling mask, dwords 0x87, 0, 1, 0
;   [112+esp]      length (less one block when it is not a multiple of 16),
;                  later overwritten with length mod 16
;   [116+esp]      caller's esp, restored on exit
;
; Register roles once the frame exists: ebp = data key schedule base,
; ebx = saved round counter (16 - 16*rounds inside the 6-block loop),
; xmm1 = running tweak, xmm0 = per-dword sign mask of the tweak,
; xmm3 = doubling mask.
;
; Tweak-doubling idiom used throughout: pcmpgtd against zero turns each
; dword's sign bit into an all-ones/all-zeros mask; pshufd 0x13 moves the
; masks of dwords 3 and 1 to dwords 0 and 2; pand with the 0x87/1 constant
; keeps only the carry bits; paddq doubles both 64-bit halves; pxor merges
; the carries back in.
;
; `db 102,15,56,N,M` lines are hand-encoded AES-NI opcodes (66 0F 38 xx):
; 220 = aesenc, 221 = aesenclast, 222 = aesdec, 223 = aesdeclast. The last
; byte is the ModRM byte (209 = xmm2,xmm1 ... 249 = xmm7,xmm1).
; ----------------------------------------------------------------------
1388global	_aesni_xts_decrypt
1389align	16
1390_aesni_xts_decrypt:
1391L$_aesni_xts_decrypt_begin:
; Save the registers that are popped again at L$069xts_dec_ret.
1392	push	ebp
1393	push	ebx
1394	push	esi
1395	push	edi
; Encrypt the 16 bytes at [40+esp] with the key schedule at [36+esp];
; the result in xmm2 is the initial tweak.
1396	mov	edx,DWORD [36+esp]
1397	mov	esi,DWORD [40+esp]
1398	mov	ecx,DWORD [240+edx]
1399	movups	xmm2,[esi]
1400	movups	xmm0,[edx]
1401	movups	xmm1,[16+edx]
1402	lea	edx,[32+edx]
1403	xorps	xmm2,xmm0
1404L$059enc1_loop_11:
; aesenc xmm2,xmm1 -- one round per iteration, ecx counts rounds down.
1405db	102,15,56,220,209
1406	dec	ecx
1407	movups	xmm1,[edx]
1408	lea	edx,[16+edx]
1409	jnz	NEAR L$059enc1_loop_11
; aesenclast xmm2,xmm1
1410db	102,15,56,221,209
; Load the data-path arguments.
1411	mov	esi,DWORD [20+esp]
1412	mov	edi,DWORD [24+esp]
1413	mov	eax,DWORD [28+esp]
1414	mov	edx,DWORD [32+esp]
; Build the aligned scratch frame; ebp carries the caller's esp until it
; is parked at [116+esp].
1415	mov	ebp,esp
1416	sub	esp,120
1417	and	esp,-16
; ebx = 16 when the length has a partial trailing block, else 0. That much
; is withheld from eax so a full block remains for the tail code at
; L$067xts_dec_done / L$072xts_dec_steal.
1418	xor	ebx,ebx
1419	test	eax,15
1420	setnz	bl
1421	shl	ebx,4
1422	sub	eax,ebx
; Doubling mask {0x87,0,1,0}, adjusted length and caller's esp.
1423	mov	DWORD [96+esp],135
1424	mov	DWORD [100+esp],0
1425	mov	DWORD [104+esp],1
1426	mov	DWORD [108+esp],0
1427	mov	DWORD [112+esp],eax
1428	mov	DWORD [116+esp],ebp
; ecx = round counter of the data key; ebp/ebx keep copies of key base and
; round counter so edx/ecx can be consumed by the cipher code.
1429	mov	ecx,DWORD [240+edx]
1430	mov	ebp,edx
1431	mov	ebx,ecx
; xmm1 = tweak, xmm0 = sign mask of the tweak, xmm3 = doubling mask.
1432	movdqa	xmm1,xmm2
1433	pxor	xmm0,xmm0
1434	movdqa	xmm3,[96+esp]
1435	pcmpgtd	xmm0,xmm1
; eax = whole-block byte count minus 96; a borrow means fewer than six
; blocks are available, so go straight to the short path.
1436	and	eax,-16
1437	sub	eax,96
1438	jc	NEAR L$060xts_dec_short
; Register setup consumed after `mov ecx,ebx` by L$_aesni_decrypt6_enter
; (defined outside this chunk): ebx = 16 - 16*rounds,
; edx = key + 32 + 16*rounds.
1439	shl	ecx,4
1440	mov	ebx,16
1441	sub	ebx,ecx
1442	lea	edx,[32+ecx*1+edx]
1443	jmp	NEAR L$061xts_dec_loop6
1444align	16
1445L$061xts_dec_loop6:
; Store the current tweak in slot 0 and derive the next four into slots
; 1-4, doubling xmm1 after each store.
1446	pshufd	xmm2,xmm0,19
1447	pxor	xmm0,xmm0
1448	movdqa	[esp],xmm1
1449	paddq	xmm1,xmm1
1450	pand	xmm2,xmm3
1451	pcmpgtd	xmm0,xmm1
1452	pxor	xmm1,xmm2
1453	pshufd	xmm2,xmm0,19
1454	pxor	xmm0,xmm0
1455	movdqa	[16+esp],xmm1
1456	paddq	xmm1,xmm1
1457	pand	xmm2,xmm3
1458	pcmpgtd	xmm0,xmm1
1459	pxor	xmm1,xmm2
1460	pshufd	xmm2,xmm0,19
1461	pxor	xmm0,xmm0
1462	movdqa	[32+esp],xmm1
1463	paddq	xmm1,xmm1
1464	pand	xmm2,xmm3
1465	pcmpgtd	xmm0,xmm1
1466	pxor	xmm1,xmm2
1467	pshufd	xmm2,xmm0,19
1468	pxor	xmm0,xmm0
1469	movdqa	[48+esp],xmm1
1470	paddq	xmm1,xmm1
1471	pand	xmm2,xmm3
1472	pcmpgtd	xmm0,xmm1
1473	pxor	xmm1,xmm2
; Sixth tweak is built in xmm7 (stored to slot 5 below). From here on the
; tweak arithmetic is interleaved with loading round key 0 (xmm0) and the
; six input blocks (xmm2-xmm6, with the sixth staged in xmm1).
1474	pshufd	xmm7,xmm0,19
1475	movdqa	[64+esp],xmm1
1476	paddq	xmm1,xmm1
1477	movups	xmm0,[ebp]
1478	pand	xmm7,xmm3
1479	movups	xmm2,[esi]
1480	pxor	xmm7,xmm1
1481	mov	ecx,ebx
1482	movdqu	xmm3,[16+esi]
1483	xorps	xmm2,xmm0
1484	movdqu	xmm4,[32+esi]
1485	pxor	xmm3,xmm0
1486	movdqu	xmm5,[48+esi]
1487	pxor	xmm4,xmm0
1488	movdqu	xmm6,[64+esi]
1489	pxor	xmm5,xmm0
1490	movdqu	xmm1,[80+esi]
1491	pxor	xmm6,xmm0
1492	lea	esi,[96+esi]
; Each block is xored with both round key 0 and its tweak before round 1.
1493	pxor	xmm2,[esp]
1494	movdqa	[80+esp],xmm7
1495	pxor	xmm7,xmm1
1496	movups	xmm1,[16+ebp]
1497	pxor	xmm3,[16+esp]
1498	pxor	xmm4,[32+esp]
; aesdec xmm2,xmm1 -- first round, issued inline for all six blocks.
1499db	102,15,56,222,209
1500	pxor	xmm5,[48+esp]
1501	pxor	xmm6,[64+esp]
; aesdec xmm3,xmm1
1502db	102,15,56,222,217
1503	pxor	xmm7,xmm0
1504	movups	xmm0,[32+ebp]
; aesdec xmm4,xmm1 / xmm5,xmm1 / xmm6,xmm1 / xmm7,xmm1
1505db	102,15,56,222,225
1506db	102,15,56,222,233
1507db	102,15,56,222,241
1508db	102,15,56,222,249
; Remaining rounds run in the shared 6-block helper, entered past its own
; prologue.
1509	call	L$_aesni_decrypt6_enter
; Undo the tweak on every output block and store it; in parallel reload
; the last tweak (slot 5) into xmm1 and advance it for the next pass.
1510	movdqa	xmm1,[80+esp]
1511	pxor	xmm0,xmm0
1512	xorps	xmm2,[esp]
1513	pcmpgtd	xmm0,xmm1
1514	xorps	xmm3,[16+esp]
1515	movups	[edi],xmm2
1516	xorps	xmm4,[32+esp]
1517	movups	[16+edi],xmm3
1518	xorps	xmm5,[48+esp]
1519	movups	[32+edi],xmm4
1520	xorps	xmm6,[64+esp]
1521	movups	[48+edi],xmm5
1522	xorps	xmm7,xmm1
1523	movups	[64+edi],xmm6
1524	pshufd	xmm2,xmm0,19
1525	movups	[80+edi],xmm7
1526	lea	edi,[96+edi]
; xmm3 was used for data above, so the doubling mask is reloaded.
1527	movdqa	xmm3,[96+esp]
1528	pxor	xmm0,xmm0
1529	paddq	xmm1,xmm1
1530	pand	xmm2,xmm3
1531	pcmpgtd	xmm0,xmm1
1532	pxor	xmm1,xmm2
1533	sub	eax,96
1534	jnc	NEAR L$061xts_dec_loop6
; Leaving the 6-block loop: restore the plain round counter and key base.
1535	mov	ecx,DWORD [240+ebp]
1536	mov	edx,ebp
1537	mov	ebx,ecx
1538L$060xts_dec_short:
; eax = bytes left in whole blocks (0, 16, 32, 48, 64 or 80).
1539	add	eax,96
1540	jz	NEAR L$062xts_dec_done6x
; xmm5 = tweak for the first remaining block.
1541	movdqa	xmm5,xmm1
1542	cmp	eax,32
1543	jb	NEAR L$063xts_dec_one
1544	pshufd	xmm2,xmm0,19
1545	pxor	xmm0,xmm0
1546	paddq	xmm1,xmm1
1547	pand	xmm2,xmm3
1548	pcmpgtd	xmm0,xmm1
1549	pxor	xmm1,xmm2
; The SSE instructions above do not modify EFLAGS, so this je still tests
; the `cmp eax,32` result.
1550	je	NEAR L$064xts_dec_two
; xmm6 = tweak for the second block.
1551	pshufd	xmm2,xmm0,19
1552	pxor	xmm0,xmm0
1553	movdqa	xmm6,xmm1
1554	paddq	xmm1,xmm1
1555	pand	xmm2,xmm3
1556	pcmpgtd	xmm0,xmm1
1557	pxor	xmm1,xmm2
1558	cmp	eax,64
1559	jb	NEAR L$065xts_dec_three
; xmm7 = tweak for the third block; xmm1 becomes the fourth.
1560	pshufd	xmm2,xmm0,19
1561	pxor	xmm0,xmm0
1562	movdqa	xmm7,xmm1
1563	paddq	xmm1,xmm1
1564	pand	xmm2,xmm3
1565	pcmpgtd	xmm0,xmm1
1566	pxor	xmm1,xmm2
1567	movdqa	[esp],xmm5
1568	movdqa	[16+esp],xmm6
; Still the flags of `cmp eax,64`.
1569	je	NEAR L$066xts_dec_four
; Five blocks: tweaks go to slots 0-4, inputs to xmm2-xmm6, and the
; 6-block helper is used; only xmm2-xmm6 are stored afterwards.
1570	movdqa	[32+esp],xmm7
1571	pshufd	xmm7,xmm0,19
1572	movdqa	[48+esp],xmm1
1573	paddq	xmm1,xmm1
1574	pand	xmm7,xmm3
1575	pxor	xmm7,xmm1
1576	movdqu	xmm2,[esi]
1577	movdqu	xmm3,[16+esi]
1578	movdqu	xmm4,[32+esi]
1579	pxor	xmm2,[esp]
1580	movdqu	xmm5,[48+esi]
1581	pxor	xmm3,[16+esp]
1582	movdqu	xmm6,[64+esi]
1583	pxor	xmm4,[32+esp]
1584	lea	esi,[80+esi]
1585	pxor	xmm5,[48+esp]
1586	movdqa	[64+esp],xmm7
1587	pxor	xmm6,xmm7
1588	call	__aesni_decrypt6
; xmm1 = last tweak used, as expected by L$067xts_dec_done.
1589	movaps	xmm1,[64+esp]
1590	xorps	xmm2,[esp]
1591	xorps	xmm3,[16+esp]
1592	xorps	xmm4,[32+esp]
1593	movups	[edi],xmm2
1594	xorps	xmm5,[48+esp]
1595	movups	[16+edi],xmm3
1596	xorps	xmm6,xmm1
1597	movups	[32+edi],xmm4
1598	movups	[48+edi],xmm5
1599	movups	[64+edi],xmm6
1600	lea	edi,[80+edi]
1601	jmp	NEAR L$067xts_dec_done
1602align	16
1603L$063xts_dec_one:
; One block: tweak in xmm5, decrypted inline.
1604	movups	xmm2,[esi]
1605	lea	esi,[16+esi]
1606	xorps	xmm2,xmm5
1607	movups	xmm0,[edx]
1608	movups	xmm1,[16+edx]
1609	lea	edx,[32+edx]
1610	xorps	xmm2,xmm0
1611L$068dec1_loop_12:
; aesdec xmm2,xmm1
1612db	102,15,56,222,209
1613	dec	ecx
1614	movups	xmm1,[edx]
1615	lea	edx,[16+edx]
1616	jnz	NEAR L$068dec1_loop_12
; aesdeclast xmm2,xmm1
1617db	102,15,56,223,209
1618	xorps	xmm2,xmm5
1619	movups	[edi],xmm2
1620	lea	edi,[16+edi]
; xmm1 = last tweak used.
1621	movdqa	xmm1,xmm5
1622	jmp	NEAR L$067xts_dec_done
1623align	16
1624L$064xts_dec_two:
; Two blocks: tweaks in xmm5 and xmm6 (= xmm1 on entry).
1625	movaps	xmm6,xmm1
1626	movups	xmm2,[esi]
1627	movups	xmm3,[16+esi]
1628	lea	esi,[32+esi]
1629	xorps	xmm2,xmm5
1630	xorps	xmm3,xmm6
1631	call	__aesni_decrypt2
1632	xorps	xmm2,xmm5
1633	xorps	xmm3,xmm6
1634	movups	[edi],xmm2
1635	movups	[16+edi],xmm3
1636	lea	edi,[32+edi]
1637	movdqa	xmm1,xmm6
1638	jmp	NEAR L$067xts_dec_done
1639align	16
1640L$065xts_dec_three:
; Three blocks: tweaks in xmm5, xmm6 and xmm7 (= xmm1 on entry).
1641	movaps	xmm7,xmm1
1642	movups	xmm2,[esi]
1643	movups	xmm3,[16+esi]
1644	movups	xmm4,[32+esi]
1645	lea	esi,[48+esi]
1646	xorps	xmm2,xmm5
1647	xorps	xmm3,xmm6
1648	xorps	xmm4,xmm7
1649	call	__aesni_decrypt3
1650	xorps	xmm2,xmm5
1651	xorps	xmm3,xmm6
1652	xorps	xmm4,xmm7
1653	movups	[edi],xmm2
1654	movups	[16+edi],xmm3
1655	movups	[32+edi],xmm4
1656	lea	edi,[48+edi]
1657	movdqa	xmm1,xmm7
1658	jmp	NEAR L$067xts_dec_done
1659align	16
1660L$066xts_dec_four:
; Four blocks: tweaks in slot 0, slot 1, xmm7 and xmm6 (= xmm1 on entry).
1661	movaps	xmm6,xmm1
1662	movups	xmm2,[esi]
1663	movups	xmm3,[16+esi]
1664	movups	xmm4,[32+esi]
1665	xorps	xmm2,[esp]
1666	movups	xmm5,[48+esi]
1667	lea	esi,[64+esi]
1668	xorps	xmm3,[16+esp]
1669	xorps	xmm4,xmm7
1670	xorps	xmm5,xmm6
1671	call	__aesni_decrypt4
1672	xorps	xmm2,[esp]
1673	xorps	xmm3,[16+esp]
1674	xorps	xmm4,xmm7
1675	movups	[edi],xmm2
1676	xorps	xmm5,xmm6
1677	movups	[16+edi],xmm3
1678	movups	[32+edi],xmm4
1679	movups	[48+edi],xmm5
1680	lea	edi,[64+edi]
1681	movdqa	xmm1,xmm6
1682	jmp	NEAR L$067xts_dec_done
1683align	16
1684L$062xts_dec_done6x:
; No whole blocks were left over. xmm1/xmm0/xmm3 already hold the next
; tweak, its sign mask and the doubling mask, so only the partial-block
; check is needed.
1685	mov	eax,DWORD [112+esp]
1686	and	eax,15
1687	jz	NEAR L$069xts_dec_ret
; [112+esp] now holds the tail byte count (used to rewind edi later).
1688	mov	DWORD [112+esp],eax
1689	jmp	NEAR L$070xts_dec_only_one_more
1690align	16
1691L$067xts_dec_done:
; xmm1 = last tweak used. If there is a partial tail, advance it once.
1692	mov	eax,DWORD [112+esp]
1693	pxor	xmm0,xmm0
1694	and	eax,15
1695	jz	NEAR L$069xts_dec_ret
1696	pcmpgtd	xmm0,xmm1
1697	mov	DWORD [112+esp],eax
1698	pshufd	xmm2,xmm0,19
1699	pxor	xmm0,xmm0
1700	movdqa	xmm3,[96+esp]
1701	paddq	xmm1,xmm1
1702	pand	xmm2,xmm3
1703	pcmpgtd	xmm0,xmm1
1704	pxor	xmm1,xmm2
1705L$070xts_dec_only_one_more:
; Tail handling: xmm6 = current tweak, xmm5 = the tweak after it. The last
; full input block is decrypted with the later tweak (xmm5) first.
1706	pshufd	xmm5,xmm0,19
1707	movdqa	xmm6,xmm1
1708	paddq	xmm1,xmm1
1709	pand	xmm5,xmm3
1710	pxor	xmm5,xmm1
1711	mov	edx,ebp
1712	mov	ecx,ebx
1713	movups	xmm2,[esi]
1714	xorps	xmm2,xmm5
1715	movups	xmm0,[edx]
1716	movups	xmm1,[16+edx]
1717	lea	edx,[32+edx]
1718	xorps	xmm2,xmm0
1719L$071dec1_loop_13:
; aesdec xmm2,xmm1
1720db	102,15,56,222,209
1721	dec	ecx
1722	movups	xmm1,[edx]
1723	lea	edx,[16+edx]
1724	jnz	NEAR L$071dec1_loop_13
; aesdeclast xmm2,xmm1
1725db	102,15,56,223,209
1726	xorps	xmm2,xmm5
1727	movups	[edi],xmm2
1728L$072xts_dec_steal:
; Byte-wise swap for eax tail bytes: the byte just written at [edi] moves
; to [16+edi], and the trailing input byte at [16+esi] takes its place.
1729	movzx	ecx,BYTE [16+esi]
1730	movzx	edx,BYTE [edi]
1731	lea	esi,[1+esi]
1732	mov	BYTE [edi],cl
1733	mov	BYTE [16+edi],dl
1734	lea	edi,[1+edi]
1735	sub	eax,1
1736	jnz	NEAR L$072xts_dec_steal
; Rewind edi by the tail count and decrypt the merged block in place with
; the earlier tweak (xmm6).
1737	sub	edi,DWORD [112+esp]
1738	mov	edx,ebp
1739	mov	ecx,ebx
1740	movups	xmm2,[edi]
1741	xorps	xmm2,xmm6
1742	movups	xmm0,[edx]
1743	movups	xmm1,[16+edx]
1744	lea	edx,[32+edx]
1745	xorps	xmm2,xmm0
1746L$073dec1_loop_14:
; aesdec xmm2,xmm1
1747db	102,15,56,222,209
1748	dec	ecx
1749	movups	xmm1,[edx]
1750	lea	edx,[16+edx]
1751	jnz	NEAR L$073dec1_loop_14
; aesdeclast xmm2,xmm1
1752db	102,15,56,223,209
1753	xorps	xmm2,xmm6
1754	movups	[edi],xmm2
1755L$069xts_dec_ret:
; Wipe xmm0-xmm7 and the six tweak slots, then restore the caller's stack
; pointer and saved registers.
1756	pxor	xmm0,xmm0
1757	pxor	xmm1,xmm1
1758	pxor	xmm2,xmm2
1759	movdqa	[esp],xmm0
1760	pxor	xmm3,xmm3
1761	movdqa	[16+esp],xmm0
1762	pxor	xmm4,xmm4
1763	movdqa	[32+esp],xmm0
1764	pxor	xmm5,xmm5
1765	movdqa	[48+esp],xmm0
1766	pxor	xmm6,xmm6
1767	movdqa	[64+esp],xmm0
1768	pxor	xmm7,xmm7
1769	movdqa	[80+esp],xmm0
1770	mov	esp,DWORD [116+esp]
1771	pop	edi
1772	pop	esi
1773	pop	ebx
1774	pop	ebp
1775	ret
; ----------------------------------------------------------------------
; _aesni_cbc_encrypt -- exported CBC-mode encrypt/decrypt entry point.
;
; Stack arguments as read below (offsets are taken after the four pushes):
;   [20+esp] -> esi  input pointer
;   [24+esp] -> edi  output pointer
;   [28+esp] -> eax  length in bytes (0 returns immediately)
;   [32+esp] -> edx  key schedule, round counter at offset 240
;   [36+esp] -> ebp  pointer to the 16-byte IV; updated on return
;   [40+esp]         direction flag: 0 selects the decrypt path
;
; Local frame: esp is moved to (esp-24) & -16.
;   [0..15+esp]  16-byte scratch (saved IV / partial-block staging)
;   [16+esp]     caller's esp
;
; Register roles after setup: ebp = key schedule base, ebx = round counter
; backup, xmm7 = IV / chaining value.
;
; Hand-encoded opcodes: `db 102,15,56,220/221/222/223,209` are
; aesenc / aesenclast / aesdec / aesdeclast xmm2,xmm1. `dd 2767451785`
; is bytes 89 F6 F3 A4 (mov esi,esi ; rep movsb) and `dd 2868115081` is
; 89 F6 F3 AA (mov esi,esi ; rep stosb).
;
; Change from the previous revision: the partial-tail path now overwrites
; its stack scratch slot with zeros (xmm0) instead of storing the
; decrypted block (xmm2) a second time, so no plaintext stays on the
; stack after return.
; ----------------------------------------------------------------------
1776global	_aesni_cbc_encrypt
1777align	16
1778_aesni_cbc_encrypt:
1779L$_aesni_cbc_encrypt_begin:
1780	push	ebp
1781	push	ebx
1782	push	esi
1783	push	edi
; Argument loads are interleaved with computing the aligned frame address
; in ebx.
1784	mov	esi,DWORD [20+esp]
1785	mov	ebx,esp
1786	mov	edi,DWORD [24+esp]
1787	sub	ebx,24
1788	mov	eax,DWORD [28+esp]
1789	and	ebx,-16
1790	mov	edx,DWORD [32+esp]
1791	mov	ebp,DWORD [36+esp]
; Zero length: nothing to do, and the frame has not been switched yet.
1792	test	eax,eax
1793	jz	NEAR L$074cbc_abort
; The direction flag is compared while esp still addresses the arguments;
; none of the instructions up to the je below modify EFLAGS.
1794	cmp	DWORD [40+esp],0
; Switch to the aligned frame; ebx now holds the caller's esp.
1795	xchg	ebx,esp
1796	movups	xmm7,[ebp]
1797	mov	ecx,DWORD [240+edx]
1798	mov	ebp,edx
1799	mov	DWORD [16+esp],ebx
1800	mov	ebx,ecx
1801	je	NEAR L$075cbc_decrypt
; ---- encrypt path: xmm2 carries the chaining value ----
1802	movaps	xmm2,xmm7
1803	cmp	eax,16
1804	jb	NEAR L$076cbc_enc_tail
1805	sub	eax,16
1806	jmp	NEAR L$077cbc_enc_loop
1807align	16
1808L$077cbc_enc_loop:
; xmm7 = input ^ round key 0; xmm2 = chaining value ^ that.
1809	movups	xmm7,[esi]
1810	lea	esi,[16+esi]
1811	movups	xmm0,[edx]
1812	movups	xmm1,[16+edx]
1813	xorps	xmm7,xmm0
1814	lea	edx,[32+edx]
1815	xorps	xmm2,xmm7
1816L$078enc1_loop_15:
; aesenc xmm2,xmm1
1817db	102,15,56,220,209
1818	dec	ecx
1819	movups	xmm1,[edx]
1820	lea	edx,[16+edx]
1821	jnz	NEAR L$078enc1_loop_15
; aesenclast xmm2,xmm1
1822db	102,15,56,221,209
; Restore round counter and key pointer for the next block.
1823	mov	ecx,ebx
1824	mov	edx,ebp
1825	movups	[edi],xmm2
1826	lea	edi,[16+edi]
1827	sub	eax,16
1828	jnc	NEAR L$077cbc_enc_loop
; eax went negative: add 16 back to get the leftover byte count (0..15).
1829	add	eax,16
1830	jnz	NEAR L$076cbc_enc_tail
; No leftover: the last ciphertext block becomes the new IV.
1831	movaps	xmm7,xmm2
1832	pxor	xmm2,xmm2
1833	jmp	NEAR L$079cbc_ret
1834L$076cbc_enc_tail:
; Copy the eax leftover input bytes to the output, zero-fill up to 16
; bytes, then run that block through the loop in place with eax = 0.
1835	mov	ecx,eax
; mov esi,esi ; rep movsb
1836dd	2767451785
1837	mov	ecx,16
1838	sub	ecx,eax
1839	xor	eax,eax
; mov esi,esi ; rep stosb (al = 0)
1840dd	2868115081
; edi advanced 16 bytes in total; step back so input == output.
1841	lea	edi,[edi-16]
1842	mov	ecx,ebx
1843	mov	esi,edi
1844	mov	edx,ebp
1845	jmp	NEAR L$077cbc_enc_loop
1846align	16
1847L$075cbc_decrypt:
; ---- decrypt path ----
; Up to 80 bytes are handled entirely by the tail code.
1848	cmp	eax,80
1849	jbe	NEAR L$080cbc_dec_tail
; The IV is parked in the scratch slot; xmm7 is needed for data.
1850	movaps	[esp],xmm7
1851	sub	eax,80
1852	jmp	NEAR L$081cbc_dec_loop6_enter
1853align	16
1854L$082cbc_dec_loop6:
; Loop back-edge: xmm0 holds the last ciphertext block of the previous
; group (next IV); xmm7 is that group's sixth plaintext block, stored late.
1855	movaps	[esp],xmm0
1856	movups	[edi],xmm7
1857	lea	edi,[16+edi]
1858L$081cbc_dec_loop6_enter:
1859	movdqu	xmm2,[esi]
1860	movdqu	xmm3,[16+esi]
1861	movdqu	xmm4,[32+esi]
1862	movdqu	xmm5,[48+esi]
1863	movdqu	xmm6,[64+esi]
1864	movdqu	xmm7,[80+esi]
1865	call	__aesni_decrypt6
; Xor each decrypted block with the preceding ciphertext block (re-read
; from the input); the first one uses the saved IV.
1866	movups	xmm1,[esi]
1867	movups	xmm0,[16+esi]
1868	xorps	xmm2,[esp]
1869	xorps	xmm3,xmm1
1870	movups	xmm1,[32+esi]
1871	xorps	xmm4,xmm0
1872	movups	xmm0,[48+esi]
1873	xorps	xmm5,xmm1
1874	movups	xmm1,[64+esi]
1875	xorps	xmm6,xmm0
1876	movups	xmm0,[80+esi]
1877	xorps	xmm7,xmm1
1878	movups	[edi],xmm2
1879	movups	[16+edi],xmm3
1880	lea	esi,[96+esi]
1881	movups	[32+edi],xmm4
1882	mov	ecx,ebx
1883	movups	[48+edi],xmm5
1884	mov	edx,ebp
1885	movups	[64+edi],xmm6
; Only five blocks are committed here; the sixth stays in xmm7.
1886	lea	edi,[80+edi]
1887	sub	eax,96
1888	ja	NEAR L$082cbc_dec_loop6
; xmm2 = pending sixth plaintext block, xmm7 = new IV.
1889	movaps	xmm2,xmm7
1890	movaps	xmm7,xmm0
1891	add	eax,80
1892	jle	NEAR L$083cbc_dec_clear_tail_collected
1893	movups	[edi],xmm2
1894	lea	edi,[16+edi]
1895L$080cbc_dec_tail:
; 1..80 bytes remain. xmm6 and xmm5 keep copies of ciphertext blocks 0 and
; 1 for the chaining xor in the 1-4 block cases.
1896	movups	xmm2,[esi]
1897	movaps	xmm6,xmm2
1898	cmp	eax,16
1899	jbe	NEAR L$084cbc_dec_one
1900	movups	xmm3,[16+esi]
1901	movaps	xmm5,xmm3
1902	cmp	eax,32
1903	jbe	NEAR L$085cbc_dec_two
1904	movups	xmm4,[32+esi]
1905	cmp	eax,48
1906	jbe	NEAR L$086cbc_dec_three
1907	movups	xmm5,[48+esi]
1908	cmp	eax,64
1909	jbe	NEAR L$087cbc_dec_four
; Five blocks: use the 6-block helper with a zeroed sixth lane.
1910	movups	xmm6,[64+esi]
1911	movaps	[esp],xmm7
1912	movups	xmm2,[esi]
1913	xorps	xmm7,xmm7
1914	call	__aesni_decrypt6
1915	movups	xmm1,[esi]
1916	movups	xmm0,[16+esi]
1917	xorps	xmm2,[esp]
1918	xorps	xmm3,xmm1
1919	movups	xmm1,[32+esi]
1920	xorps	xmm4,xmm0
1921	movups	xmm0,[48+esi]
1922	xorps	xmm5,xmm1
; Fifth ciphertext block becomes the new IV.
1923	movups	xmm7,[64+esi]
1924	xorps	xmm6,xmm0
; Stored blocks are cleared from their registers right away; the last
; block moves to xmm2 for the common tail code.
1925	movups	[edi],xmm2
1926	movups	[16+edi],xmm3
1927	pxor	xmm3,xmm3
1928	movups	[32+edi],xmm4
1929	pxor	xmm4,xmm4
1930	movups	[48+edi],xmm5
1931	pxor	xmm5,xmm5
1932	lea	edi,[64+edi]
1933	movaps	xmm2,xmm6
1934	pxor	xmm6,xmm6
1935	sub	eax,80
1936	jmp	NEAR L$088cbc_dec_tail_collected
1937align	16
1938L$084cbc_dec_one:
; One block, decrypted inline.
1939	movups	xmm0,[edx]
1940	movups	xmm1,[16+edx]
1941	lea	edx,[32+edx]
1942	xorps	xmm2,xmm0
1943L$089dec1_loop_16:
; aesdec xmm2,xmm1
1944db	102,15,56,222,209
1945	dec	ecx
1946	movups	xmm1,[edx]
1947	lea	edx,[16+edx]
1948	jnz	NEAR L$089dec1_loop_16
; aesdeclast xmm2,xmm1
1949db	102,15,56,223,209
; plaintext = decrypted ^ IV; the ciphertext copy becomes the new IV.
1950	xorps	xmm2,xmm7
1951	movaps	xmm7,xmm6
1952	sub	eax,16
1953	jmp	NEAR L$088cbc_dec_tail_collected
1954align	16
1955L$085cbc_dec_two:
1956	call	__aesni_decrypt2
1957	xorps	xmm2,xmm7
1958	xorps	xmm3,xmm6
1959	movups	[edi],xmm2
1960	movaps	xmm2,xmm3
1961	pxor	xmm3,xmm3
1962	lea	edi,[16+edi]
; Second ciphertext block (copy in xmm5) is the new IV.
1963	movaps	xmm7,xmm5
1964	sub	eax,32
1965	jmp	NEAR L$088cbc_dec_tail_collected
1966align	16
1967L$086cbc_dec_three:
1968	call	__aesni_decrypt3
1969	xorps	xmm2,xmm7
1970	xorps	xmm3,xmm6
1971	xorps	xmm4,xmm5
1972	movups	[edi],xmm2
1973	movaps	xmm2,xmm4
1974	pxor	xmm4,xmm4
1975	movups	[16+edi],xmm3
1976	pxor	xmm3,xmm3
1977	lea	edi,[32+edi]
; Third ciphertext block is the new IV.
1978	movups	xmm7,[32+esi]
1979	sub	eax,48
1980	jmp	NEAR L$088cbc_dec_tail_collected
1981align	16
1982L$087cbc_dec_four:
1983	call	__aesni_decrypt4
; xmm5 was overwritten with data block 3, so ciphertext blocks 1 and 2 are
; re-read from the input for the chaining xor.
1984	movups	xmm1,[16+esi]
1985	movups	xmm0,[32+esi]
1986	xorps	xmm2,xmm7
1987	movups	xmm7,[48+esi]
1988	xorps	xmm3,xmm6
1989	movups	[edi],xmm2
1990	xorps	xmm4,xmm1
1991	movups	[16+edi],xmm3
1992	pxor	xmm3,xmm3
1993	xorps	xmm5,xmm0
1994	movups	[32+edi],xmm4
1995	pxor	xmm4,xmm4
1996	lea	edi,[48+edi]
1997	movaps	xmm2,xmm5
1998	pxor	xmm5,xmm5
1999	sub	eax,64
2000	jmp	NEAR L$088cbc_dec_tail_collected
2001align	16
2002L$083cbc_dec_clear_tail_collected:
; Reached straight from the 6-block loop: xmm3-xmm6 still hold plaintext.
2003	pxor	xmm3,xmm3
2004	pxor	xmm4,xmm4
2005	pxor	xmm5,xmm5
2006	pxor	xmm6,xmm6
2007L$088cbc_dec_tail_collected:
; xmm2 = last plaintext block, not yet stored. If the low four bits of the
; remaining count are zero, store it whole.
2008	and	eax,15
2009	jnz	NEAR L$090cbc_dec_tail_partial
2010	movups	[edi],xmm2
2011	pxor	xmm0,xmm0
2012	jmp	NEAR L$079cbc_ret
2013align	16
2014L$090cbc_dec_tail_partial:
; Stage the block in the scratch slot and copy 16-eax bytes of it to the
; output with rep movsb.
2015	movaps	[esp],xmm2
2016	pxor	xmm0,xmm0
2017	mov	ecx,16
2018	mov	esi,esp
2019	sub	ecx,eax
; mov esi,esi ; rep movsb
2020dd	2767451785
; Scrub the staged plaintext: xmm0 was zeroed above and rep movsb does not
; touch it. The saved esp at [16+esp] lies outside this 16-byte store.
2021	movdqa	[esp],xmm0
2022L$079cbc_ret:
; Back on the caller's stack: write the updated IV through the pointer
; argument and clear the registers that held data.
2023	mov	esp,DWORD [16+esp]
2024	mov	ebp,DWORD [36+esp]
2025	pxor	xmm2,xmm2
2026	pxor	xmm1,xmm1
2027	movups	[ebp],xmm7
2028	pxor	xmm7,xmm7
2029L$074cbc_abort:
2030	pop	edi
2031	pop	esi
2032	pop	ebx
2033	pop	ebp
2034	ret
; ----------------------------------------------------------------------
; __aesni_set_encrypt_key -- internal key-expansion worker, register args.
;   In:  eax = pointer to the user key bytes
;        ecx = key size in bits (128, 192 or 256)
;        edx = pointer to the key schedule to fill
;   Out: eax = 0 on success, -1 if eax or edx is null, -2 for any other
;              bit count
;        ecx = 9, 11 or 13 on success (the same value is stored at
;              offset 240 of the schedule)
;        xmm0-xmm5 are zeroed on success
;   ebp and ebx are saved and restored here.
;
; Each key size has two implementations: one built on aeskeygenassist and
; an "_alt" one built on pshufb + aesenclast with constants from
; L$key_const. The _alt version is chosen when
; (_OPENSSL_ia32cap_P[1] & 0x10000800) == 0x10000000.
; NOTE(review): bits 28 and 11 of that word are presumably the AVX and XOP
; capability flags -- confirm against the OPENSSL_ia32cap definition.
;
; Hand-encoded opcodes used below:
;   db 102,15,58,223,M,imm  aeskeygenassist (M=200: xmm1,xmm0; 202: xmm1,xmm2)
;   db 102,15,56,0,M        pshufb          (M=197: xmm0,xmm5; 213: xmm2,xmm5)
;   db 102,15,56,221,M      aesenclast      (M=196: xmm0,xmm4; 212: xmm2,xmm4;
;                                            211: xmm2,xmm3)
; ----------------------------------------------------------------------
2035align	16
2036__aesni_set_encrypt_key:
2037	push	ebp
2038	push	ebx
2039	test	eax,eax
2040	jz	NEAR L$091bad_pointer
2041	test	edx,edx
2042	jz	NEAR L$091bad_pointer
; call/pop yields the run-time address of L$092pic, from which ebx is
; pointed at L$key_const.
2043	call	L$092pic
2044L$092pic:
2045	pop	ebx
2046	lea	ebx,[(L$key_const-L$092pic)+ebx]
2047	lea	ebp,[_OPENSSL_ia32cap_P]
; xmm0 = first 16 key bytes; xmm4 = 0 seeds the shufps-based helpers.
2048	movups	xmm0,[eax]
2049	xorps	xmm4,xmm4
2050	mov	ebp,DWORD [4+ebp]
; edx is biased by 16 so round key 0 is written at [edx-16].
2051	lea	edx,[16+edx]
2052	and	ebp,268437504
2053	cmp	ecx,256
2054	je	NEAR L$09314rounds
2055	cmp	ecx,192
2056	je	NEAR L$09412rounds
2057	cmp	ecx,128
2058	jne	NEAR L$095bad_keybits
2059align	16
2060L$09610rounds:
; ---- 128-bit key, aeskeygenassist variant ----
2061	cmp	ebp,268435456
2062	je	NEAR L$09710rounds_alt
2063	mov	ecx,9
2064	movups	[edx-16],xmm0
; Ten expansion steps with round constants 1,2,4,...,128,27,54. The first
; uses the _cold entry, which skips the store/advance of the previous key.
2065db	102,15,58,223,200,1
2066	call	L$098key_128_cold
2067db	102,15,58,223,200,2
2068	call	L$099key_128
2069db	102,15,58,223,200,4
2070	call	L$099key_128
2071db	102,15,58,223,200,8
2072	call	L$099key_128
2073db	102,15,58,223,200,16
2074	call	L$099key_128
2075db	102,15,58,223,200,32
2076	call	L$099key_128
2077db	102,15,58,223,200,64
2078	call	L$099key_128
2079db	102,15,58,223,200,128
2080	call	L$099key_128
2081db	102,15,58,223,200,27
2082	call	L$099key_128
2083db	102,15,58,223,200,54
2084	call	L$099key_128
; Final round key, then the round counter at schedule offset 240.
2085	movups	[edx],xmm0
2086	mov	DWORD [80+edx],ecx
2087	jmp	NEAR L$100good_key
2088align	16
2089L$099key_128:
; Store the round key produced by the previous step and advance.
2090	movups	[edx],xmm0
2091	lea	edx,[16+edx]
2092L$098key_128_cold:
; Derive the next round key in xmm0 from the previous one: xmm4 is used to
; fold shifted copies of xmm0 into itself, then dword 3 of the
; aeskeygenassist result (xmm1) is broadcast and xored in.
2093	shufps	xmm4,xmm0,16
2094	xorps	xmm0,xmm4
2095	shufps	xmm4,xmm0,140
2096	xorps	xmm0,xmm4
2097	shufps	xmm1,xmm1,255
2098	xorps	xmm0,xmm1
2099	ret
2100align	16
2101L$09710rounds_alt:
; ---- 128-bit key, pshufb/aesenclast variant ----
; xmm5 = shuffle mask at L$key_const+0, xmm4 = round constant vector at
; +32 (doubled with pslld each iteration), xmm2 = previous round key.
2102	movdqa	xmm5,[ebx]
2103	mov	ecx,8
2104	movdqa	xmm4,[32+ebx]
2105	movdqa	xmm2,xmm0
2106	movdqu	[edx-16],xmm0
2107L$101loop_key128:
; pshufb xmm0,xmm5 ; aesenclast xmm0,xmm4
2108db	102,15,56,0,197
2109db	102,15,56,221,196
2110	pslld	xmm4,1
2111	lea	edx,[16+edx]
; xmm2 = xor of the previous key with itself shifted left by 4, 8 and 12
; bytes.
2112	movdqa	xmm3,xmm2
2113	pslldq	xmm2,4
2114	pxor	xmm3,xmm2
2115	pslldq	xmm2,4
2116	pxor	xmm3,xmm2
2117	pslldq	xmm2,4
2118	pxor	xmm2,xmm3
2119	pxor	xmm0,xmm2
2120	movdqu	[edx-16],xmm0
2121	movdqa	xmm2,xmm0
2122	dec	ecx
2123	jnz	NEAR L$101loop_key128
; Last two round keys use the constant vector at L$key_const+48 (27), and
; its doubled value for the final one.
2124	movdqa	xmm4,[48+ebx]
2125db	102,15,56,0,197
2126db	102,15,56,221,196
2127	pslld	xmm4,1
2128	movdqa	xmm3,xmm2
2129	pslldq	xmm2,4
2130	pxor	xmm3,xmm2
2131	pslldq	xmm2,4
2132	pxor	xmm3,xmm2
2133	pslldq	xmm2,4
2134	pxor	xmm2,xmm3
2135	pxor	xmm0,xmm2
2136	movdqu	[edx],xmm0
2137	movdqa	xmm2,xmm0
2138db	102,15,56,0,197
2139db	102,15,56,221,196
2140	movdqa	xmm3,xmm2
2141	pslldq	xmm2,4
2142	pxor	xmm3,xmm2
2143	pslldq	xmm2,4
2144	pxor	xmm3,xmm2
2145	pslldq	xmm2,4
2146	pxor	xmm2,xmm3
2147	pxor	xmm0,xmm2
2148	movdqu	[16+edx],xmm0
; Round counter at schedule offset 240.
2149	mov	ecx,9
2150	mov	DWORD [96+edx],ecx
2151	jmp	NEAR L$100good_key
2152align	16
2153L$09412rounds:
; ---- 192-bit key: xmm2 = remaining 8 key bytes ----
2154	movq	xmm2,[16+eax]
2155	cmp	ebp,268435456
2156	je	NEAR L$10212rounds_alt
2157	mov	ecx,11
2158	movups	[edx-16],xmm0
; Eight expansion steps (round constants 1..128), alternating the a/b
; helpers because each step yields 24 bytes of key material.
2159db	102,15,58,223,202,1
2160	call	L$103key_192a_cold
2161db	102,15,58,223,202,2
2162	call	L$104key_192b
2163db	102,15,58,223,202,4
2164	call	L$105key_192a
2165db	102,15,58,223,202,8
2166	call	L$104key_192b
2167db	102,15,58,223,202,16
2168	call	L$105key_192a
2169db	102,15,58,223,202,32
2170	call	L$104key_192b
2171db	102,15,58,223,202,64
2172	call	L$105key_192a
2173db	102,15,58,223,202,128
2174	call	L$104key_192b
; Final round key, then the round counter at schedule offset 240.
2175	movups	[edx],xmm0
2176	mov	DWORD [48+edx],ecx
2177	jmp	NEAR L$100good_key
2178align	16
2179L$105key_192a:
; Store a full 16-byte round key and advance.
2180	movups	[edx],xmm0
2181	lea	edx,[16+edx]
2182align	16
2183L$103key_192a_cold:
; xmm5 keeps the current 8-byte half for the next key_192b store.
2184	movaps	xmm5,xmm2
2185L$106key_192b_warm:
; Advance xmm0 (16 bytes) and xmm2 (8 bytes) to the next 24 bytes of key
; material; dword 1 of the aeskeygenassist result is broadcast into xmm1.
2186	shufps	xmm4,xmm0,16
2187	movdqa	xmm3,xmm2
2188	xorps	xmm0,xmm4
2189	shufps	xmm4,xmm0,140
2190	pslldq	xmm3,4
2191	xorps	xmm0,xmm4
2192	pshufd	xmm1,xmm1,85
2193	pxor	xmm2,xmm3
2194	pxor	xmm0,xmm1
2195	pshufd	xmm3,xmm0,255
2196	pxor	xmm2,xmm3
2197	ret
2198align	16
2199L$104key_192b:
; Stitch the saved 8-byte half (xmm5), xmm0 and xmm2 into two 16-byte
; round keys, store both, then continue with the shared update code.
2200	movaps	xmm3,xmm0
2201	shufps	xmm5,xmm0,68
2202	movups	[edx],xmm5
2203	shufps	xmm3,xmm2,78
2204	movups	[16+edx],xmm3
2205	lea	edx,[32+edx]
2206	jmp	NEAR L$106key_192b_warm
2207align	16
2208L$10212rounds_alt:
; ---- 192-bit key, pshufb/aesenclast variant ----
; xmm5 = shuffle mask at L$key_const+16, xmm4 = round constant vector.
; Each of the 8 iterations emits 24 bytes.
2209	movdqa	xmm5,[16+ebx]
2210	movdqa	xmm4,[32+ebx]
2211	mov	ecx,8
2212	movdqu	[edx-16],xmm0
2213L$107loop_key192:
2214	movq	[edx],xmm2
2215	movdqa	xmm1,xmm2
; pshufb xmm2,xmm5 ; aesenclast xmm2,xmm4
2216db	102,15,56,0,213
2217db	102,15,56,221,212
2218	pslld	xmm4,1
2219	lea	edx,[24+edx]
2220	movdqa	xmm3,xmm0
2221	pslldq	xmm0,4
2222	pxor	xmm3,xmm0
2223	pslldq	xmm0,4
2224	pxor	xmm3,xmm0
2225	pslldq	xmm0,4
2226	pxor	xmm0,xmm3
2227	pshufd	xmm3,xmm0,255
2228	pxor	xmm3,xmm1
2229	pslldq	xmm1,4
2230	pxor	xmm3,xmm1
2231	pxor	xmm0,xmm2
2232	pxor	xmm2,xmm3
2233	movdqu	[edx-16],xmm0
2234	dec	ecx
2235	jnz	NEAR L$107loop_key192
; Round counter at schedule offset 240.
2236	mov	ecx,11
2237	mov	DWORD [32+edx],ecx
2238	jmp	NEAR L$100good_key
2239align	16
2240L$09314rounds:
; ---- 256-bit key: xmm2 = second 16 key bytes; edx biased by another 16 ----
2241	movups	xmm2,[16+eax]
2242	lea	edx,[16+edx]
2243	cmp	ebp,268435456
2244	je	NEAR L$10814rounds_alt
2245	mov	ecx,13
2246	movups	[edx-32],xmm0
2247	movups	[edx-16],xmm2
; Thirteen expansion steps alternating key_256a (updates xmm0 from
; aeskeygenassist of xmm2) and key_256b (updates xmm2 from
; aeskeygenassist of xmm0), round constants 1..64.
2248db	102,15,58,223,202,1
2249	call	L$109key_256a_cold
2250db	102,15,58,223,200,1
2251	call	L$110key_256b
2252db	102,15,58,223,202,2
2253	call	L$111key_256a
2254db	102,15,58,223,200,2
2255	call	L$110key_256b
2256db	102,15,58,223,202,4
2257	call	L$111key_256a
2258db	102,15,58,223,200,4
2259	call	L$110key_256b
2260db	102,15,58,223,202,8
2261	call	L$111key_256a
2262db	102,15,58,223,200,8
2263	call	L$110key_256b
2264db	102,15,58,223,202,16
2265	call	L$111key_256a
2266db	102,15,58,223,200,16
2267	call	L$110key_256b
2268db	102,15,58,223,202,32
2269	call	L$111key_256a
2270db	102,15,58,223,200,32
2271	call	L$110key_256b
2272db	102,15,58,223,202,64
2273	call	L$111key_256a
; Final round key, then the round counter at schedule offset 240.
2274	movups	[edx],xmm0
2275	mov	DWORD [16+edx],ecx
2276	xor	eax,eax
2277	jmp	NEAR L$100good_key
2278align	16
2279L$111key_256a:
2280	movups	[edx],xmm2
2281	lea	edx,[16+edx]
2282L$109key_256a_cold:
; Same update as key_128_cold: xmm0 advances, using dword 3 of xmm1.
2283	shufps	xmm4,xmm0,16
2284	xorps	xmm0,xmm4
2285	shufps	xmm4,xmm0,140
2286	xorps	xmm0,xmm4
2287	shufps	xmm1,xmm1,255
2288	xorps	xmm0,xmm1
2289	ret
2290align	16
2291L$110key_256b:
; Store xmm0, then advance xmm2 using dword 2 of xmm1.
2292	movups	[edx],xmm0
2293	lea	edx,[16+edx]
2294	shufps	xmm4,xmm2,16
2295	xorps	xmm2,xmm4
2296	shufps	xmm4,xmm2,140
2297	xorps	xmm2,xmm4
2298	shufps	xmm1,xmm1,170
2299	xorps	xmm2,xmm1
2300	ret
2301align	16
2302L$10814rounds_alt:
; ---- 256-bit key, pshufb/aesenclast variant ----
; xmm0/xmm1 hold the two most recent round keys; 7 iterations, the last
; one leaving through L$113done_key256 after its first store.
2303	movdqa	xmm5,[ebx]
2304	movdqa	xmm4,[32+ebx]
2305	mov	ecx,7
2306	movdqu	[edx-32],xmm0
2307	movdqa	xmm1,xmm2
2308	movdqu	[edx-16],xmm2
2309L$112loop_key256:
; pshufb xmm2,xmm5 ; aesenclast xmm2,xmm4
2310db	102,15,56,0,213
2311db	102,15,56,221,212
2312	movdqa	xmm3,xmm0
2313	pslldq	xmm0,4
2314	pxor	xmm3,xmm0
2315	pslldq	xmm0,4
2316	pxor	xmm3,xmm0
2317	pslldq	xmm0,4
2318	pxor	xmm0,xmm3
2319	pslld	xmm4,1
2320	pxor	xmm0,xmm2
2321	movdqu	[edx],xmm0
2322	dec	ecx
2323	jz	NEAR L$113done_key256
; Second half: broadcast dword 3 of the new key and run it through
; aesenclast with an all-zero round key (xmm3).
2324	pshufd	xmm2,xmm0,255
2325	pxor	xmm3,xmm3
; aesenclast xmm2,xmm3
2326db	102,15,56,221,211
2327	movdqa	xmm3,xmm1
2328	pslldq	xmm1,4
2329	pxor	xmm3,xmm1
2330	pslldq	xmm1,4
2331	pxor	xmm3,xmm1
2332	pslldq	xmm1,4
2333	pxor	xmm1,xmm3
2334	pxor	xmm2,xmm1
2335	movdqu	[16+edx],xmm2
2336	lea	edx,[32+edx]
2337	movdqa	xmm1,xmm2
2338	jmp	NEAR L$112loop_key256
2339L$113done_key256:
; Round counter at schedule offset 240.
2340	mov	ecx,13
2341	mov	DWORD [16+edx],ecx
2342L$100good_key:
; Success: wipe the registers that held key material and return 0.
2343	pxor	xmm0,xmm0
2344	pxor	xmm1,xmm1
2345	pxor	xmm2,xmm2
2346	pxor	xmm3,xmm3
2347	pxor	xmm4,xmm4
2348	pxor	xmm5,xmm5
2349	xor	eax,eax
2350	pop	ebx
2351	pop	ebp
2352	ret
2353align	4
2354L$091bad_pointer:
; Null key or schedule pointer.
2355	mov	eax,-1
2356	pop	ebx
2357	pop	ebp
2358	ret
2359align	4
2360L$095bad_keybits:
; Unsupported bit count; xmm0 already holds key bytes, so clear it.
2361	pxor	xmm0,xmm0
2362	mov	eax,-2
2363	pop	ebx
2364	pop	ebp
2365	ret
; ----------------------------------------------------------------------
; _aesni_set_encrypt_key -- exported stack-argument front end for the
; register-based worker __aesni_set_encrypt_key.
;   [4+esp]  -> eax
;   [8+esp]  -> ecx
;   [12+esp] -> edx
; Whatever the worker leaves in eax is returned unchanged.
; ----------------------------------------------------------------------
2366global	_aesni_set_encrypt_key
2367align	16
2368_aesni_set_encrypt_key:
2369L$_aesni_set_encrypt_key_begin:
; The three loads are independent of each other; issue them last argument
; first.
2370	mov	edx,DWORD [12+esp]
2371	mov	ecx,DWORD [8+esp]
2372	mov	eax,DWORD [4+esp]
2373	call	__aesni_set_encrypt_key
2374	ret
; ----------------------------------------------------------------------
; _aesni_set_decrypt_key -- exported entry point that builds an encryption
; key schedule with __aesni_set_encrypt_key and then converts it in place:
; the order of the round keys is reversed and aesimc is applied to every
; key except the first and last.
;   [4+esp]  -> eax   passed to the worker
;   [8+esp]  -> ecx   passed to the worker
;   [12+esp] -> edx   key schedule pointer
; Returns the worker's status in eax; a non-zero value is returned
; unchanged and the schedule is not converted.
; `db 102,15,56,219,M` is aesimc (M=192: xmm0,xmm0; 201: xmm1,xmm1).
; ----------------------------------------------------------------------
2375global	_aesni_set_decrypt_key
2376align	16
2377_aesni_set_decrypt_key:
2378L$_aesni_set_decrypt_key_begin:
2379	mov	eax,DWORD [4+esp]
2380	mov	ecx,DWORD [8+esp]
2381	mov	edx,DWORD [12+esp]
2382	call	__aesni_set_encrypt_key
; The worker advanced edx, so reload the schedule base. ecx comes back as
; the round counter; scale it to a byte offset. The shl precedes the test
; because shl would overwrite the flags the jnz needs.
2383	mov	edx,DWORD [12+esp]
2384	shl	ecx,4
2385	test	eax,eax
2386	jnz	NEAR L$114dec_key_ret
; eax -> last round key, edx -> first round key; swap them unmodified.
2387	lea	eax,[16+ecx*1+edx]
2388	movups	xmm0,[edx]
2389	movups	xmm1,[eax]
2390	movups	[eax],xmm0
2391	movups	[edx],xmm1
2392	lea	edx,[16+edx]
2393	lea	eax,[eax-16]
2394L$115dec_key_inverse:
; Walk inward from both ends, applying aesimc to each pair and swapping.
2395	movups	xmm0,[edx]
2396	movups	xmm1,[eax]
; aesimc xmm0,xmm0 ; aesimc xmm1,xmm1
2397db	102,15,56,219,192
2398db	102,15,56,219,201
2399	lea	edx,[16+edx]
2400	lea	eax,[eax-16]
2401	movups	[16+eax],xmm0
2402	movups	[edx-16],xmm1
2403	cmp	eax,edx
2404	ja	NEAR L$115dec_key_inverse
; The pointers have met on the middle round key: convert it in place.
2405	movups	xmm0,[edx]
; aesimc xmm0,xmm0
2406db	102,15,56,219,192
2407	movups	[edx],xmm0
; Clear the registers that held key material and report success.
2408	pxor	xmm0,xmm0
2409	pxor	xmm1,xmm1
2410	xor	eax,eax
2411L$114dec_key_ret:
2412	ret
; ----------------------------------------------------------------------
; L$key_const -- constants for the "_alt" key-expansion paths in
; __aesni_set_encrypt_key, addressed relative to ebx. Aligned to 64 bytes
; because the vectors are loaded with movdqa.
;   +0   0x0c0f0e0d x4  pshufb mask (128- and 256-bit paths)
;   +16  0x04070605 x4  pshufb mask (192-bit path)
;   +32  1 x4           initial round constant, doubled with pslld
;   +48  27 x4          round constant for the last two 128-bit steps
; The trailing bytes are the NUL-terminated ASCII string
; "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>".
; ----------------------------------------------------------------------
2413align	64
2414L$key_const:
2415dd	202313229,202313229,202313229,202313229
2416dd	67569157,67569157,67569157,67569157
2417dd	1,1,1,1
2418dd	27,27,27,27
2419db	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
2420db	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
2421db	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
2422db	115,108,46,111,114,103,62,0
; Declares _OPENSSL_ia32cap_P as a 16-byte common symbol in .bss, so the
; linker merges it with any definition of the same name elsewhere.
; __aesni_set_encrypt_key reads the dword at offset 4 of this symbol to
; choose between its two key-expansion variants.
2423segment	.bss
2424common	_OPENSSL_ia32cap_P 16
2425