1default	rel
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
5section	.text code align=64
6
7EXTERN	OPENSSL_ia32cap_P
8global	aesni_encrypt
9
10ALIGN	16
11aesni_encrypt:
12	movups	xmm2,XMMWORD[rcx]
13	mov	eax,DWORD[240+r8]
14	movups	xmm0,XMMWORD[r8]
15	movups	xmm1,XMMWORD[16+r8]
16	lea	r8,[32+r8]
17	xorps	xmm2,xmm0
18$L$oop_enc1_1:
19DB	102,15,56,220,209
20	dec	eax
21	movups	xmm1,XMMWORD[r8]
22	lea	r8,[16+r8]
23	jnz	NEAR $L$oop_enc1_1
24DB	102,15,56,221,209
25	pxor	xmm0,xmm0
26	pxor	xmm1,xmm1
27	movups	XMMWORD[rdx],xmm2
28	pxor	xmm2,xmm2
29	DB	0F3h,0C3h		;repret
30
31
32global	aesni_decrypt
33
34ALIGN	16
35aesni_decrypt:
36	movups	xmm2,XMMWORD[rcx]
37	mov	eax,DWORD[240+r8]
38	movups	xmm0,XMMWORD[r8]
39	movups	xmm1,XMMWORD[16+r8]
40	lea	r8,[32+r8]
41	xorps	xmm2,xmm0
42$L$oop_dec1_2:
43DB	102,15,56,222,209
44	dec	eax
45	movups	xmm1,XMMWORD[r8]
46	lea	r8,[16+r8]
47	jnz	NEAR $L$oop_dec1_2
48DB	102,15,56,223,209
49	pxor	xmm0,xmm0
50	pxor	xmm1,xmm1
51	movups	XMMWORD[rdx],xmm2
52	pxor	xmm2,xmm2
53	DB	0F3h,0C3h		;repret
54
55
56ALIGN	16
57_aesni_encrypt2:
58	movups	xmm0,XMMWORD[rcx]
59	shl	eax,4
60	movups	xmm1,XMMWORD[16+rcx]
61	xorps	xmm2,xmm0
62	xorps	xmm3,xmm0
63	movups	xmm0,XMMWORD[32+rcx]
64	lea	rcx,[32+rax*1+rcx]
65	neg	rax
66	add	rax,16
67
68$L$enc_loop2:
69DB	102,15,56,220,209
70DB	102,15,56,220,217
71	movups	xmm1,XMMWORD[rax*1+rcx]
72	add	rax,32
73DB	102,15,56,220,208
74DB	102,15,56,220,216
75	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
76	jnz	NEAR $L$enc_loop2
77
78DB	102,15,56,220,209
79DB	102,15,56,220,217
80DB	102,15,56,221,208
81DB	102,15,56,221,216
82	DB	0F3h,0C3h		;repret
83
84
85ALIGN	16
86_aesni_decrypt2:
87	movups	xmm0,XMMWORD[rcx]
88	shl	eax,4
89	movups	xmm1,XMMWORD[16+rcx]
90	xorps	xmm2,xmm0
91	xorps	xmm3,xmm0
92	movups	xmm0,XMMWORD[32+rcx]
93	lea	rcx,[32+rax*1+rcx]
94	neg	rax
95	add	rax,16
96
97$L$dec_loop2:
98DB	102,15,56,222,209
99DB	102,15,56,222,217
100	movups	xmm1,XMMWORD[rax*1+rcx]
101	add	rax,32
102DB	102,15,56,222,208
103DB	102,15,56,222,216
104	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
105	jnz	NEAR $L$dec_loop2
106
107DB	102,15,56,222,209
108DB	102,15,56,222,217
109DB	102,15,56,223,208
110DB	102,15,56,223,216
111	DB	0F3h,0C3h		;repret
112
113
114ALIGN	16
115_aesni_encrypt3:
116	movups	xmm0,XMMWORD[rcx]
117	shl	eax,4
118	movups	xmm1,XMMWORD[16+rcx]
119	xorps	xmm2,xmm0
120	xorps	xmm3,xmm0
121	xorps	xmm4,xmm0
122	movups	xmm0,XMMWORD[32+rcx]
123	lea	rcx,[32+rax*1+rcx]
124	neg	rax
125	add	rax,16
126
127$L$enc_loop3:
128DB	102,15,56,220,209
129DB	102,15,56,220,217
130DB	102,15,56,220,225
131	movups	xmm1,XMMWORD[rax*1+rcx]
132	add	rax,32
133DB	102,15,56,220,208
134DB	102,15,56,220,216
135DB	102,15,56,220,224
136	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
137	jnz	NEAR $L$enc_loop3
138
139DB	102,15,56,220,209
140DB	102,15,56,220,217
141DB	102,15,56,220,225
142DB	102,15,56,221,208
143DB	102,15,56,221,216
144DB	102,15,56,221,224
145	DB	0F3h,0C3h		;repret
146
147
148ALIGN	16
149_aesni_decrypt3:
150	movups	xmm0,XMMWORD[rcx]
151	shl	eax,4
152	movups	xmm1,XMMWORD[16+rcx]
153	xorps	xmm2,xmm0
154	xorps	xmm3,xmm0
155	xorps	xmm4,xmm0
156	movups	xmm0,XMMWORD[32+rcx]
157	lea	rcx,[32+rax*1+rcx]
158	neg	rax
159	add	rax,16
160
161$L$dec_loop3:
162DB	102,15,56,222,209
163DB	102,15,56,222,217
164DB	102,15,56,222,225
165	movups	xmm1,XMMWORD[rax*1+rcx]
166	add	rax,32
167DB	102,15,56,222,208
168DB	102,15,56,222,216
169DB	102,15,56,222,224
170	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
171	jnz	NEAR $L$dec_loop3
172
173DB	102,15,56,222,209
174DB	102,15,56,222,217
175DB	102,15,56,222,225
176DB	102,15,56,223,208
177DB	102,15,56,223,216
178DB	102,15,56,223,224
179	DB	0F3h,0C3h		;repret
180
181
182ALIGN	16
183_aesni_encrypt4:
184	movups	xmm0,XMMWORD[rcx]
185	shl	eax,4
186	movups	xmm1,XMMWORD[16+rcx]
187	xorps	xmm2,xmm0
188	xorps	xmm3,xmm0
189	xorps	xmm4,xmm0
190	xorps	xmm5,xmm0
191	movups	xmm0,XMMWORD[32+rcx]
192	lea	rcx,[32+rax*1+rcx]
193	neg	rax
194DB	0x0f,0x1f,0x00
195	add	rax,16
196
197$L$enc_loop4:
198DB	102,15,56,220,209
199DB	102,15,56,220,217
200DB	102,15,56,220,225
201DB	102,15,56,220,233
202	movups	xmm1,XMMWORD[rax*1+rcx]
203	add	rax,32
204DB	102,15,56,220,208
205DB	102,15,56,220,216
206DB	102,15,56,220,224
207DB	102,15,56,220,232
208	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
209	jnz	NEAR $L$enc_loop4
210
211DB	102,15,56,220,209
212DB	102,15,56,220,217
213DB	102,15,56,220,225
214DB	102,15,56,220,233
215DB	102,15,56,221,208
216DB	102,15,56,221,216
217DB	102,15,56,221,224
218DB	102,15,56,221,232
219	DB	0F3h,0C3h		;repret
220
221
222ALIGN	16
223_aesni_decrypt4:
224	movups	xmm0,XMMWORD[rcx]
225	shl	eax,4
226	movups	xmm1,XMMWORD[16+rcx]
227	xorps	xmm2,xmm0
228	xorps	xmm3,xmm0
229	xorps	xmm4,xmm0
230	xorps	xmm5,xmm0
231	movups	xmm0,XMMWORD[32+rcx]
232	lea	rcx,[32+rax*1+rcx]
233	neg	rax
234DB	0x0f,0x1f,0x00
235	add	rax,16
236
237$L$dec_loop4:
238DB	102,15,56,222,209
239DB	102,15,56,222,217
240DB	102,15,56,222,225
241DB	102,15,56,222,233
242	movups	xmm1,XMMWORD[rax*1+rcx]
243	add	rax,32
244DB	102,15,56,222,208
245DB	102,15,56,222,216
246DB	102,15,56,222,224
247DB	102,15,56,222,232
248	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
249	jnz	NEAR $L$dec_loop4
250
251DB	102,15,56,222,209
252DB	102,15,56,222,217
253DB	102,15,56,222,225
254DB	102,15,56,222,233
255DB	102,15,56,223,208
256DB	102,15,56,223,216
257DB	102,15,56,223,224
258DB	102,15,56,223,232
259	DB	0F3h,0C3h		;repret
260
261
262ALIGN	16
263_aesni_encrypt6:
264	movups	xmm0,XMMWORD[rcx]
265	shl	eax,4
266	movups	xmm1,XMMWORD[16+rcx]
267	xorps	xmm2,xmm0
268	pxor	xmm3,xmm0
269	pxor	xmm4,xmm0
270DB	102,15,56,220,209
271	lea	rcx,[32+rax*1+rcx]
272	neg	rax
273DB	102,15,56,220,217
274	pxor	xmm5,xmm0
275	pxor	xmm6,xmm0
276DB	102,15,56,220,225
277	pxor	xmm7,xmm0
278	movups	xmm0,XMMWORD[rax*1+rcx]
279	add	rax,16
280	jmp	NEAR $L$enc_loop6_enter
281ALIGN	16
282$L$enc_loop6:
283DB	102,15,56,220,209
284DB	102,15,56,220,217
285DB	102,15,56,220,225
286$L$enc_loop6_enter:
287DB	102,15,56,220,233
288DB	102,15,56,220,241
289DB	102,15,56,220,249
290	movups	xmm1,XMMWORD[rax*1+rcx]
291	add	rax,32
292DB	102,15,56,220,208
293DB	102,15,56,220,216
294DB	102,15,56,220,224
295DB	102,15,56,220,232
296DB	102,15,56,220,240
297DB	102,15,56,220,248
298	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
299	jnz	NEAR $L$enc_loop6
300
301DB	102,15,56,220,209
302DB	102,15,56,220,217
303DB	102,15,56,220,225
304DB	102,15,56,220,233
305DB	102,15,56,220,241
306DB	102,15,56,220,249
307DB	102,15,56,221,208
308DB	102,15,56,221,216
309DB	102,15,56,221,224
310DB	102,15,56,221,232
311DB	102,15,56,221,240
312DB	102,15,56,221,248
313	DB	0F3h,0C3h		;repret
314
315
316ALIGN	16
317_aesni_decrypt6:
318	movups	xmm0,XMMWORD[rcx]
319	shl	eax,4
320	movups	xmm1,XMMWORD[16+rcx]
321	xorps	xmm2,xmm0
322	pxor	xmm3,xmm0
323	pxor	xmm4,xmm0
324DB	102,15,56,222,209
325	lea	rcx,[32+rax*1+rcx]
326	neg	rax
327DB	102,15,56,222,217
328	pxor	xmm5,xmm0
329	pxor	xmm6,xmm0
330DB	102,15,56,222,225
331	pxor	xmm7,xmm0
332	movups	xmm0,XMMWORD[rax*1+rcx]
333	add	rax,16
334	jmp	NEAR $L$dec_loop6_enter
335ALIGN	16
336$L$dec_loop6:
337DB	102,15,56,222,209
338DB	102,15,56,222,217
339DB	102,15,56,222,225
340$L$dec_loop6_enter:
341DB	102,15,56,222,233
342DB	102,15,56,222,241
343DB	102,15,56,222,249
344	movups	xmm1,XMMWORD[rax*1+rcx]
345	add	rax,32
346DB	102,15,56,222,208
347DB	102,15,56,222,216
348DB	102,15,56,222,224
349DB	102,15,56,222,232
350DB	102,15,56,222,240
351DB	102,15,56,222,248
352	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
353	jnz	NEAR $L$dec_loop6
354
355DB	102,15,56,222,209
356DB	102,15,56,222,217
357DB	102,15,56,222,225
358DB	102,15,56,222,233
359DB	102,15,56,222,241
360DB	102,15,56,222,249
361DB	102,15,56,223,208
362DB	102,15,56,223,216
363DB	102,15,56,223,224
364DB	102,15,56,223,232
365DB	102,15,56,223,240
366DB	102,15,56,223,248
367	DB	0F3h,0C3h		;repret
368
369
370ALIGN	16
371_aesni_encrypt8:
372	movups	xmm0,XMMWORD[rcx]
373	shl	eax,4
374	movups	xmm1,XMMWORD[16+rcx]
375	xorps	xmm2,xmm0
376	xorps	xmm3,xmm0
377	pxor	xmm4,xmm0
378	pxor	xmm5,xmm0
379	pxor	xmm6,xmm0
380	lea	rcx,[32+rax*1+rcx]
381	neg	rax
382DB	102,15,56,220,209
383	pxor	xmm7,xmm0
384	pxor	xmm8,xmm0
385DB	102,15,56,220,217
386	pxor	xmm9,xmm0
387	movups	xmm0,XMMWORD[rax*1+rcx]
388	add	rax,16
389	jmp	NEAR $L$enc_loop8_inner
390ALIGN	16
391$L$enc_loop8:
392DB	102,15,56,220,209
393DB	102,15,56,220,217
394$L$enc_loop8_inner:
395DB	102,15,56,220,225
396DB	102,15,56,220,233
397DB	102,15,56,220,241
398DB	102,15,56,220,249
399DB	102,68,15,56,220,193
400DB	102,68,15,56,220,201
401$L$enc_loop8_enter:
402	movups	xmm1,XMMWORD[rax*1+rcx]
403	add	rax,32
404DB	102,15,56,220,208
405DB	102,15,56,220,216
406DB	102,15,56,220,224
407DB	102,15,56,220,232
408DB	102,15,56,220,240
409DB	102,15,56,220,248
410DB	102,68,15,56,220,192
411DB	102,68,15,56,220,200
412	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
413	jnz	NEAR $L$enc_loop8
414
415DB	102,15,56,220,209
416DB	102,15,56,220,217
417DB	102,15,56,220,225
418DB	102,15,56,220,233
419DB	102,15,56,220,241
420DB	102,15,56,220,249
421DB	102,68,15,56,220,193
422DB	102,68,15,56,220,201
423DB	102,15,56,221,208
424DB	102,15,56,221,216
425DB	102,15,56,221,224
426DB	102,15,56,221,232
427DB	102,15,56,221,240
428DB	102,15,56,221,248
429DB	102,68,15,56,221,192
430DB	102,68,15,56,221,200
431	DB	0F3h,0C3h		;repret
432
433
434ALIGN	16
435_aesni_decrypt8:
436	movups	xmm0,XMMWORD[rcx]
437	shl	eax,4
438	movups	xmm1,XMMWORD[16+rcx]
439	xorps	xmm2,xmm0
440	xorps	xmm3,xmm0
441	pxor	xmm4,xmm0
442	pxor	xmm5,xmm0
443	pxor	xmm6,xmm0
444	lea	rcx,[32+rax*1+rcx]
445	neg	rax
446DB	102,15,56,222,209
447	pxor	xmm7,xmm0
448	pxor	xmm8,xmm0
449DB	102,15,56,222,217
450	pxor	xmm9,xmm0
451	movups	xmm0,XMMWORD[rax*1+rcx]
452	add	rax,16
453	jmp	NEAR $L$dec_loop8_inner
454ALIGN	16
455$L$dec_loop8:
456DB	102,15,56,222,209
457DB	102,15,56,222,217
458$L$dec_loop8_inner:
459DB	102,15,56,222,225
460DB	102,15,56,222,233
461DB	102,15,56,222,241
462DB	102,15,56,222,249
463DB	102,68,15,56,222,193
464DB	102,68,15,56,222,201
465$L$dec_loop8_enter:
466	movups	xmm1,XMMWORD[rax*1+rcx]
467	add	rax,32
468DB	102,15,56,222,208
469DB	102,15,56,222,216
470DB	102,15,56,222,224
471DB	102,15,56,222,232
472DB	102,15,56,222,240
473DB	102,15,56,222,248
474DB	102,68,15,56,222,192
475DB	102,68,15,56,222,200
476	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
477	jnz	NEAR $L$dec_loop8
478
479DB	102,15,56,222,209
480DB	102,15,56,222,217
481DB	102,15,56,222,225
482DB	102,15,56,222,233
483DB	102,15,56,222,241
484DB	102,15,56,222,249
485DB	102,68,15,56,222,193
486DB	102,68,15,56,222,201
487DB	102,15,56,223,208
488DB	102,15,56,223,216
489DB	102,15,56,223,224
490DB	102,15,56,223,232
491DB	102,15,56,223,240
492DB	102,15,56,223,248
493DB	102,68,15,56,223,192
494DB	102,68,15,56,223,200
495	DB	0F3h,0C3h		;repret
496
497global	aesni_ecb_encrypt
498
499ALIGN	16
500aesni_ecb_encrypt:
501	mov	QWORD[8+rsp],rdi	;WIN64 prologue
502	mov	QWORD[16+rsp],rsi
503	mov	rax,rsp
504$L$SEH_begin_aesni_ecb_encrypt:
505	mov	rdi,rcx
506	mov	rsi,rdx
507	mov	rdx,r8
508	mov	rcx,r9
509	mov	r8,QWORD[40+rsp]
510
511
512	lea	rsp,[((-88))+rsp]
513	movaps	XMMWORD[rsp],xmm6
514	movaps	XMMWORD[16+rsp],xmm7
515	movaps	XMMWORD[32+rsp],xmm8
516	movaps	XMMWORD[48+rsp],xmm9
517$L$ecb_enc_body:
518	and	rdx,-16
519	jz	NEAR $L$ecb_ret
520
521	mov	eax,DWORD[240+rcx]
522	movups	xmm0,XMMWORD[rcx]
523	mov	r11,rcx
524	mov	r10d,eax
525	test	r8d,r8d
526	jz	NEAR $L$ecb_decrypt
527
528	cmp	rdx,0x80
529	jb	NEAR $L$ecb_enc_tail
530
531	movdqu	xmm2,XMMWORD[rdi]
532	movdqu	xmm3,XMMWORD[16+rdi]
533	movdqu	xmm4,XMMWORD[32+rdi]
534	movdqu	xmm5,XMMWORD[48+rdi]
535	movdqu	xmm6,XMMWORD[64+rdi]
536	movdqu	xmm7,XMMWORD[80+rdi]
537	movdqu	xmm8,XMMWORD[96+rdi]
538	movdqu	xmm9,XMMWORD[112+rdi]
539	lea	rdi,[128+rdi]
540	sub	rdx,0x80
541	jmp	NEAR $L$ecb_enc_loop8_enter
542ALIGN	16
543$L$ecb_enc_loop8:
544	movups	XMMWORD[rsi],xmm2
545	mov	rcx,r11
546	movdqu	xmm2,XMMWORD[rdi]
547	mov	eax,r10d
548	movups	XMMWORD[16+rsi],xmm3
549	movdqu	xmm3,XMMWORD[16+rdi]
550	movups	XMMWORD[32+rsi],xmm4
551	movdqu	xmm4,XMMWORD[32+rdi]
552	movups	XMMWORD[48+rsi],xmm5
553	movdqu	xmm5,XMMWORD[48+rdi]
554	movups	XMMWORD[64+rsi],xmm6
555	movdqu	xmm6,XMMWORD[64+rdi]
556	movups	XMMWORD[80+rsi],xmm7
557	movdqu	xmm7,XMMWORD[80+rdi]
558	movups	XMMWORD[96+rsi],xmm8
559	movdqu	xmm8,XMMWORD[96+rdi]
560	movups	XMMWORD[112+rsi],xmm9
561	lea	rsi,[128+rsi]
562	movdqu	xmm9,XMMWORD[112+rdi]
563	lea	rdi,[128+rdi]
564$L$ecb_enc_loop8_enter:
565
566	call	_aesni_encrypt8
567
568	sub	rdx,0x80
569	jnc	NEAR $L$ecb_enc_loop8
570
571	movups	XMMWORD[rsi],xmm2
572	mov	rcx,r11
573	movups	XMMWORD[16+rsi],xmm3
574	mov	eax,r10d
575	movups	XMMWORD[32+rsi],xmm4
576	movups	XMMWORD[48+rsi],xmm5
577	movups	XMMWORD[64+rsi],xmm6
578	movups	XMMWORD[80+rsi],xmm7
579	movups	XMMWORD[96+rsi],xmm8
580	movups	XMMWORD[112+rsi],xmm9
581	lea	rsi,[128+rsi]
582	add	rdx,0x80
583	jz	NEAR $L$ecb_ret
584
585$L$ecb_enc_tail:
586	movups	xmm2,XMMWORD[rdi]
587	cmp	rdx,0x20
588	jb	NEAR $L$ecb_enc_one
589	movups	xmm3,XMMWORD[16+rdi]
590	je	NEAR $L$ecb_enc_two
591	movups	xmm4,XMMWORD[32+rdi]
592	cmp	rdx,0x40
593	jb	NEAR $L$ecb_enc_three
594	movups	xmm5,XMMWORD[48+rdi]
595	je	NEAR $L$ecb_enc_four
596	movups	xmm6,XMMWORD[64+rdi]
597	cmp	rdx,0x60
598	jb	NEAR $L$ecb_enc_five
599	movups	xmm7,XMMWORD[80+rdi]
600	je	NEAR $L$ecb_enc_six
601	movdqu	xmm8,XMMWORD[96+rdi]
602	xorps	xmm9,xmm9
603	call	_aesni_encrypt8
604	movups	XMMWORD[rsi],xmm2
605	movups	XMMWORD[16+rsi],xmm3
606	movups	XMMWORD[32+rsi],xmm4
607	movups	XMMWORD[48+rsi],xmm5
608	movups	XMMWORD[64+rsi],xmm6
609	movups	XMMWORD[80+rsi],xmm7
610	movups	XMMWORD[96+rsi],xmm8
611	jmp	NEAR $L$ecb_ret
612ALIGN	16
613$L$ecb_enc_one:
614	movups	xmm0,XMMWORD[rcx]
615	movups	xmm1,XMMWORD[16+rcx]
616	lea	rcx,[32+rcx]
617	xorps	xmm2,xmm0
618$L$oop_enc1_3:
619DB	102,15,56,220,209
620	dec	eax
621	movups	xmm1,XMMWORD[rcx]
622	lea	rcx,[16+rcx]
623	jnz	NEAR $L$oop_enc1_3
624DB	102,15,56,221,209
625	movups	XMMWORD[rsi],xmm2
626	jmp	NEAR $L$ecb_ret
627ALIGN	16
628$L$ecb_enc_two:
629	call	_aesni_encrypt2
630	movups	XMMWORD[rsi],xmm2
631	movups	XMMWORD[16+rsi],xmm3
632	jmp	NEAR $L$ecb_ret
633ALIGN	16
634$L$ecb_enc_three:
635	call	_aesni_encrypt3
636	movups	XMMWORD[rsi],xmm2
637	movups	XMMWORD[16+rsi],xmm3
638	movups	XMMWORD[32+rsi],xmm4
639	jmp	NEAR $L$ecb_ret
640ALIGN	16
641$L$ecb_enc_four:
642	call	_aesni_encrypt4
643	movups	XMMWORD[rsi],xmm2
644	movups	XMMWORD[16+rsi],xmm3
645	movups	XMMWORD[32+rsi],xmm4
646	movups	XMMWORD[48+rsi],xmm5
647	jmp	NEAR $L$ecb_ret
648ALIGN	16
649$L$ecb_enc_five:
650	xorps	xmm7,xmm7
651	call	_aesni_encrypt6
652	movups	XMMWORD[rsi],xmm2
653	movups	XMMWORD[16+rsi],xmm3
654	movups	XMMWORD[32+rsi],xmm4
655	movups	XMMWORD[48+rsi],xmm5
656	movups	XMMWORD[64+rsi],xmm6
657	jmp	NEAR $L$ecb_ret
658ALIGN	16
659$L$ecb_enc_six:
660	call	_aesni_encrypt6
661	movups	XMMWORD[rsi],xmm2
662	movups	XMMWORD[16+rsi],xmm3
663	movups	XMMWORD[32+rsi],xmm4
664	movups	XMMWORD[48+rsi],xmm5
665	movups	XMMWORD[64+rsi],xmm6
666	movups	XMMWORD[80+rsi],xmm7
667	jmp	NEAR $L$ecb_ret
668
669ALIGN	16
670$L$ecb_decrypt:
671	cmp	rdx,0x80
672	jb	NEAR $L$ecb_dec_tail
673
674	movdqu	xmm2,XMMWORD[rdi]
675	movdqu	xmm3,XMMWORD[16+rdi]
676	movdqu	xmm4,XMMWORD[32+rdi]
677	movdqu	xmm5,XMMWORD[48+rdi]
678	movdqu	xmm6,XMMWORD[64+rdi]
679	movdqu	xmm7,XMMWORD[80+rdi]
680	movdqu	xmm8,XMMWORD[96+rdi]
681	movdqu	xmm9,XMMWORD[112+rdi]
682	lea	rdi,[128+rdi]
683	sub	rdx,0x80
684	jmp	NEAR $L$ecb_dec_loop8_enter
685ALIGN	16
686$L$ecb_dec_loop8:
687	movups	XMMWORD[rsi],xmm2
688	mov	rcx,r11
689	movdqu	xmm2,XMMWORD[rdi]
690	mov	eax,r10d
691	movups	XMMWORD[16+rsi],xmm3
692	movdqu	xmm3,XMMWORD[16+rdi]
693	movups	XMMWORD[32+rsi],xmm4
694	movdqu	xmm4,XMMWORD[32+rdi]
695	movups	XMMWORD[48+rsi],xmm5
696	movdqu	xmm5,XMMWORD[48+rdi]
697	movups	XMMWORD[64+rsi],xmm6
698	movdqu	xmm6,XMMWORD[64+rdi]
699	movups	XMMWORD[80+rsi],xmm7
700	movdqu	xmm7,XMMWORD[80+rdi]
701	movups	XMMWORD[96+rsi],xmm8
702	movdqu	xmm8,XMMWORD[96+rdi]
703	movups	XMMWORD[112+rsi],xmm9
704	lea	rsi,[128+rsi]
705	movdqu	xmm9,XMMWORD[112+rdi]
706	lea	rdi,[128+rdi]
707$L$ecb_dec_loop8_enter:
708
709	call	_aesni_decrypt8
710
711	movups	xmm0,XMMWORD[r11]
712	sub	rdx,0x80
713	jnc	NEAR $L$ecb_dec_loop8
714
715	movups	XMMWORD[rsi],xmm2
716	pxor	xmm2,xmm2
717	mov	rcx,r11
718	movups	XMMWORD[16+rsi],xmm3
719	pxor	xmm3,xmm3
720	mov	eax,r10d
721	movups	XMMWORD[32+rsi],xmm4
722	pxor	xmm4,xmm4
723	movups	XMMWORD[48+rsi],xmm5
724	pxor	xmm5,xmm5
725	movups	XMMWORD[64+rsi],xmm6
726	pxor	xmm6,xmm6
727	movups	XMMWORD[80+rsi],xmm7
728	pxor	xmm7,xmm7
729	movups	XMMWORD[96+rsi],xmm8
730	pxor	xmm8,xmm8
731	movups	XMMWORD[112+rsi],xmm9
732	pxor	xmm9,xmm9
733	lea	rsi,[128+rsi]
734	add	rdx,0x80
735	jz	NEAR $L$ecb_ret
736
737$L$ecb_dec_tail:
738	movups	xmm2,XMMWORD[rdi]
739	cmp	rdx,0x20
740	jb	NEAR $L$ecb_dec_one
741	movups	xmm3,XMMWORD[16+rdi]
742	je	NEAR $L$ecb_dec_two
743	movups	xmm4,XMMWORD[32+rdi]
744	cmp	rdx,0x40
745	jb	NEAR $L$ecb_dec_three
746	movups	xmm5,XMMWORD[48+rdi]
747	je	NEAR $L$ecb_dec_four
748	movups	xmm6,XMMWORD[64+rdi]
749	cmp	rdx,0x60
750	jb	NEAR $L$ecb_dec_five
751	movups	xmm7,XMMWORD[80+rdi]
752	je	NEAR $L$ecb_dec_six
753	movups	xmm8,XMMWORD[96+rdi]
754	movups	xmm0,XMMWORD[rcx]
755	xorps	xmm9,xmm9
756	call	_aesni_decrypt8
757	movups	XMMWORD[rsi],xmm2
758	pxor	xmm2,xmm2
759	movups	XMMWORD[16+rsi],xmm3
760	pxor	xmm3,xmm3
761	movups	XMMWORD[32+rsi],xmm4
762	pxor	xmm4,xmm4
763	movups	XMMWORD[48+rsi],xmm5
764	pxor	xmm5,xmm5
765	movups	XMMWORD[64+rsi],xmm6
766	pxor	xmm6,xmm6
767	movups	XMMWORD[80+rsi],xmm7
768	pxor	xmm7,xmm7
769	movups	XMMWORD[96+rsi],xmm8
770	pxor	xmm8,xmm8
771	pxor	xmm9,xmm9
772	jmp	NEAR $L$ecb_ret
773ALIGN	16
774$L$ecb_dec_one:
775	movups	xmm0,XMMWORD[rcx]
776	movups	xmm1,XMMWORD[16+rcx]
777	lea	rcx,[32+rcx]
778	xorps	xmm2,xmm0
779$L$oop_dec1_4:
780DB	102,15,56,222,209
781	dec	eax
782	movups	xmm1,XMMWORD[rcx]
783	lea	rcx,[16+rcx]
784	jnz	NEAR $L$oop_dec1_4
785DB	102,15,56,223,209
786	movups	XMMWORD[rsi],xmm2
787	pxor	xmm2,xmm2
788	jmp	NEAR $L$ecb_ret
789ALIGN	16
790$L$ecb_dec_two:
791	call	_aesni_decrypt2
792	movups	XMMWORD[rsi],xmm2
793	pxor	xmm2,xmm2
794	movups	XMMWORD[16+rsi],xmm3
795	pxor	xmm3,xmm3
796	jmp	NEAR $L$ecb_ret
797ALIGN	16
798$L$ecb_dec_three:
799	call	_aesni_decrypt3
800	movups	XMMWORD[rsi],xmm2
801	pxor	xmm2,xmm2
802	movups	XMMWORD[16+rsi],xmm3
803	pxor	xmm3,xmm3
804	movups	XMMWORD[32+rsi],xmm4
805	pxor	xmm4,xmm4
806	jmp	NEAR $L$ecb_ret
807ALIGN	16
808$L$ecb_dec_four:
809	call	_aesni_decrypt4
810	movups	XMMWORD[rsi],xmm2
811	pxor	xmm2,xmm2
812	movups	XMMWORD[16+rsi],xmm3
813	pxor	xmm3,xmm3
814	movups	XMMWORD[32+rsi],xmm4
815	pxor	xmm4,xmm4
816	movups	XMMWORD[48+rsi],xmm5
817	pxor	xmm5,xmm5
818	jmp	NEAR $L$ecb_ret
819ALIGN	16
820$L$ecb_dec_five:
821	xorps	xmm7,xmm7
822	call	_aesni_decrypt6
823	movups	XMMWORD[rsi],xmm2
824	pxor	xmm2,xmm2
825	movups	XMMWORD[16+rsi],xmm3
826	pxor	xmm3,xmm3
827	movups	XMMWORD[32+rsi],xmm4
828	pxor	xmm4,xmm4
829	movups	XMMWORD[48+rsi],xmm5
830	pxor	xmm5,xmm5
831	movups	XMMWORD[64+rsi],xmm6
832	pxor	xmm6,xmm6
833	pxor	xmm7,xmm7
834	jmp	NEAR $L$ecb_ret
835ALIGN	16
836$L$ecb_dec_six:
837	call	_aesni_decrypt6
838	movups	XMMWORD[rsi],xmm2
839	pxor	xmm2,xmm2
840	movups	XMMWORD[16+rsi],xmm3
841	pxor	xmm3,xmm3
842	movups	XMMWORD[32+rsi],xmm4
843	pxor	xmm4,xmm4
844	movups	XMMWORD[48+rsi],xmm5
845	pxor	xmm5,xmm5
846	movups	XMMWORD[64+rsi],xmm6
847	pxor	xmm6,xmm6
848	movups	XMMWORD[80+rsi],xmm7
849	pxor	xmm7,xmm7
850
851$L$ecb_ret:
852	xorps	xmm0,xmm0
853	pxor	xmm1,xmm1
854	movaps	xmm6,XMMWORD[rsp]
855	movaps	XMMWORD[rsp],xmm0
856	movaps	xmm7,XMMWORD[16+rsp]
857	movaps	XMMWORD[16+rsp],xmm0
858	movaps	xmm8,XMMWORD[32+rsp]
859	movaps	XMMWORD[32+rsp],xmm0
860	movaps	xmm9,XMMWORD[48+rsp]
861	movaps	XMMWORD[48+rsp],xmm0
862	lea	rsp,[88+rsp]
863$L$ecb_enc_ret:
864	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
865	mov	rsi,QWORD[16+rsp]
866	DB	0F3h,0C3h		;repret
867$L$SEH_end_aesni_ecb_encrypt:
868global	aesni_ccm64_encrypt_blocks
869
870ALIGN	16
871aesni_ccm64_encrypt_blocks:
872	mov	QWORD[8+rsp],rdi	;WIN64 prologue
873	mov	QWORD[16+rsp],rsi
874	mov	rax,rsp
875$L$SEH_begin_aesni_ccm64_encrypt_blocks:
876	mov	rdi,rcx
877	mov	rsi,rdx
878	mov	rdx,r8
879	mov	rcx,r9
880	mov	r8,QWORD[40+rsp]
881	mov	r9,QWORD[48+rsp]
882
883
884	lea	rsp,[((-88))+rsp]
885	movaps	XMMWORD[rsp],xmm6
886	movaps	XMMWORD[16+rsp],xmm7
887	movaps	XMMWORD[32+rsp],xmm8
888	movaps	XMMWORD[48+rsp],xmm9
889$L$ccm64_enc_body:
890	mov	eax,DWORD[240+rcx]
891	movdqu	xmm6,XMMWORD[r8]
892	movdqa	xmm9,XMMWORD[$L$increment64]
893	movdqa	xmm7,XMMWORD[$L$bswap_mask]
894
895	shl	eax,4
896	mov	r10d,16
897	lea	r11,[rcx]
898	movdqu	xmm3,XMMWORD[r9]
899	movdqa	xmm2,xmm6
900	lea	rcx,[32+rax*1+rcx]
901DB	102,15,56,0,247
902	sub	r10,rax
903	jmp	NEAR $L$ccm64_enc_outer
904ALIGN	16
905$L$ccm64_enc_outer:
906	movups	xmm0,XMMWORD[r11]
907	mov	rax,r10
908	movups	xmm8,XMMWORD[rdi]
909
910	xorps	xmm2,xmm0
911	movups	xmm1,XMMWORD[16+r11]
912	xorps	xmm0,xmm8
913	xorps	xmm3,xmm0
914	movups	xmm0,XMMWORD[32+r11]
915
916$L$ccm64_enc2_loop:
917DB	102,15,56,220,209
918DB	102,15,56,220,217
919	movups	xmm1,XMMWORD[rax*1+rcx]
920	add	rax,32
921DB	102,15,56,220,208
922DB	102,15,56,220,216
923	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
924	jnz	NEAR $L$ccm64_enc2_loop
925DB	102,15,56,220,209
926DB	102,15,56,220,217
927	paddq	xmm6,xmm9
928	dec	rdx
929DB	102,15,56,221,208
930DB	102,15,56,221,216
931
932	lea	rdi,[16+rdi]
933	xorps	xmm8,xmm2
934	movdqa	xmm2,xmm6
935	movups	XMMWORD[rsi],xmm8
936DB	102,15,56,0,215
937	lea	rsi,[16+rsi]
938	jnz	NEAR $L$ccm64_enc_outer
939
940	pxor	xmm0,xmm0
941	pxor	xmm1,xmm1
942	pxor	xmm2,xmm2
943	movups	XMMWORD[r9],xmm3
944	pxor	xmm3,xmm3
945	pxor	xmm8,xmm8
946	pxor	xmm6,xmm6
947	movaps	xmm6,XMMWORD[rsp]
948	movaps	XMMWORD[rsp],xmm0
949	movaps	xmm7,XMMWORD[16+rsp]
950	movaps	XMMWORD[16+rsp],xmm0
951	movaps	xmm8,XMMWORD[32+rsp]
952	movaps	XMMWORD[32+rsp],xmm0
953	movaps	xmm9,XMMWORD[48+rsp]
954	movaps	XMMWORD[48+rsp],xmm0
955	lea	rsp,[88+rsp]
956$L$ccm64_enc_ret:
957	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
958	mov	rsi,QWORD[16+rsp]
959	DB	0F3h,0C3h		;repret
960$L$SEH_end_aesni_ccm64_encrypt_blocks:
961global	aesni_ccm64_decrypt_blocks
962
963ALIGN	16
964aesni_ccm64_decrypt_blocks:
965	mov	QWORD[8+rsp],rdi	;WIN64 prologue
966	mov	QWORD[16+rsp],rsi
967	mov	rax,rsp
968$L$SEH_begin_aesni_ccm64_decrypt_blocks:
969	mov	rdi,rcx
970	mov	rsi,rdx
971	mov	rdx,r8
972	mov	rcx,r9
973	mov	r8,QWORD[40+rsp]
974	mov	r9,QWORD[48+rsp]
975
976
977	lea	rsp,[((-88))+rsp]
978	movaps	XMMWORD[rsp],xmm6
979	movaps	XMMWORD[16+rsp],xmm7
980	movaps	XMMWORD[32+rsp],xmm8
981	movaps	XMMWORD[48+rsp],xmm9
982$L$ccm64_dec_body:
983	mov	eax,DWORD[240+rcx]
984	movups	xmm6,XMMWORD[r8]
985	movdqu	xmm3,XMMWORD[r9]
986	movdqa	xmm9,XMMWORD[$L$increment64]
987	movdqa	xmm7,XMMWORD[$L$bswap_mask]
988
989	movaps	xmm2,xmm6
990	mov	r10d,eax
991	mov	r11,rcx
992DB	102,15,56,0,247
993	movups	xmm0,XMMWORD[rcx]
994	movups	xmm1,XMMWORD[16+rcx]
995	lea	rcx,[32+rcx]
996	xorps	xmm2,xmm0
997$L$oop_enc1_5:
998DB	102,15,56,220,209
999	dec	eax
1000	movups	xmm1,XMMWORD[rcx]
1001	lea	rcx,[16+rcx]
1002	jnz	NEAR $L$oop_enc1_5
1003DB	102,15,56,221,209
1004	shl	r10d,4
1005	mov	eax,16
1006	movups	xmm8,XMMWORD[rdi]
1007	paddq	xmm6,xmm9
1008	lea	rdi,[16+rdi]
1009	sub	rax,r10
1010	lea	rcx,[32+r10*1+r11]
1011	mov	r10,rax
1012	jmp	NEAR $L$ccm64_dec_outer
1013ALIGN	16
1014$L$ccm64_dec_outer:
1015	xorps	xmm8,xmm2
1016	movdqa	xmm2,xmm6
1017	movups	XMMWORD[rsi],xmm8
1018	lea	rsi,[16+rsi]
1019DB	102,15,56,0,215
1020
1021	sub	rdx,1
1022	jz	NEAR $L$ccm64_dec_break
1023
1024	movups	xmm0,XMMWORD[r11]
1025	mov	rax,r10
1026	movups	xmm1,XMMWORD[16+r11]
1027	xorps	xmm8,xmm0
1028	xorps	xmm2,xmm0
1029	xorps	xmm3,xmm8
1030	movups	xmm0,XMMWORD[32+r11]
1031	jmp	NEAR $L$ccm64_dec2_loop
1032ALIGN	16
1033$L$ccm64_dec2_loop:
1034DB	102,15,56,220,209
1035DB	102,15,56,220,217
1036	movups	xmm1,XMMWORD[rax*1+rcx]
1037	add	rax,32
1038DB	102,15,56,220,208
1039DB	102,15,56,220,216
1040	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
1041	jnz	NEAR $L$ccm64_dec2_loop
1042	movups	xmm8,XMMWORD[rdi]
1043	paddq	xmm6,xmm9
1044DB	102,15,56,220,209
1045DB	102,15,56,220,217
1046DB	102,15,56,221,208
1047DB	102,15,56,221,216
1048	lea	rdi,[16+rdi]
1049	jmp	NEAR $L$ccm64_dec_outer
1050
1051ALIGN	16
1052$L$ccm64_dec_break:
1053
1054	mov	eax,DWORD[240+r11]
1055	movups	xmm0,XMMWORD[r11]
1056	movups	xmm1,XMMWORD[16+r11]
1057	xorps	xmm8,xmm0
1058	lea	r11,[32+r11]
1059	xorps	xmm3,xmm8
1060$L$oop_enc1_6:
1061DB	102,15,56,220,217
1062	dec	eax
1063	movups	xmm1,XMMWORD[r11]
1064	lea	r11,[16+r11]
1065	jnz	NEAR $L$oop_enc1_6
1066DB	102,15,56,221,217
1067	pxor	xmm0,xmm0
1068	pxor	xmm1,xmm1
1069	pxor	xmm2,xmm2
1070	movups	XMMWORD[r9],xmm3
1071	pxor	xmm3,xmm3
1072	pxor	xmm8,xmm8
1073	pxor	xmm6,xmm6
1074	movaps	xmm6,XMMWORD[rsp]
1075	movaps	XMMWORD[rsp],xmm0
1076	movaps	xmm7,XMMWORD[16+rsp]
1077	movaps	XMMWORD[16+rsp],xmm0
1078	movaps	xmm8,XMMWORD[32+rsp]
1079	movaps	XMMWORD[32+rsp],xmm0
1080	movaps	xmm9,XMMWORD[48+rsp]
1081	movaps	XMMWORD[48+rsp],xmm0
1082	lea	rsp,[88+rsp]
1083$L$ccm64_dec_ret:
1084	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
1085	mov	rsi,QWORD[16+rsp]
1086	DB	0F3h,0C3h		;repret
1087$L$SEH_end_aesni_ccm64_decrypt_blocks:
1088global	aesni_ctr32_encrypt_blocks
1089
1090ALIGN	16
1091aesni_ctr32_encrypt_blocks:
1092	mov	QWORD[8+rsp],rdi	;WIN64 prologue
1093	mov	QWORD[16+rsp],rsi
1094	mov	rax,rsp
1095$L$SEH_begin_aesni_ctr32_encrypt_blocks:
1096	mov	rdi,rcx
1097	mov	rsi,rdx
1098	mov	rdx,r8
1099	mov	rcx,r9
1100	mov	r8,QWORD[40+rsp]
1101
1102
1103	cmp	rdx,1
1104	jne	NEAR $L$ctr32_bulk
1105
1106
1107
1108	movups	xmm2,XMMWORD[r8]
1109	movups	xmm3,XMMWORD[rdi]
1110	mov	edx,DWORD[240+rcx]
1111	movups	xmm0,XMMWORD[rcx]
1112	movups	xmm1,XMMWORD[16+rcx]
1113	lea	rcx,[32+rcx]
1114	xorps	xmm2,xmm0
1115$L$oop_enc1_7:
1116DB	102,15,56,220,209
1117	dec	edx
1118	movups	xmm1,XMMWORD[rcx]
1119	lea	rcx,[16+rcx]
1120	jnz	NEAR $L$oop_enc1_7
1121DB	102,15,56,221,209
1122	pxor	xmm0,xmm0
1123	pxor	xmm1,xmm1
1124	xorps	xmm2,xmm3
1125	pxor	xmm3,xmm3
1126	movups	XMMWORD[rsi],xmm2
1127	xorps	xmm2,xmm2
1128	jmp	NEAR $L$ctr32_epilogue
1129
1130ALIGN	16
1131$L$ctr32_bulk:
1132	lea	r11,[rsp]
1133	push	rbp
1134	sub	rsp,288
1135	and	rsp,-16
1136	movaps	XMMWORD[(-168)+r11],xmm6
1137	movaps	XMMWORD[(-152)+r11],xmm7
1138	movaps	XMMWORD[(-136)+r11],xmm8
1139	movaps	XMMWORD[(-120)+r11],xmm9
1140	movaps	XMMWORD[(-104)+r11],xmm10
1141	movaps	XMMWORD[(-88)+r11],xmm11
1142	movaps	XMMWORD[(-72)+r11],xmm12
1143	movaps	XMMWORD[(-56)+r11],xmm13
1144	movaps	XMMWORD[(-40)+r11],xmm14
1145	movaps	XMMWORD[(-24)+r11],xmm15
1146$L$ctr32_body:
1147
1148
1149
1150
1151	movdqu	xmm2,XMMWORD[r8]
1152	movdqu	xmm0,XMMWORD[rcx]
1153	mov	r8d,DWORD[12+r8]
1154	pxor	xmm2,xmm0
1155	mov	ebp,DWORD[12+rcx]
1156	movdqa	XMMWORD[rsp],xmm2
1157	bswap	r8d
1158	movdqa	xmm3,xmm2
1159	movdqa	xmm4,xmm2
1160	movdqa	xmm5,xmm2
1161	movdqa	XMMWORD[64+rsp],xmm2
1162	movdqa	XMMWORD[80+rsp],xmm2
1163	movdqa	XMMWORD[96+rsp],xmm2
1164	mov	r10,rdx
1165	movdqa	XMMWORD[112+rsp],xmm2
1166
1167	lea	rax,[1+r8]
1168	lea	rdx,[2+r8]
1169	bswap	eax
1170	bswap	edx
1171	xor	eax,ebp
1172	xor	edx,ebp
1173DB	102,15,58,34,216,3
1174	lea	rax,[3+r8]
1175	movdqa	XMMWORD[16+rsp],xmm3
1176DB	102,15,58,34,226,3
1177	bswap	eax
1178	mov	rdx,r10
1179	lea	r10,[4+r8]
1180	movdqa	XMMWORD[32+rsp],xmm4
1181	xor	eax,ebp
1182	bswap	r10d
1183DB	102,15,58,34,232,3
1184	xor	r10d,ebp
1185	movdqa	XMMWORD[48+rsp],xmm5
1186	lea	r9,[5+r8]
1187	mov	DWORD[((64+12))+rsp],r10d
1188	bswap	r9d
1189	lea	r10,[6+r8]
1190	mov	eax,DWORD[240+rcx]
1191	xor	r9d,ebp
1192	bswap	r10d
1193	mov	DWORD[((80+12))+rsp],r9d
1194	xor	r10d,ebp
1195	lea	r9,[7+r8]
1196	mov	DWORD[((96+12))+rsp],r10d
1197	bswap	r9d
1198	mov	r10d,DWORD[((OPENSSL_ia32cap_P+4))]
1199	xor	r9d,ebp
1200	and	r10d,71303168
1201	mov	DWORD[((112+12))+rsp],r9d
1202
1203	movups	xmm1,XMMWORD[16+rcx]
1204
1205	movdqa	xmm6,XMMWORD[64+rsp]
1206	movdqa	xmm7,XMMWORD[80+rsp]
1207
1208	cmp	rdx,8
1209	jb	NEAR $L$ctr32_tail
1210
1211	sub	rdx,6
1212	cmp	r10d,4194304
1213	je	NEAR $L$ctr32_6x
1214
1215	lea	rcx,[128+rcx]
1216	sub	rdx,2
1217	jmp	NEAR $L$ctr32_loop8
1218
1219ALIGN	16
1220$L$ctr32_6x:
1221	shl	eax,4
1222	mov	r10d,48
1223	bswap	ebp
1224	lea	rcx,[32+rax*1+rcx]
1225	sub	r10,rax
1226	jmp	NEAR $L$ctr32_loop6
1227
1228ALIGN	16
1229$L$ctr32_loop6:
1230	add	r8d,6
1231	movups	xmm0,XMMWORD[((-48))+r10*1+rcx]
1232DB	102,15,56,220,209
1233	mov	eax,r8d
1234	xor	eax,ebp
1235DB	102,15,56,220,217
1236DB	0x0f,0x38,0xf1,0x44,0x24,12
1237	lea	eax,[1+r8]
1238DB	102,15,56,220,225
1239	xor	eax,ebp
1240DB	0x0f,0x38,0xf1,0x44,0x24,28
1241DB	102,15,56,220,233
1242	lea	eax,[2+r8]
1243	xor	eax,ebp
1244DB	102,15,56,220,241
1245DB	0x0f,0x38,0xf1,0x44,0x24,44
1246	lea	eax,[3+r8]
1247DB	102,15,56,220,249
1248	movups	xmm1,XMMWORD[((-32))+r10*1+rcx]
1249	xor	eax,ebp
1250
1251DB	102,15,56,220,208
1252DB	0x0f,0x38,0xf1,0x44,0x24,60
1253	lea	eax,[4+r8]
1254DB	102,15,56,220,216
1255	xor	eax,ebp
1256DB	0x0f,0x38,0xf1,0x44,0x24,76
1257DB	102,15,56,220,224
1258	lea	eax,[5+r8]
1259	xor	eax,ebp
1260DB	102,15,56,220,232
1261DB	0x0f,0x38,0xf1,0x44,0x24,92
1262	mov	rax,r10
1263DB	102,15,56,220,240
1264DB	102,15,56,220,248
1265	movups	xmm0,XMMWORD[((-16))+r10*1+rcx]
1266
1267	call	$L$enc_loop6
1268
1269	movdqu	xmm8,XMMWORD[rdi]
1270	movdqu	xmm9,XMMWORD[16+rdi]
1271	movdqu	xmm10,XMMWORD[32+rdi]
1272	movdqu	xmm11,XMMWORD[48+rdi]
1273	movdqu	xmm12,XMMWORD[64+rdi]
1274	movdqu	xmm13,XMMWORD[80+rdi]
1275	lea	rdi,[96+rdi]
1276	movups	xmm1,XMMWORD[((-64))+r10*1+rcx]
1277	pxor	xmm8,xmm2
1278	movaps	xmm2,XMMWORD[rsp]
1279	pxor	xmm9,xmm3
1280	movaps	xmm3,XMMWORD[16+rsp]
1281	pxor	xmm10,xmm4
1282	movaps	xmm4,XMMWORD[32+rsp]
1283	pxor	xmm11,xmm5
1284	movaps	xmm5,XMMWORD[48+rsp]
1285	pxor	xmm12,xmm6
1286	movaps	xmm6,XMMWORD[64+rsp]
1287	pxor	xmm13,xmm7
1288	movaps	xmm7,XMMWORD[80+rsp]
1289	movdqu	XMMWORD[rsi],xmm8
1290	movdqu	XMMWORD[16+rsi],xmm9
1291	movdqu	XMMWORD[32+rsi],xmm10
1292	movdqu	XMMWORD[48+rsi],xmm11
1293	movdqu	XMMWORD[64+rsi],xmm12
1294	movdqu	XMMWORD[80+rsi],xmm13
1295	lea	rsi,[96+rsi]
1296
1297	sub	rdx,6
1298	jnc	NEAR $L$ctr32_loop6
1299
1300	add	rdx,6
1301	jz	NEAR $L$ctr32_done
1302
1303	lea	eax,[((-48))+r10]
1304	lea	rcx,[((-80))+r10*1+rcx]
1305	neg	eax
1306	shr	eax,4
1307	jmp	NEAR $L$ctr32_tail
1308
1309ALIGN	32
1310$L$ctr32_loop8:
1311	add	r8d,8
1312	movdqa	xmm8,XMMWORD[96+rsp]
1313DB	102,15,56,220,209
1314	mov	r9d,r8d
1315	movdqa	xmm9,XMMWORD[112+rsp]
1316DB	102,15,56,220,217
1317	bswap	r9d
1318	movups	xmm0,XMMWORD[((32-128))+rcx]
1319DB	102,15,56,220,225
1320	xor	r9d,ebp
1321	nop
1322DB	102,15,56,220,233
1323	mov	DWORD[((0+12))+rsp],r9d
1324	lea	r9,[1+r8]
1325DB	102,15,56,220,241
1326DB	102,15,56,220,249
1327DB	102,68,15,56,220,193
1328DB	102,68,15,56,220,201
1329	movups	xmm1,XMMWORD[((48-128))+rcx]
1330	bswap	r9d
1331DB	102,15,56,220,208
1332DB	102,15,56,220,216
1333	xor	r9d,ebp
1334DB	0x66,0x90
1335DB	102,15,56,220,224
1336DB	102,15,56,220,232
1337	mov	DWORD[((16+12))+rsp],r9d
1338	lea	r9,[2+r8]
1339DB	102,15,56,220,240
1340DB	102,15,56,220,248
1341DB	102,68,15,56,220,192
1342DB	102,68,15,56,220,200
1343	movups	xmm0,XMMWORD[((64-128))+rcx]
1344	bswap	r9d
1345DB	102,15,56,220,209
1346DB	102,15,56,220,217
1347	xor	r9d,ebp
1348DB	0x66,0x90
1349DB	102,15,56,220,225
1350DB	102,15,56,220,233
1351	mov	DWORD[((32+12))+rsp],r9d
1352	lea	r9,[3+r8]
1353DB	102,15,56,220,241
1354DB	102,15,56,220,249
1355DB	102,68,15,56,220,193
1356DB	102,68,15,56,220,201
1357	movups	xmm1,XMMWORD[((80-128))+rcx]
1358	bswap	r9d
1359DB	102,15,56,220,208
1360DB	102,15,56,220,216
1361	xor	r9d,ebp
1362DB	0x66,0x90
1363DB	102,15,56,220,224
1364DB	102,15,56,220,232
1365	mov	DWORD[((48+12))+rsp],r9d
1366	lea	r9,[4+r8]
1367DB	102,15,56,220,240
1368DB	102,15,56,220,248
1369DB	102,68,15,56,220,192
1370DB	102,68,15,56,220,200
1371	movups	xmm0,XMMWORD[((96-128))+rcx]
1372	bswap	r9d
1373DB	102,15,56,220,209
1374DB	102,15,56,220,217
1375	xor	r9d,ebp
1376DB	0x66,0x90
1377DB	102,15,56,220,225
1378DB	102,15,56,220,233
1379	mov	DWORD[((64+12))+rsp],r9d
1380	lea	r9,[5+r8]
1381DB	102,15,56,220,241
1382DB	102,15,56,220,249
1383DB	102,68,15,56,220,193
1384DB	102,68,15,56,220,201
1385	movups	xmm1,XMMWORD[((112-128))+rcx]
1386	bswap	r9d
1387DB	102,15,56,220,208
1388DB	102,15,56,220,216
1389	xor	r9d,ebp
1390DB	0x66,0x90
1391DB	102,15,56,220,224
1392DB	102,15,56,220,232
1393	mov	DWORD[((80+12))+rsp],r9d
1394	lea	r9,[6+r8]
1395DB	102,15,56,220,240
1396DB	102,15,56,220,248
1397DB	102,68,15,56,220,192
1398DB	102,68,15,56,220,200
1399	movups	xmm0,XMMWORD[((128-128))+rcx]
1400	bswap	r9d
1401DB	102,15,56,220,209
1402DB	102,15,56,220,217
1403	xor	r9d,ebp
1404DB	0x66,0x90
1405DB	102,15,56,220,225
1406DB	102,15,56,220,233
1407	mov	DWORD[((96+12))+rsp],r9d
1408	lea	r9,[7+r8]
1409DB	102,15,56,220,241
1410DB	102,15,56,220,249
1411DB	102,68,15,56,220,193
1412DB	102,68,15,56,220,201
1413	movups	xmm1,XMMWORD[((144-128))+rcx]
1414	bswap	r9d
1415DB	102,15,56,220,208
1416DB	102,15,56,220,216
1417DB	102,15,56,220,224
1418	xor	r9d,ebp
1419	movdqu	xmm10,XMMWORD[rdi]
1420DB	102,15,56,220,232
1421	mov	DWORD[((112+12))+rsp],r9d
1422	cmp	eax,11
1423DB	102,15,56,220,240
1424DB	102,15,56,220,248
1425DB	102,68,15,56,220,192
1426DB	102,68,15,56,220,200
1427	movups	xmm0,XMMWORD[((160-128))+rcx]
1428
1429	jb	NEAR $L$ctr32_enc_done
1430
1431DB	102,15,56,220,209
1432DB	102,15,56,220,217
1433DB	102,15,56,220,225
1434DB	102,15,56,220,233
1435DB	102,15,56,220,241
1436DB	102,15,56,220,249
1437DB	102,68,15,56,220,193
1438DB	102,68,15,56,220,201
1439	movups	xmm1,XMMWORD[((176-128))+rcx]
1440
1441DB	102,15,56,220,208
1442DB	102,15,56,220,216
1443DB	102,15,56,220,224
1444DB	102,15,56,220,232
1445DB	102,15,56,220,240
1446DB	102,15,56,220,248
1447DB	102,68,15,56,220,192
1448DB	102,68,15,56,220,200
1449	movups	xmm0,XMMWORD[((192-128))+rcx]
1450	je	NEAR $L$ctr32_enc_done
1451
1452DB	102,15,56,220,209
1453DB	102,15,56,220,217
1454DB	102,15,56,220,225
1455DB	102,15,56,220,233
1456DB	102,15,56,220,241
1457DB	102,15,56,220,249
1458DB	102,68,15,56,220,193
1459DB	102,68,15,56,220,201
1460	movups	xmm1,XMMWORD[((208-128))+rcx]
1461
1462DB	102,15,56,220,208
1463DB	102,15,56,220,216
1464DB	102,15,56,220,224
1465DB	102,15,56,220,232
1466DB	102,15,56,220,240
1467DB	102,15,56,220,248
1468DB	102,68,15,56,220,192
1469DB	102,68,15,56,220,200
1470	movups	xmm0,XMMWORD[((224-128))+rcx]
1471	jmp	NEAR $L$ctr32_enc_done
1472
1473ALIGN	16
1474$L$ctr32_enc_done:
1475	movdqu	xmm11,XMMWORD[16+rdi]
1476	pxor	xmm10,xmm0
1477	movdqu	xmm12,XMMWORD[32+rdi]
1478	pxor	xmm11,xmm0
1479	movdqu	xmm13,XMMWORD[48+rdi]
1480	pxor	xmm12,xmm0
1481	movdqu	xmm14,XMMWORD[64+rdi]
1482	pxor	xmm13,xmm0
1483	movdqu	xmm15,XMMWORD[80+rdi]
1484	pxor	xmm14,xmm0
1485	pxor	xmm15,xmm0
1486DB	102,15,56,220,209
1487DB	102,15,56,220,217
1488DB	102,15,56,220,225
1489DB	102,15,56,220,233
1490DB	102,15,56,220,241
1491DB	102,15,56,220,249
1492DB	102,68,15,56,220,193
1493DB	102,68,15,56,220,201
1494	movdqu	xmm1,XMMWORD[96+rdi]
1495	lea	rdi,[128+rdi]
1496
1497DB	102,65,15,56,221,210
1498	pxor	xmm1,xmm0
1499	movdqu	xmm10,XMMWORD[((112-128))+rdi]
1500DB	102,65,15,56,221,219
1501	pxor	xmm10,xmm0
1502	movdqa	xmm11,XMMWORD[rsp]
1503DB	102,65,15,56,221,228
1504DB	102,65,15,56,221,237
1505	movdqa	xmm12,XMMWORD[16+rsp]
1506	movdqa	xmm13,XMMWORD[32+rsp]
1507DB	102,65,15,56,221,246
1508DB	102,65,15,56,221,255
1509	movdqa	xmm14,XMMWORD[48+rsp]
1510	movdqa	xmm15,XMMWORD[64+rsp]
1511DB	102,68,15,56,221,193
1512	movdqa	xmm0,XMMWORD[80+rsp]
1513	movups	xmm1,XMMWORD[((16-128))+rcx]
1514DB	102,69,15,56,221,202
1515
1516	movups	XMMWORD[rsi],xmm2
1517	movdqa	xmm2,xmm11
1518	movups	XMMWORD[16+rsi],xmm3
1519	movdqa	xmm3,xmm12
1520	movups	XMMWORD[32+rsi],xmm4
1521	movdqa	xmm4,xmm13
1522	movups	XMMWORD[48+rsi],xmm5
1523	movdqa	xmm5,xmm14
1524	movups	XMMWORD[64+rsi],xmm6
1525	movdqa	xmm6,xmm15
1526	movups	XMMWORD[80+rsi],xmm7
1527	movdqa	xmm7,xmm0
1528	movups	XMMWORD[96+rsi],xmm8
1529	movups	XMMWORD[112+rsi],xmm9
1530	lea	rsi,[128+rsi]
1531
1532	sub	rdx,8
1533	jnc	NEAR $L$ctr32_loop8
1534
1535	add	rdx,8
1536	jz	NEAR $L$ctr32_done
1537	lea	rcx,[((-128))+rcx]
1538
1539$L$ctr32_tail:
1540
1541
1542	lea	rcx,[16+rcx]
1543	cmp	rdx,4
1544	jb	NEAR $L$ctr32_loop3
1545	je	NEAR $L$ctr32_loop4
1546
1547
1548	shl	eax,4
1549	movdqa	xmm8,XMMWORD[96+rsp]
1550	pxor	xmm9,xmm9
1551
1552	movups	xmm0,XMMWORD[16+rcx]
1553DB	102,15,56,220,209
1554DB	102,15,56,220,217
1555	lea	rcx,[((32-16))+rax*1+rcx]
1556	neg	rax
1557DB	102,15,56,220,225
1558	add	rax,16
1559	movups	xmm10,XMMWORD[rdi]
1560DB	102,15,56,220,233
1561DB	102,15,56,220,241
1562	movups	xmm11,XMMWORD[16+rdi]
1563	movups	xmm12,XMMWORD[32+rdi]
1564DB	102,15,56,220,249
1565DB	102,68,15,56,220,193
1566
1567	call	$L$enc_loop8_enter
1568
1569	movdqu	xmm13,XMMWORD[48+rdi]
1570	pxor	xmm2,xmm10
1571	movdqu	xmm10,XMMWORD[64+rdi]
1572	pxor	xmm3,xmm11
1573	movdqu	XMMWORD[rsi],xmm2
1574	pxor	xmm4,xmm12
1575	movdqu	XMMWORD[16+rsi],xmm3
1576	pxor	xmm5,xmm13
1577	movdqu	XMMWORD[32+rsi],xmm4
1578	pxor	xmm6,xmm10
1579	movdqu	XMMWORD[48+rsi],xmm5
1580	movdqu	XMMWORD[64+rsi],xmm6
1581	cmp	rdx,6
1582	jb	NEAR $L$ctr32_done
1583
1584	movups	xmm11,XMMWORD[80+rdi]
1585	xorps	xmm7,xmm11
1586	movups	XMMWORD[80+rsi],xmm7
1587	je	NEAR $L$ctr32_done
1588
1589	movups	xmm12,XMMWORD[96+rdi]
1590	xorps	xmm8,xmm12
1591	movups	XMMWORD[96+rsi],xmm8
1592	jmp	NEAR $L$ctr32_done
1593
1594ALIGN	32
1595$L$ctr32_loop4:
1596DB	102,15,56,220,209
1597	lea	rcx,[16+rcx]
1598	dec	eax
1599DB	102,15,56,220,217
1600DB	102,15,56,220,225
1601DB	102,15,56,220,233
1602	movups	xmm1,XMMWORD[rcx]
1603	jnz	NEAR $L$ctr32_loop4
1604DB	102,15,56,221,209
1605DB	102,15,56,221,217
1606	movups	xmm10,XMMWORD[rdi]
1607	movups	xmm11,XMMWORD[16+rdi]
1608DB	102,15,56,221,225
1609DB	102,15,56,221,233
1610	movups	xmm12,XMMWORD[32+rdi]
1611	movups	xmm13,XMMWORD[48+rdi]
1612
1613	xorps	xmm2,xmm10
1614	movups	XMMWORD[rsi],xmm2
1615	xorps	xmm3,xmm11
1616	movups	XMMWORD[16+rsi],xmm3
1617	pxor	xmm4,xmm12
1618	movdqu	XMMWORD[32+rsi],xmm4
1619	pxor	xmm5,xmm13
1620	movdqu	XMMWORD[48+rsi],xmm5
1621	jmp	NEAR $L$ctr32_done
1622
1623ALIGN	32
1624$L$ctr32_loop3:
1625DB	102,15,56,220,209
1626	lea	rcx,[16+rcx]
1627	dec	eax
1628DB	102,15,56,220,217
1629DB	102,15,56,220,225
1630	movups	xmm1,XMMWORD[rcx]
1631	jnz	NEAR $L$ctr32_loop3
1632DB	102,15,56,221,209
1633DB	102,15,56,221,217
1634DB	102,15,56,221,225
1635
1636	movups	xmm10,XMMWORD[rdi]
1637	xorps	xmm2,xmm10
1638	movups	XMMWORD[rsi],xmm2
1639	cmp	rdx,2
1640	jb	NEAR $L$ctr32_done
1641
1642	movups	xmm11,XMMWORD[16+rdi]
1643	xorps	xmm3,xmm11
1644	movups	XMMWORD[16+rsi],xmm3
1645	je	NEAR $L$ctr32_done
1646
1647	movups	xmm12,XMMWORD[32+rdi]
1648	xorps	xmm4,xmm12
1649	movups	XMMWORD[32+rsi],xmm4
1650
1651$L$ctr32_done:
1652	xorps	xmm0,xmm0
1653	xor	ebp,ebp
1654	pxor	xmm1,xmm1
1655	pxor	xmm2,xmm2
1656	pxor	xmm3,xmm3
1657	pxor	xmm4,xmm4
1658	pxor	xmm5,xmm5
1659	movaps	xmm6,XMMWORD[((-168))+r11]
1660	movaps	XMMWORD[(-168)+r11],xmm0
1661	movaps	xmm7,XMMWORD[((-152))+r11]
1662	movaps	XMMWORD[(-152)+r11],xmm0
1663	movaps	xmm8,XMMWORD[((-136))+r11]
1664	movaps	XMMWORD[(-136)+r11],xmm0
1665	movaps	xmm9,XMMWORD[((-120))+r11]
1666	movaps	XMMWORD[(-120)+r11],xmm0
1667	movaps	xmm10,XMMWORD[((-104))+r11]
1668	movaps	XMMWORD[(-104)+r11],xmm0
1669	movaps	xmm11,XMMWORD[((-88))+r11]
1670	movaps	XMMWORD[(-88)+r11],xmm0
1671	movaps	xmm12,XMMWORD[((-72))+r11]
1672	movaps	XMMWORD[(-72)+r11],xmm0
1673	movaps	xmm13,XMMWORD[((-56))+r11]
1674	movaps	XMMWORD[(-56)+r11],xmm0
1675	movaps	xmm14,XMMWORD[((-40))+r11]
1676	movaps	XMMWORD[(-40)+r11],xmm0
1677	movaps	xmm15,XMMWORD[((-24))+r11]
1678	movaps	XMMWORD[(-24)+r11],xmm0
1679	movaps	XMMWORD[rsp],xmm0
1680	movaps	XMMWORD[16+rsp],xmm0
1681	movaps	XMMWORD[32+rsp],xmm0
1682	movaps	XMMWORD[48+rsp],xmm0
1683	movaps	XMMWORD[64+rsp],xmm0
1684	movaps	XMMWORD[80+rsp],xmm0
1685	movaps	XMMWORD[96+rsp],xmm0
1686	movaps	XMMWORD[112+rsp],xmm0
1687	mov	rbp,QWORD[((-8))+r11]
1688	lea	rsp,[r11]
1689$L$ctr32_epilogue:
1690	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
1691	mov	rsi,QWORD[16+rsp]
1692	DB	0F3h,0C3h		;repret
1693$L$SEH_end_aesni_ctr32_encrypt_blocks:
1694global	aesni_xts_encrypt
1695
1696ALIGN	16
1697aesni_xts_encrypt:
1698	mov	QWORD[8+rsp],rdi	;WIN64 prologue
1699	mov	QWORD[16+rsp],rsi
1700	mov	rax,rsp
1701$L$SEH_begin_aesni_xts_encrypt:
1702	mov	rdi,rcx
1703	mov	rsi,rdx
1704	mov	rdx,r8
1705	mov	rcx,r9
1706	mov	r8,QWORD[40+rsp]
1707	mov	r9,QWORD[48+rsp]
1708
1709
1710	lea	r11,[rsp]
1711	push	rbp
1712	sub	rsp,272
1713	and	rsp,-16
1714	movaps	XMMWORD[(-168)+r11],xmm6
1715	movaps	XMMWORD[(-152)+r11],xmm7
1716	movaps	XMMWORD[(-136)+r11],xmm8
1717	movaps	XMMWORD[(-120)+r11],xmm9
1718	movaps	XMMWORD[(-104)+r11],xmm10
1719	movaps	XMMWORD[(-88)+r11],xmm11
1720	movaps	XMMWORD[(-72)+r11],xmm12
1721	movaps	XMMWORD[(-56)+r11],xmm13
1722	movaps	XMMWORD[(-40)+r11],xmm14
1723	movaps	XMMWORD[(-24)+r11],xmm15
1724$L$xts_enc_body:
1725	movups	xmm2,XMMWORD[r9]
1726	mov	eax,DWORD[240+r8]
1727	mov	r10d,DWORD[240+rcx]
1728	movups	xmm0,XMMWORD[r8]
1729	movups	xmm1,XMMWORD[16+r8]
1730	lea	r8,[32+r8]
1731	xorps	xmm2,xmm0
1732$L$oop_enc1_8:
1733DB	102,15,56,220,209
1734	dec	eax
1735	movups	xmm1,XMMWORD[r8]
1736	lea	r8,[16+r8]
1737	jnz	NEAR $L$oop_enc1_8
1738DB	102,15,56,221,209
1739	movups	xmm0,XMMWORD[rcx]
1740	mov	rbp,rcx
1741	mov	eax,r10d
1742	shl	r10d,4
1743	mov	r9,rdx
1744	and	rdx,-16
1745
1746	movups	xmm1,XMMWORD[16+r10*1+rcx]
1747
1748	movdqa	xmm8,XMMWORD[$L$xts_magic]
1749	movdqa	xmm15,xmm2
1750	pshufd	xmm9,xmm2,0x5f
1751	pxor	xmm1,xmm0
1752	movdqa	xmm14,xmm9
1753	paddd	xmm9,xmm9
1754	movdqa	xmm10,xmm15
1755	psrad	xmm14,31
1756	paddq	xmm15,xmm15
1757	pand	xmm14,xmm8
1758	pxor	xmm10,xmm0
1759	pxor	xmm15,xmm14
1760	movdqa	xmm14,xmm9
1761	paddd	xmm9,xmm9
1762	movdqa	xmm11,xmm15
1763	psrad	xmm14,31
1764	paddq	xmm15,xmm15
1765	pand	xmm14,xmm8
1766	pxor	xmm11,xmm0
1767	pxor	xmm15,xmm14
1768	movdqa	xmm14,xmm9
1769	paddd	xmm9,xmm9
1770	movdqa	xmm12,xmm15
1771	psrad	xmm14,31
1772	paddq	xmm15,xmm15
1773	pand	xmm14,xmm8
1774	pxor	xmm12,xmm0
1775	pxor	xmm15,xmm14
1776	movdqa	xmm14,xmm9
1777	paddd	xmm9,xmm9
1778	movdqa	xmm13,xmm15
1779	psrad	xmm14,31
1780	paddq	xmm15,xmm15
1781	pand	xmm14,xmm8
1782	pxor	xmm13,xmm0
1783	pxor	xmm15,xmm14
1784	movdqa	xmm14,xmm15
1785	psrad	xmm9,31
1786	paddq	xmm15,xmm15
1787	pand	xmm9,xmm8
1788	pxor	xmm14,xmm0
1789	pxor	xmm15,xmm9
1790	movaps	XMMWORD[96+rsp],xmm1
1791
1792	sub	rdx,16*6
1793	jc	NEAR $L$xts_enc_short
1794
1795	mov	eax,16+96
1796	lea	rcx,[32+r10*1+rbp]
1797	sub	rax,r10
1798	movups	xmm1,XMMWORD[16+rbp]
1799	mov	r10,rax
1800	lea	r8,[$L$xts_magic]
1801	jmp	NEAR $L$xts_enc_grandloop
1802
1803ALIGN	32
1804$L$xts_enc_grandloop:
1805	movdqu	xmm2,XMMWORD[rdi]
1806	movdqa	xmm8,xmm0
1807	movdqu	xmm3,XMMWORD[16+rdi]
1808	pxor	xmm2,xmm10
1809	movdqu	xmm4,XMMWORD[32+rdi]
1810	pxor	xmm3,xmm11
1811DB	102,15,56,220,209
1812	movdqu	xmm5,XMMWORD[48+rdi]
1813	pxor	xmm4,xmm12
1814DB	102,15,56,220,217
1815	movdqu	xmm6,XMMWORD[64+rdi]
1816	pxor	xmm5,xmm13
1817DB	102,15,56,220,225
1818	movdqu	xmm7,XMMWORD[80+rdi]
1819	pxor	xmm8,xmm15
1820	movdqa	xmm9,XMMWORD[96+rsp]
1821	pxor	xmm6,xmm14
1822DB	102,15,56,220,233
1823	movups	xmm0,XMMWORD[32+rbp]
1824	lea	rdi,[96+rdi]
1825	pxor	xmm7,xmm8
1826
1827	pxor	xmm10,xmm9
1828DB	102,15,56,220,241
1829	pxor	xmm11,xmm9
1830	movdqa	XMMWORD[rsp],xmm10
1831DB	102,15,56,220,249
1832	movups	xmm1,XMMWORD[48+rbp]
1833	pxor	xmm12,xmm9
1834
1835DB	102,15,56,220,208
1836	pxor	xmm13,xmm9
1837	movdqa	XMMWORD[16+rsp],xmm11
1838DB	102,15,56,220,216
1839	pxor	xmm14,xmm9
1840	movdqa	XMMWORD[32+rsp],xmm12
1841DB	102,15,56,220,224
1842DB	102,15,56,220,232
1843	pxor	xmm8,xmm9
1844	movdqa	XMMWORD[64+rsp],xmm14
1845DB	102,15,56,220,240
1846DB	102,15,56,220,248
1847	movups	xmm0,XMMWORD[64+rbp]
1848	movdqa	XMMWORD[80+rsp],xmm8
1849	pshufd	xmm9,xmm15,0x5f
1850	jmp	NEAR $L$xts_enc_loop6
1851ALIGN	32
1852$L$xts_enc_loop6:
1853DB	102,15,56,220,209
1854DB	102,15,56,220,217
1855DB	102,15,56,220,225
1856DB	102,15,56,220,233
1857DB	102,15,56,220,241
1858DB	102,15,56,220,249
1859	movups	xmm1,XMMWORD[((-64))+rax*1+rcx]
1860	add	rax,32
1861
1862DB	102,15,56,220,208
1863DB	102,15,56,220,216
1864DB	102,15,56,220,224
1865DB	102,15,56,220,232
1866DB	102,15,56,220,240
1867DB	102,15,56,220,248
1868	movups	xmm0,XMMWORD[((-80))+rax*1+rcx]
1869	jnz	NEAR $L$xts_enc_loop6
1870
1871	movdqa	xmm8,XMMWORD[r8]
1872	movdqa	xmm14,xmm9
1873	paddd	xmm9,xmm9
1874DB	102,15,56,220,209
1875	paddq	xmm15,xmm15
1876	psrad	xmm14,31
1877DB	102,15,56,220,217
1878	pand	xmm14,xmm8
1879	movups	xmm10,XMMWORD[rbp]
1880DB	102,15,56,220,225
1881DB	102,15,56,220,233
1882DB	102,15,56,220,241
1883	pxor	xmm15,xmm14
1884	movaps	xmm11,xmm10
1885DB	102,15,56,220,249
1886	movups	xmm1,XMMWORD[((-64))+rcx]
1887
1888	movdqa	xmm14,xmm9
1889DB	102,15,56,220,208
1890	paddd	xmm9,xmm9
1891	pxor	xmm10,xmm15
1892DB	102,15,56,220,216
1893	psrad	xmm14,31
1894	paddq	xmm15,xmm15
1895DB	102,15,56,220,224
1896DB	102,15,56,220,232
1897	pand	xmm14,xmm8
1898	movaps	xmm12,xmm11
1899DB	102,15,56,220,240
1900	pxor	xmm15,xmm14
1901	movdqa	xmm14,xmm9
1902DB	102,15,56,220,248
1903	movups	xmm0,XMMWORD[((-48))+rcx]
1904
1905	paddd	xmm9,xmm9
1906DB	102,15,56,220,209
1907	pxor	xmm11,xmm15
1908	psrad	xmm14,31
1909DB	102,15,56,220,217
1910	paddq	xmm15,xmm15
1911	pand	xmm14,xmm8
1912DB	102,15,56,220,225
1913DB	102,15,56,220,233
1914	movdqa	XMMWORD[48+rsp],xmm13
1915	pxor	xmm15,xmm14
1916DB	102,15,56,220,241
1917	movaps	xmm13,xmm12
1918	movdqa	xmm14,xmm9
1919DB	102,15,56,220,249
1920	movups	xmm1,XMMWORD[((-32))+rcx]
1921
1922	paddd	xmm9,xmm9
1923DB	102,15,56,220,208
1924	pxor	xmm12,xmm15
1925	psrad	xmm14,31
1926DB	102,15,56,220,216
1927	paddq	xmm15,xmm15
1928	pand	xmm14,xmm8
1929DB	102,15,56,220,224
1930DB	102,15,56,220,232
1931DB	102,15,56,220,240
1932	pxor	xmm15,xmm14
1933	movaps	xmm14,xmm13
1934DB	102,15,56,220,248
1935
1936	movdqa	xmm0,xmm9
1937	paddd	xmm9,xmm9
1938DB	102,15,56,220,209
1939	pxor	xmm13,xmm15
1940	psrad	xmm0,31
1941DB	102,15,56,220,217
1942	paddq	xmm15,xmm15
1943	pand	xmm0,xmm8
1944DB	102,15,56,220,225
1945DB	102,15,56,220,233
1946	pxor	xmm15,xmm0
1947	movups	xmm0,XMMWORD[rbp]
1948DB	102,15,56,220,241
1949DB	102,15,56,220,249
1950	movups	xmm1,XMMWORD[16+rbp]
1951
1952	pxor	xmm14,xmm15
1953DB	102,15,56,221,84,36,0
1954	psrad	xmm9,31
1955	paddq	xmm15,xmm15
1956DB	102,15,56,221,92,36,16
1957DB	102,15,56,221,100,36,32
1958	pand	xmm9,xmm8
1959	mov	rax,r10
1960DB	102,15,56,221,108,36,48
1961DB	102,15,56,221,116,36,64
1962DB	102,15,56,221,124,36,80
1963	pxor	xmm15,xmm9
1964
1965	lea	rsi,[96+rsi]
1966	movups	XMMWORD[(-96)+rsi],xmm2
1967	movups	XMMWORD[(-80)+rsi],xmm3
1968	movups	XMMWORD[(-64)+rsi],xmm4
1969	movups	XMMWORD[(-48)+rsi],xmm5
1970	movups	XMMWORD[(-32)+rsi],xmm6
1971	movups	XMMWORD[(-16)+rsi],xmm7
1972	sub	rdx,16*6
1973	jnc	NEAR $L$xts_enc_grandloop
1974
1975	mov	eax,16+96
1976	sub	eax,r10d
1977	mov	rcx,rbp
1978	shr	eax,4
1979
1980$L$xts_enc_short:
1981
1982	mov	r10d,eax
1983	pxor	xmm10,xmm0
1984	add	rdx,16*6
1985	jz	NEAR $L$xts_enc_done
1986
1987	pxor	xmm11,xmm0
1988	cmp	rdx,0x20
1989	jb	NEAR $L$xts_enc_one
1990	pxor	xmm12,xmm0
1991	je	NEAR $L$xts_enc_two
1992
1993	pxor	xmm13,xmm0
1994	cmp	rdx,0x40
1995	jb	NEAR $L$xts_enc_three
1996	pxor	xmm14,xmm0
1997	je	NEAR $L$xts_enc_four
1998
1999	movdqu	xmm2,XMMWORD[rdi]
2000	movdqu	xmm3,XMMWORD[16+rdi]
2001	movdqu	xmm4,XMMWORD[32+rdi]
2002	pxor	xmm2,xmm10
2003	movdqu	xmm5,XMMWORD[48+rdi]
2004	pxor	xmm3,xmm11
2005	movdqu	xmm6,XMMWORD[64+rdi]
2006	lea	rdi,[80+rdi]
2007	pxor	xmm4,xmm12
2008	pxor	xmm5,xmm13
2009	pxor	xmm6,xmm14
2010	pxor	xmm7,xmm7
2011
2012	call	_aesni_encrypt6
2013
2014	xorps	xmm2,xmm10
2015	movdqa	xmm10,xmm15
2016	xorps	xmm3,xmm11
2017	xorps	xmm4,xmm12
2018	movdqu	XMMWORD[rsi],xmm2
2019	xorps	xmm5,xmm13
2020	movdqu	XMMWORD[16+rsi],xmm3
2021	xorps	xmm6,xmm14
2022	movdqu	XMMWORD[32+rsi],xmm4
2023	movdqu	XMMWORD[48+rsi],xmm5
2024	movdqu	XMMWORD[64+rsi],xmm6
2025	lea	rsi,[80+rsi]
2026	jmp	NEAR $L$xts_enc_done
2027
2028ALIGN	16
2029$L$xts_enc_one:
2030	movups	xmm2,XMMWORD[rdi]
2031	lea	rdi,[16+rdi]
2032	xorps	xmm2,xmm10
2033	movups	xmm0,XMMWORD[rcx]
2034	movups	xmm1,XMMWORD[16+rcx]
2035	lea	rcx,[32+rcx]
2036	xorps	xmm2,xmm0
2037$L$oop_enc1_9:
2038DB	102,15,56,220,209
2039	dec	eax
2040	movups	xmm1,XMMWORD[rcx]
2041	lea	rcx,[16+rcx]
2042	jnz	NEAR $L$oop_enc1_9
2043DB	102,15,56,221,209
2044	xorps	xmm2,xmm10
2045	movdqa	xmm10,xmm11
2046	movups	XMMWORD[rsi],xmm2
2047	lea	rsi,[16+rsi]
2048	jmp	NEAR $L$xts_enc_done
2049
2050ALIGN	16
2051$L$xts_enc_two:
2052	movups	xmm2,XMMWORD[rdi]
2053	movups	xmm3,XMMWORD[16+rdi]
2054	lea	rdi,[32+rdi]
2055	xorps	xmm2,xmm10
2056	xorps	xmm3,xmm11
2057
2058	call	_aesni_encrypt2
2059
2060	xorps	xmm2,xmm10
2061	movdqa	xmm10,xmm12
2062	xorps	xmm3,xmm11
2063	movups	XMMWORD[rsi],xmm2
2064	movups	XMMWORD[16+rsi],xmm3
2065	lea	rsi,[32+rsi]
2066	jmp	NEAR $L$xts_enc_done
2067
2068ALIGN	16
2069$L$xts_enc_three:
2070	movups	xmm2,XMMWORD[rdi]
2071	movups	xmm3,XMMWORD[16+rdi]
2072	movups	xmm4,XMMWORD[32+rdi]
2073	lea	rdi,[48+rdi]
2074	xorps	xmm2,xmm10
2075	xorps	xmm3,xmm11
2076	xorps	xmm4,xmm12
2077
2078	call	_aesni_encrypt3
2079
2080	xorps	xmm2,xmm10
2081	movdqa	xmm10,xmm13
2082	xorps	xmm3,xmm11
2083	xorps	xmm4,xmm12
2084	movups	XMMWORD[rsi],xmm2
2085	movups	XMMWORD[16+rsi],xmm3
2086	movups	XMMWORD[32+rsi],xmm4
2087	lea	rsi,[48+rsi]
2088	jmp	NEAR $L$xts_enc_done
2089
2090ALIGN	16
2091$L$xts_enc_four:
2092	movups	xmm2,XMMWORD[rdi]
2093	movups	xmm3,XMMWORD[16+rdi]
2094	movups	xmm4,XMMWORD[32+rdi]
2095	xorps	xmm2,xmm10
2096	movups	xmm5,XMMWORD[48+rdi]
2097	lea	rdi,[64+rdi]
2098	xorps	xmm3,xmm11
2099	xorps	xmm4,xmm12
2100	xorps	xmm5,xmm13
2101
2102	call	_aesni_encrypt4
2103
2104	pxor	xmm2,xmm10
2105	movdqa	xmm10,xmm14
2106	pxor	xmm3,xmm11
2107	pxor	xmm4,xmm12
2108	movdqu	XMMWORD[rsi],xmm2
2109	pxor	xmm5,xmm13
2110	movdqu	XMMWORD[16+rsi],xmm3
2111	movdqu	XMMWORD[32+rsi],xmm4
2112	movdqu	XMMWORD[48+rsi],xmm5
2113	lea	rsi,[64+rsi]
2114	jmp	NEAR $L$xts_enc_done
2115
2116ALIGN	16
2117$L$xts_enc_done:
2118	and	r9,15
2119	jz	NEAR $L$xts_enc_ret
2120	mov	rdx,r9
2121
2122$L$xts_enc_steal:
2123	movzx	eax,BYTE[rdi]
2124	movzx	ecx,BYTE[((-16))+rsi]
2125	lea	rdi,[1+rdi]
2126	mov	BYTE[((-16))+rsi],al
2127	mov	BYTE[rsi],cl
2128	lea	rsi,[1+rsi]
2129	sub	rdx,1
2130	jnz	NEAR $L$xts_enc_steal
2131
2132	sub	rsi,r9
2133	mov	rcx,rbp
2134	mov	eax,r10d
2135
2136	movups	xmm2,XMMWORD[((-16))+rsi]
2137	xorps	xmm2,xmm10
2138	movups	xmm0,XMMWORD[rcx]
2139	movups	xmm1,XMMWORD[16+rcx]
2140	lea	rcx,[32+rcx]
2141	xorps	xmm2,xmm0
2142$L$oop_enc1_10:
2143DB	102,15,56,220,209
2144	dec	eax
2145	movups	xmm1,XMMWORD[rcx]
2146	lea	rcx,[16+rcx]
2147	jnz	NEAR $L$oop_enc1_10
2148DB	102,15,56,221,209
2149	xorps	xmm2,xmm10
2150	movups	XMMWORD[(-16)+rsi],xmm2
2151
2152$L$xts_enc_ret:
2153	xorps	xmm0,xmm0
2154	pxor	xmm1,xmm1
2155	pxor	xmm2,xmm2
2156	pxor	xmm3,xmm3
2157	pxor	xmm4,xmm4
2158	pxor	xmm5,xmm5
2159	movaps	xmm6,XMMWORD[((-168))+r11]
2160	movaps	XMMWORD[(-168)+r11],xmm0
2161	movaps	xmm7,XMMWORD[((-152))+r11]
2162	movaps	XMMWORD[(-152)+r11],xmm0
2163	movaps	xmm8,XMMWORD[((-136))+r11]
2164	movaps	XMMWORD[(-136)+r11],xmm0
2165	movaps	xmm9,XMMWORD[((-120))+r11]
2166	movaps	XMMWORD[(-120)+r11],xmm0
2167	movaps	xmm10,XMMWORD[((-104))+r11]
2168	movaps	XMMWORD[(-104)+r11],xmm0
2169	movaps	xmm11,XMMWORD[((-88))+r11]
2170	movaps	XMMWORD[(-88)+r11],xmm0
2171	movaps	xmm12,XMMWORD[((-72))+r11]
2172	movaps	XMMWORD[(-72)+r11],xmm0
2173	movaps	xmm13,XMMWORD[((-56))+r11]
2174	movaps	XMMWORD[(-56)+r11],xmm0
2175	movaps	xmm14,XMMWORD[((-40))+r11]
2176	movaps	XMMWORD[(-40)+r11],xmm0
2177	movaps	xmm15,XMMWORD[((-24))+r11]
2178	movaps	XMMWORD[(-24)+r11],xmm0
2179	movaps	XMMWORD[rsp],xmm0
2180	movaps	XMMWORD[16+rsp],xmm0
2181	movaps	XMMWORD[32+rsp],xmm0
2182	movaps	XMMWORD[48+rsp],xmm0
2183	movaps	XMMWORD[64+rsp],xmm0
2184	movaps	XMMWORD[80+rsp],xmm0
2185	movaps	XMMWORD[96+rsp],xmm0
2186	mov	rbp,QWORD[((-8))+r11]
2187	lea	rsp,[r11]
2188$L$xts_enc_epilogue:
2189	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
2190	mov	rsi,QWORD[16+rsp]
2191	DB	0F3h,0C3h		;repret
2192$L$SEH_end_aesni_xts_encrypt:
2193global	aesni_xts_decrypt
2194
2195ALIGN	16
2196aesni_xts_decrypt:
2197	mov	QWORD[8+rsp],rdi	;WIN64 prologue
2198	mov	QWORD[16+rsp],rsi
2199	mov	rax,rsp
2200$L$SEH_begin_aesni_xts_decrypt:
2201	mov	rdi,rcx
2202	mov	rsi,rdx
2203	mov	rdx,r8
2204	mov	rcx,r9
2205	mov	r8,QWORD[40+rsp]
2206	mov	r9,QWORD[48+rsp]
2207
2208
2209	lea	r11,[rsp]
2210	push	rbp
2211	sub	rsp,272
2212	and	rsp,-16
2213	movaps	XMMWORD[(-168)+r11],xmm6
2214	movaps	XMMWORD[(-152)+r11],xmm7
2215	movaps	XMMWORD[(-136)+r11],xmm8
2216	movaps	XMMWORD[(-120)+r11],xmm9
2217	movaps	XMMWORD[(-104)+r11],xmm10
2218	movaps	XMMWORD[(-88)+r11],xmm11
2219	movaps	XMMWORD[(-72)+r11],xmm12
2220	movaps	XMMWORD[(-56)+r11],xmm13
2221	movaps	XMMWORD[(-40)+r11],xmm14
2222	movaps	XMMWORD[(-24)+r11],xmm15
2223$L$xts_dec_body:
2224	movups	xmm2,XMMWORD[r9]
2225	mov	eax,DWORD[240+r8]
2226	mov	r10d,DWORD[240+rcx]
2227	movups	xmm0,XMMWORD[r8]
2228	movups	xmm1,XMMWORD[16+r8]
2229	lea	r8,[32+r8]
2230	xorps	xmm2,xmm0
2231$L$oop_enc1_11:
2232DB	102,15,56,220,209
2233	dec	eax
2234	movups	xmm1,XMMWORD[r8]
2235	lea	r8,[16+r8]
2236	jnz	NEAR $L$oop_enc1_11
2237DB	102,15,56,221,209
2238	xor	eax,eax
2239	test	rdx,15
2240	setnz	al
2241	shl	rax,4
2242	sub	rdx,rax
2243
2244	movups	xmm0,XMMWORD[rcx]
2245	mov	rbp,rcx
2246	mov	eax,r10d
2247	shl	r10d,4
2248	mov	r9,rdx
2249	and	rdx,-16
2250
2251	movups	xmm1,XMMWORD[16+r10*1+rcx]
2252
2253	movdqa	xmm8,XMMWORD[$L$xts_magic]
2254	movdqa	xmm15,xmm2
2255	pshufd	xmm9,xmm2,0x5f
2256	pxor	xmm1,xmm0
2257	movdqa	xmm14,xmm9
2258	paddd	xmm9,xmm9
2259	movdqa	xmm10,xmm15
2260	psrad	xmm14,31
2261	paddq	xmm15,xmm15
2262	pand	xmm14,xmm8
2263	pxor	xmm10,xmm0
2264	pxor	xmm15,xmm14
2265	movdqa	xmm14,xmm9
2266	paddd	xmm9,xmm9
2267	movdqa	xmm11,xmm15
2268	psrad	xmm14,31
2269	paddq	xmm15,xmm15
2270	pand	xmm14,xmm8
2271	pxor	xmm11,xmm0
2272	pxor	xmm15,xmm14
2273	movdqa	xmm14,xmm9
2274	paddd	xmm9,xmm9
2275	movdqa	xmm12,xmm15
2276	psrad	xmm14,31
2277	paddq	xmm15,xmm15
2278	pand	xmm14,xmm8
2279	pxor	xmm12,xmm0
2280	pxor	xmm15,xmm14
2281	movdqa	xmm14,xmm9
2282	paddd	xmm9,xmm9
2283	movdqa	xmm13,xmm15
2284	psrad	xmm14,31
2285	paddq	xmm15,xmm15
2286	pand	xmm14,xmm8
2287	pxor	xmm13,xmm0
2288	pxor	xmm15,xmm14
2289	movdqa	xmm14,xmm15
2290	psrad	xmm9,31
2291	paddq	xmm15,xmm15
2292	pand	xmm9,xmm8
2293	pxor	xmm14,xmm0
2294	pxor	xmm15,xmm9
2295	movaps	XMMWORD[96+rsp],xmm1
2296
2297	sub	rdx,16*6
2298	jc	NEAR $L$xts_dec_short
2299
2300	mov	eax,16+96
2301	lea	rcx,[32+r10*1+rbp]
2302	sub	rax,r10
2303	movups	xmm1,XMMWORD[16+rbp]
2304	mov	r10,rax
2305	lea	r8,[$L$xts_magic]
2306	jmp	NEAR $L$xts_dec_grandloop
2307
2308ALIGN	32
2309$L$xts_dec_grandloop:
2310	movdqu	xmm2,XMMWORD[rdi]
2311	movdqa	xmm8,xmm0
2312	movdqu	xmm3,XMMWORD[16+rdi]
2313	pxor	xmm2,xmm10
2314	movdqu	xmm4,XMMWORD[32+rdi]
2315	pxor	xmm3,xmm11
2316DB	102,15,56,222,209
2317	movdqu	xmm5,XMMWORD[48+rdi]
2318	pxor	xmm4,xmm12
2319DB	102,15,56,222,217
2320	movdqu	xmm6,XMMWORD[64+rdi]
2321	pxor	xmm5,xmm13
2322DB	102,15,56,222,225
2323	movdqu	xmm7,XMMWORD[80+rdi]
2324	pxor	xmm8,xmm15
2325	movdqa	xmm9,XMMWORD[96+rsp]
2326	pxor	xmm6,xmm14
2327DB	102,15,56,222,233
2328	movups	xmm0,XMMWORD[32+rbp]
2329	lea	rdi,[96+rdi]
2330	pxor	xmm7,xmm8
2331
2332	pxor	xmm10,xmm9
2333DB	102,15,56,222,241
2334	pxor	xmm11,xmm9
2335	movdqa	XMMWORD[rsp],xmm10
2336DB	102,15,56,222,249
2337	movups	xmm1,XMMWORD[48+rbp]
2338	pxor	xmm12,xmm9
2339
2340DB	102,15,56,222,208
2341	pxor	xmm13,xmm9
2342	movdqa	XMMWORD[16+rsp],xmm11
2343DB	102,15,56,222,216
2344	pxor	xmm14,xmm9
2345	movdqa	XMMWORD[32+rsp],xmm12
2346DB	102,15,56,222,224
2347DB	102,15,56,222,232
2348	pxor	xmm8,xmm9
2349	movdqa	XMMWORD[64+rsp],xmm14
2350DB	102,15,56,222,240
2351DB	102,15,56,222,248
2352	movups	xmm0,XMMWORD[64+rbp]
2353	movdqa	XMMWORD[80+rsp],xmm8
2354	pshufd	xmm9,xmm15,0x5f
2355	jmp	NEAR $L$xts_dec_loop6
2356ALIGN	32
2357$L$xts_dec_loop6:
2358DB	102,15,56,222,209
2359DB	102,15,56,222,217
2360DB	102,15,56,222,225
2361DB	102,15,56,222,233
2362DB	102,15,56,222,241
2363DB	102,15,56,222,249
2364	movups	xmm1,XMMWORD[((-64))+rax*1+rcx]
2365	add	rax,32
2366
2367DB	102,15,56,222,208
2368DB	102,15,56,222,216
2369DB	102,15,56,222,224
2370DB	102,15,56,222,232
2371DB	102,15,56,222,240
2372DB	102,15,56,222,248
2373	movups	xmm0,XMMWORD[((-80))+rax*1+rcx]
2374	jnz	NEAR $L$xts_dec_loop6
2375
2376	movdqa	xmm8,XMMWORD[r8]
2377	movdqa	xmm14,xmm9
2378	paddd	xmm9,xmm9
2379DB	102,15,56,222,209
2380	paddq	xmm15,xmm15
2381	psrad	xmm14,31
2382DB	102,15,56,222,217
2383	pand	xmm14,xmm8
2384	movups	xmm10,XMMWORD[rbp]
2385DB	102,15,56,222,225
2386DB	102,15,56,222,233
2387DB	102,15,56,222,241
2388	pxor	xmm15,xmm14
2389	movaps	xmm11,xmm10
2390DB	102,15,56,222,249
2391	movups	xmm1,XMMWORD[((-64))+rcx]
2392
2393	movdqa	xmm14,xmm9
2394DB	102,15,56,222,208
2395	paddd	xmm9,xmm9
2396	pxor	xmm10,xmm15
2397DB	102,15,56,222,216
2398	psrad	xmm14,31
2399	paddq	xmm15,xmm15
2400DB	102,15,56,222,224
2401DB	102,15,56,222,232
2402	pand	xmm14,xmm8
2403	movaps	xmm12,xmm11
2404DB	102,15,56,222,240
2405	pxor	xmm15,xmm14
2406	movdqa	xmm14,xmm9
2407DB	102,15,56,222,248
2408	movups	xmm0,XMMWORD[((-48))+rcx]
2409
2410	paddd	xmm9,xmm9
2411DB	102,15,56,222,209
2412	pxor	xmm11,xmm15
2413	psrad	xmm14,31
2414DB	102,15,56,222,217
2415	paddq	xmm15,xmm15
2416	pand	xmm14,xmm8
2417DB	102,15,56,222,225
2418DB	102,15,56,222,233
2419	movdqa	XMMWORD[48+rsp],xmm13
2420	pxor	xmm15,xmm14
2421DB	102,15,56,222,241
2422	movaps	xmm13,xmm12
2423	movdqa	xmm14,xmm9
2424DB	102,15,56,222,249
2425	movups	xmm1,XMMWORD[((-32))+rcx]
2426
2427	paddd	xmm9,xmm9
2428DB	102,15,56,222,208
2429	pxor	xmm12,xmm15
2430	psrad	xmm14,31
2431DB	102,15,56,222,216
2432	paddq	xmm15,xmm15
2433	pand	xmm14,xmm8
2434DB	102,15,56,222,224
2435DB	102,15,56,222,232
2436DB	102,15,56,222,240
2437	pxor	xmm15,xmm14
2438	movaps	xmm14,xmm13
2439DB	102,15,56,222,248
2440
2441	movdqa	xmm0,xmm9
2442	paddd	xmm9,xmm9
2443DB	102,15,56,222,209
2444	pxor	xmm13,xmm15
2445	psrad	xmm0,31
2446DB	102,15,56,222,217
2447	paddq	xmm15,xmm15
2448	pand	xmm0,xmm8
2449DB	102,15,56,222,225
2450DB	102,15,56,222,233
2451	pxor	xmm15,xmm0
2452	movups	xmm0,XMMWORD[rbp]
2453DB	102,15,56,222,241
2454DB	102,15,56,222,249
2455	movups	xmm1,XMMWORD[16+rbp]
2456
2457	pxor	xmm14,xmm15
2458DB	102,15,56,223,84,36,0
2459	psrad	xmm9,31
2460	paddq	xmm15,xmm15
2461DB	102,15,56,223,92,36,16
2462DB	102,15,56,223,100,36,32
2463	pand	xmm9,xmm8
2464	mov	rax,r10
2465DB	102,15,56,223,108,36,48
2466DB	102,15,56,223,116,36,64
2467DB	102,15,56,223,124,36,80
2468	pxor	xmm15,xmm9
2469
2470	lea	rsi,[96+rsi]
2471	movups	XMMWORD[(-96)+rsi],xmm2
2472	movups	XMMWORD[(-80)+rsi],xmm3
2473	movups	XMMWORD[(-64)+rsi],xmm4
2474	movups	XMMWORD[(-48)+rsi],xmm5
2475	movups	XMMWORD[(-32)+rsi],xmm6
2476	movups	XMMWORD[(-16)+rsi],xmm7
2477	sub	rdx,16*6
2478	jnc	NEAR $L$xts_dec_grandloop
2479
2480	mov	eax,16+96
2481	sub	eax,r10d
2482	mov	rcx,rbp
2483	shr	eax,4
2484
2485$L$xts_dec_short:
2486
2487	mov	r10d,eax
2488	pxor	xmm10,xmm0
2489	pxor	xmm11,xmm0
2490	add	rdx,16*6
2491	jz	NEAR $L$xts_dec_done
2492
2493	pxor	xmm12,xmm0
2494	cmp	rdx,0x20
2495	jb	NEAR $L$xts_dec_one
2496	pxor	xmm13,xmm0
2497	je	NEAR $L$xts_dec_two
2498
2499	pxor	xmm14,xmm0
2500	cmp	rdx,0x40
2501	jb	NEAR $L$xts_dec_three
2502	je	NEAR $L$xts_dec_four
2503
2504	movdqu	xmm2,XMMWORD[rdi]
2505	movdqu	xmm3,XMMWORD[16+rdi]
2506	movdqu	xmm4,XMMWORD[32+rdi]
2507	pxor	xmm2,xmm10
2508	movdqu	xmm5,XMMWORD[48+rdi]
2509	pxor	xmm3,xmm11
2510	movdqu	xmm6,XMMWORD[64+rdi]
2511	lea	rdi,[80+rdi]
2512	pxor	xmm4,xmm12
2513	pxor	xmm5,xmm13
2514	pxor	xmm6,xmm14
2515
2516	call	_aesni_decrypt6
2517
2518	xorps	xmm2,xmm10
2519	xorps	xmm3,xmm11
2520	xorps	xmm4,xmm12
2521	movdqu	XMMWORD[rsi],xmm2
2522	xorps	xmm5,xmm13
2523	movdqu	XMMWORD[16+rsi],xmm3
2524	xorps	xmm6,xmm14
2525	movdqu	XMMWORD[32+rsi],xmm4
2526	pxor	xmm14,xmm14
2527	movdqu	XMMWORD[48+rsi],xmm5
2528	pcmpgtd	xmm14,xmm15
2529	movdqu	XMMWORD[64+rsi],xmm6
2530	lea	rsi,[80+rsi]
2531	pshufd	xmm11,xmm14,0x13
2532	and	r9,15
2533	jz	NEAR $L$xts_dec_ret
2534
2535	movdqa	xmm10,xmm15
2536	paddq	xmm15,xmm15
2537	pand	xmm11,xmm8
2538	pxor	xmm11,xmm15
2539	jmp	NEAR $L$xts_dec_done2
2540
2541ALIGN	16
2542$L$xts_dec_one:
2543	movups	xmm2,XMMWORD[rdi]
2544	lea	rdi,[16+rdi]
2545	xorps	xmm2,xmm10
2546	movups	xmm0,XMMWORD[rcx]
2547	movups	xmm1,XMMWORD[16+rcx]
2548	lea	rcx,[32+rcx]
2549	xorps	xmm2,xmm0
2550$L$oop_dec1_12:
2551DB	102,15,56,222,209
2552	dec	eax
2553	movups	xmm1,XMMWORD[rcx]
2554	lea	rcx,[16+rcx]
2555	jnz	NEAR $L$oop_dec1_12
2556DB	102,15,56,223,209
2557	xorps	xmm2,xmm10
2558	movdqa	xmm10,xmm11
2559	movups	XMMWORD[rsi],xmm2
2560	movdqa	xmm11,xmm12
2561	lea	rsi,[16+rsi]
2562	jmp	NEAR $L$xts_dec_done
2563
2564ALIGN	16
2565$L$xts_dec_two:
2566	movups	xmm2,XMMWORD[rdi]
2567	movups	xmm3,XMMWORD[16+rdi]
2568	lea	rdi,[32+rdi]
2569	xorps	xmm2,xmm10
2570	xorps	xmm3,xmm11
2571
2572	call	_aesni_decrypt2
2573
2574	xorps	xmm2,xmm10
2575	movdqa	xmm10,xmm12
2576	xorps	xmm3,xmm11
2577	movdqa	xmm11,xmm13
2578	movups	XMMWORD[rsi],xmm2
2579	movups	XMMWORD[16+rsi],xmm3
2580	lea	rsi,[32+rsi]
2581	jmp	NEAR $L$xts_dec_done
2582
2583ALIGN	16
2584$L$xts_dec_three:
2585	movups	xmm2,XMMWORD[rdi]
2586	movups	xmm3,XMMWORD[16+rdi]
2587	movups	xmm4,XMMWORD[32+rdi]
2588	lea	rdi,[48+rdi]
2589	xorps	xmm2,xmm10
2590	xorps	xmm3,xmm11
2591	xorps	xmm4,xmm12
2592
2593	call	_aesni_decrypt3
2594
2595	xorps	xmm2,xmm10
2596	movdqa	xmm10,xmm13
2597	xorps	xmm3,xmm11
2598	movdqa	xmm11,xmm14
2599	xorps	xmm4,xmm12
2600	movups	XMMWORD[rsi],xmm2
2601	movups	XMMWORD[16+rsi],xmm3
2602	movups	XMMWORD[32+rsi],xmm4
2603	lea	rsi,[48+rsi]
2604	jmp	NEAR $L$xts_dec_done
2605
2606ALIGN	16
2607$L$xts_dec_four:
2608	movups	xmm2,XMMWORD[rdi]
2609	movups	xmm3,XMMWORD[16+rdi]
2610	movups	xmm4,XMMWORD[32+rdi]
2611	xorps	xmm2,xmm10
2612	movups	xmm5,XMMWORD[48+rdi]
2613	lea	rdi,[64+rdi]
2614	xorps	xmm3,xmm11
2615	xorps	xmm4,xmm12
2616	xorps	xmm5,xmm13
2617
2618	call	_aesni_decrypt4
2619
2620	pxor	xmm2,xmm10
2621	movdqa	xmm10,xmm14
2622	pxor	xmm3,xmm11
2623	movdqa	xmm11,xmm15
2624	pxor	xmm4,xmm12
2625	movdqu	XMMWORD[rsi],xmm2
2626	pxor	xmm5,xmm13
2627	movdqu	XMMWORD[16+rsi],xmm3
2628	movdqu	XMMWORD[32+rsi],xmm4
2629	movdqu	XMMWORD[48+rsi],xmm5
2630	lea	rsi,[64+rsi]
2631	jmp	NEAR $L$xts_dec_done
2632
2633ALIGN	16
2634$L$xts_dec_done:
2635	and	r9,15
2636	jz	NEAR $L$xts_dec_ret
2637$L$xts_dec_done2:
2638	mov	rdx,r9
2639	mov	rcx,rbp
2640	mov	eax,r10d
2641
2642	movups	xmm2,XMMWORD[rdi]
2643	xorps	xmm2,xmm11
2644	movups	xmm0,XMMWORD[rcx]
2645	movups	xmm1,XMMWORD[16+rcx]
2646	lea	rcx,[32+rcx]
2647	xorps	xmm2,xmm0
2648$L$oop_dec1_13:
2649DB	102,15,56,222,209
2650	dec	eax
2651	movups	xmm1,XMMWORD[rcx]
2652	lea	rcx,[16+rcx]
2653	jnz	NEAR $L$oop_dec1_13
2654DB	102,15,56,223,209
2655	xorps	xmm2,xmm11
2656	movups	XMMWORD[rsi],xmm2
2657
2658$L$xts_dec_steal:
2659	movzx	eax,BYTE[16+rdi]
2660	movzx	ecx,BYTE[rsi]
2661	lea	rdi,[1+rdi]
2662	mov	BYTE[rsi],al
2663	mov	BYTE[16+rsi],cl
2664	lea	rsi,[1+rsi]
2665	sub	rdx,1
2666	jnz	NEAR $L$xts_dec_steal
2667
2668	sub	rsi,r9
2669	mov	rcx,rbp
2670	mov	eax,r10d
2671
2672	movups	xmm2,XMMWORD[rsi]
2673	xorps	xmm2,xmm10
2674	movups	xmm0,XMMWORD[rcx]
2675	movups	xmm1,XMMWORD[16+rcx]
2676	lea	rcx,[32+rcx]
2677	xorps	xmm2,xmm0
2678$L$oop_dec1_14:
2679DB	102,15,56,222,209
2680	dec	eax
2681	movups	xmm1,XMMWORD[rcx]
2682	lea	rcx,[16+rcx]
2683	jnz	NEAR $L$oop_dec1_14
2684DB	102,15,56,223,209
2685	xorps	xmm2,xmm10
2686	movups	XMMWORD[rsi],xmm2
2687
2688$L$xts_dec_ret:
2689	xorps	xmm0,xmm0
2690	pxor	xmm1,xmm1
2691	pxor	xmm2,xmm2
2692	pxor	xmm3,xmm3
2693	pxor	xmm4,xmm4
2694	pxor	xmm5,xmm5
2695	movaps	xmm6,XMMWORD[((-168))+r11]
2696	movaps	XMMWORD[(-168)+r11],xmm0
2697	movaps	xmm7,XMMWORD[((-152))+r11]
2698	movaps	XMMWORD[(-152)+r11],xmm0
2699	movaps	xmm8,XMMWORD[((-136))+r11]
2700	movaps	XMMWORD[(-136)+r11],xmm0
2701	movaps	xmm9,XMMWORD[((-120))+r11]
2702	movaps	XMMWORD[(-120)+r11],xmm0
2703	movaps	xmm10,XMMWORD[((-104))+r11]
2704	movaps	XMMWORD[(-104)+r11],xmm0
2705	movaps	xmm11,XMMWORD[((-88))+r11]
2706	movaps	XMMWORD[(-88)+r11],xmm0
2707	movaps	xmm12,XMMWORD[((-72))+r11]
2708	movaps	XMMWORD[(-72)+r11],xmm0
2709	movaps	xmm13,XMMWORD[((-56))+r11]
2710	movaps	XMMWORD[(-56)+r11],xmm0
2711	movaps	xmm14,XMMWORD[((-40))+r11]
2712	movaps	XMMWORD[(-40)+r11],xmm0
2713	movaps	xmm15,XMMWORD[((-24))+r11]
2714	movaps	XMMWORD[(-24)+r11],xmm0
2715	movaps	XMMWORD[rsp],xmm0
2716	movaps	XMMWORD[16+rsp],xmm0
2717	movaps	XMMWORD[32+rsp],xmm0
2718	movaps	XMMWORD[48+rsp],xmm0
2719	movaps	XMMWORD[64+rsp],xmm0
2720	movaps	XMMWORD[80+rsp],xmm0
2721	movaps	XMMWORD[96+rsp],xmm0
2722	mov	rbp,QWORD[((-8))+r11]
2723	lea	rsp,[r11]
2724$L$xts_dec_epilogue:
2725	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
2726	mov	rsi,QWORD[16+rsp]
2727	DB	0F3h,0C3h		;repret
2728$L$SEH_end_aesni_xts_decrypt:
2729global	aesni_ocb_encrypt
2730
2731ALIGN	32
2732aesni_ocb_encrypt:
2733	mov	QWORD[8+rsp],rdi	;WIN64 prologue
2734	mov	QWORD[16+rsp],rsi
2735	mov	rax,rsp
2736$L$SEH_begin_aesni_ocb_encrypt:
2737	mov	rdi,rcx
2738	mov	rsi,rdx
2739	mov	rdx,r8
2740	mov	rcx,r9
2741	mov	r8,QWORD[40+rsp]
2742	mov	r9,QWORD[48+rsp]
2743
2744
2745	lea	rax,[rsp]
2746	push	rbx
2747	push	rbp
2748	push	r12
2749	push	r13
2750	push	r14
2751	lea	rsp,[((-160))+rsp]
2752	movaps	XMMWORD[rsp],xmm6
2753	movaps	XMMWORD[16+rsp],xmm7
2754	movaps	XMMWORD[32+rsp],xmm8
2755	movaps	XMMWORD[48+rsp],xmm9
2756	movaps	XMMWORD[64+rsp],xmm10
2757	movaps	XMMWORD[80+rsp],xmm11
2758	movaps	XMMWORD[96+rsp],xmm12
2759	movaps	XMMWORD[112+rsp],xmm13
2760	movaps	XMMWORD[128+rsp],xmm14
2761	movaps	XMMWORD[144+rsp],xmm15
2762$L$ocb_enc_body:
2763	mov	rbx,QWORD[56+rax]
2764	mov	rbp,QWORD[((56+8))+rax]
2765
2766	mov	r10d,DWORD[240+rcx]
2767	mov	r11,rcx
2768	shl	r10d,4
2769	movups	xmm9,XMMWORD[rcx]
2770	movups	xmm1,XMMWORD[16+r10*1+rcx]
2771
2772	movdqu	xmm15,XMMWORD[r9]
2773	pxor	xmm9,xmm1
2774	pxor	xmm15,xmm1
2775
2776	mov	eax,16+32
2777	lea	rcx,[32+r10*1+r11]
2778	movups	xmm1,XMMWORD[16+r11]
2779	sub	rax,r10
2780	mov	r10,rax
2781
2782	movdqu	xmm10,XMMWORD[rbx]
2783	movdqu	xmm8,XMMWORD[rbp]
2784
2785	test	r8,1
2786	jnz	NEAR $L$ocb_enc_odd
2787
2788	bsf	r12,r8
2789	add	r8,1
2790	shl	r12,4
2791	movdqu	xmm7,XMMWORD[r12*1+rbx]
2792	movdqu	xmm2,XMMWORD[rdi]
2793	lea	rdi,[16+rdi]
2794
2795	call	__ocb_encrypt1
2796
2797	movdqa	xmm15,xmm7
2798	movups	XMMWORD[rsi],xmm2
2799	lea	rsi,[16+rsi]
2800	sub	rdx,1
2801	jz	NEAR $L$ocb_enc_done
2802
2803$L$ocb_enc_odd:
2804	lea	r12,[1+r8]
2805	lea	r13,[3+r8]
2806	lea	r14,[5+r8]
2807	lea	r8,[6+r8]
2808	bsf	r12,r12
2809	bsf	r13,r13
2810	bsf	r14,r14
2811	shl	r12,4
2812	shl	r13,4
2813	shl	r14,4
2814
2815	sub	rdx,6
2816	jc	NEAR $L$ocb_enc_short
2817	jmp	NEAR $L$ocb_enc_grandloop
2818
2819ALIGN	32
2820$L$ocb_enc_grandloop:
2821	movdqu	xmm2,XMMWORD[rdi]
2822	movdqu	xmm3,XMMWORD[16+rdi]
2823	movdqu	xmm4,XMMWORD[32+rdi]
2824	movdqu	xmm5,XMMWORD[48+rdi]
2825	movdqu	xmm6,XMMWORD[64+rdi]
2826	movdqu	xmm7,XMMWORD[80+rdi]
2827	lea	rdi,[96+rdi]
2828
2829	call	__ocb_encrypt6
2830
2831	movups	XMMWORD[rsi],xmm2
2832	movups	XMMWORD[16+rsi],xmm3
2833	movups	XMMWORD[32+rsi],xmm4
2834	movups	XMMWORD[48+rsi],xmm5
2835	movups	XMMWORD[64+rsi],xmm6
2836	movups	XMMWORD[80+rsi],xmm7
2837	lea	rsi,[96+rsi]
2838	sub	rdx,6
2839	jnc	NEAR $L$ocb_enc_grandloop
2840
2841$L$ocb_enc_short:
2842	add	rdx,6
2843	jz	NEAR $L$ocb_enc_done
2844
2845	movdqu	xmm2,XMMWORD[rdi]
2846	cmp	rdx,2
2847	jb	NEAR $L$ocb_enc_one
2848	movdqu	xmm3,XMMWORD[16+rdi]
2849	je	NEAR $L$ocb_enc_two
2850
2851	movdqu	xmm4,XMMWORD[32+rdi]
2852	cmp	rdx,4
2853	jb	NEAR $L$ocb_enc_three
2854	movdqu	xmm5,XMMWORD[48+rdi]
2855	je	NEAR $L$ocb_enc_four
2856
2857	movdqu	xmm6,XMMWORD[64+rdi]
2858	pxor	xmm7,xmm7
2859
2860	call	__ocb_encrypt6
2861
2862	movdqa	xmm15,xmm14
2863	movups	XMMWORD[rsi],xmm2
2864	movups	XMMWORD[16+rsi],xmm3
2865	movups	XMMWORD[32+rsi],xmm4
2866	movups	XMMWORD[48+rsi],xmm5
2867	movups	XMMWORD[64+rsi],xmm6
2868
2869	jmp	NEAR $L$ocb_enc_done
2870
2871ALIGN	16
2872$L$ocb_enc_one:
2873	movdqa	xmm7,xmm10
2874
2875	call	__ocb_encrypt1
2876
2877	movdqa	xmm15,xmm7
2878	movups	XMMWORD[rsi],xmm2
2879	jmp	NEAR $L$ocb_enc_done
2880
2881ALIGN	16
2882$L$ocb_enc_two:
2883	pxor	xmm4,xmm4
2884	pxor	xmm5,xmm5
2885
2886	call	__ocb_encrypt4
2887
2888	movdqa	xmm15,xmm11
2889	movups	XMMWORD[rsi],xmm2
2890	movups	XMMWORD[16+rsi],xmm3
2891
2892	jmp	NEAR $L$ocb_enc_done
2893
2894ALIGN	16
2895$L$ocb_enc_three:
2896	pxor	xmm5,xmm5
2897
2898	call	__ocb_encrypt4
2899
2900	movdqa	xmm15,xmm12
2901	movups	XMMWORD[rsi],xmm2
2902	movups	XMMWORD[16+rsi],xmm3
2903	movups	XMMWORD[32+rsi],xmm4
2904
2905	jmp	NEAR $L$ocb_enc_done
2906
2907ALIGN	16
2908$L$ocb_enc_four:
2909	call	__ocb_encrypt4
2910
2911	movdqa	xmm15,xmm13
2912	movups	XMMWORD[rsi],xmm2
2913	movups	XMMWORD[16+rsi],xmm3
2914	movups	XMMWORD[32+rsi],xmm4
2915	movups	XMMWORD[48+rsi],xmm5
2916
2917$L$ocb_enc_done:
2918	pxor	xmm15,xmm0
2919	movdqu	XMMWORD[rbp],xmm8
2920	movdqu	XMMWORD[r9],xmm15
2921
2922	xorps	xmm0,xmm0
2923	pxor	xmm1,xmm1
2924	pxor	xmm2,xmm2
2925	pxor	xmm3,xmm3
2926	pxor	xmm4,xmm4
2927	pxor	xmm5,xmm5
2928	movaps	xmm6,XMMWORD[rsp]
2929	movaps	XMMWORD[rsp],xmm0
2930	movaps	xmm7,XMMWORD[16+rsp]
2931	movaps	XMMWORD[16+rsp],xmm0
2932	movaps	xmm8,XMMWORD[32+rsp]
2933	movaps	XMMWORD[32+rsp],xmm0
2934	movaps	xmm9,XMMWORD[48+rsp]
2935	movaps	XMMWORD[48+rsp],xmm0
2936	movaps	xmm10,XMMWORD[64+rsp]
2937	movaps	XMMWORD[64+rsp],xmm0
2938	movaps	xmm11,XMMWORD[80+rsp]
2939	movaps	XMMWORD[80+rsp],xmm0
2940	movaps	xmm12,XMMWORD[96+rsp]
2941	movaps	XMMWORD[96+rsp],xmm0
2942	movaps	xmm13,XMMWORD[112+rsp]
2943	movaps	XMMWORD[112+rsp],xmm0
2944	movaps	xmm14,XMMWORD[128+rsp]
2945	movaps	XMMWORD[128+rsp],xmm0
2946	movaps	xmm15,XMMWORD[144+rsp]
2947	movaps	XMMWORD[144+rsp],xmm0
2948	lea	rax,[((160+40))+rsp]
2949$L$ocb_enc_pop:
2950	mov	r14,QWORD[((-40))+rax]
2951	mov	r13,QWORD[((-32))+rax]
2952	mov	r12,QWORD[((-24))+rax]
2953	mov	rbp,QWORD[((-16))+rax]
2954	mov	rbx,QWORD[((-8))+rax]
2955	lea	rsp,[rax]
2956$L$ocb_enc_epilogue:
2957	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
2958	mov	rsi,QWORD[16+rsp]
2959	DB	0F3h,0C3h		;repret
2960$L$SEH_end_aesni_ocb_encrypt:
2961
2962
2963ALIGN	32
2964__ocb_encrypt6:
2965	pxor	xmm15,xmm9
2966	movdqu	xmm11,XMMWORD[r12*1+rbx]
2967	movdqa	xmm12,xmm10
2968	movdqu	xmm13,XMMWORD[r13*1+rbx]
2969	movdqa	xmm14,xmm10
2970	pxor	xmm10,xmm15
2971	movdqu	xmm15,XMMWORD[r14*1+rbx]
2972	pxor	xmm11,xmm10
2973	pxor	xmm8,xmm2
2974	pxor	xmm2,xmm10
2975	pxor	xmm12,xmm11
2976	pxor	xmm8,xmm3
2977	pxor	xmm3,xmm11
2978	pxor	xmm13,xmm12
2979	pxor	xmm8,xmm4
2980	pxor	xmm4,xmm12
2981	pxor	xmm14,xmm13
2982	pxor	xmm8,xmm5
2983	pxor	xmm5,xmm13
2984	pxor	xmm15,xmm14
2985	pxor	xmm8,xmm6
2986	pxor	xmm6,xmm14
2987	pxor	xmm8,xmm7
2988	pxor	xmm7,xmm15
2989	movups	xmm0,XMMWORD[32+r11]
2990
2991	lea	r12,[1+r8]
2992	lea	r13,[3+r8]
2993	lea	r14,[5+r8]
2994	add	r8,6
2995	pxor	xmm10,xmm9
2996	bsf	r12,r12
2997	bsf	r13,r13
2998	bsf	r14,r14
2999
3000DB	102,15,56,220,209
3001DB	102,15,56,220,217
3002DB	102,15,56,220,225
3003DB	102,15,56,220,233
3004	pxor	xmm11,xmm9
3005	pxor	xmm12,xmm9
3006DB	102,15,56,220,241
3007	pxor	xmm13,xmm9
3008	pxor	xmm14,xmm9
3009DB	102,15,56,220,249
3010	movups	xmm1,XMMWORD[48+r11]
3011	pxor	xmm15,xmm9
3012
3013DB	102,15,56,220,208
3014DB	102,15,56,220,216
3015DB	102,15,56,220,224
3016DB	102,15,56,220,232
3017DB	102,15,56,220,240
3018DB	102,15,56,220,248
3019	movups	xmm0,XMMWORD[64+r11]
3020	shl	r12,4
3021	shl	r13,4
3022	jmp	NEAR $L$ocb_enc_loop6
3023
3024ALIGN	32
3025$L$ocb_enc_loop6:
3026DB	102,15,56,220,209
3027DB	102,15,56,220,217
3028DB	102,15,56,220,225
3029DB	102,15,56,220,233
3030DB	102,15,56,220,241
3031DB	102,15,56,220,249
3032	movups	xmm1,XMMWORD[rax*1+rcx]
3033	add	rax,32
3034
3035DB	102,15,56,220,208
3036DB	102,15,56,220,216
3037DB	102,15,56,220,224
3038DB	102,15,56,220,232
3039DB	102,15,56,220,240
3040DB	102,15,56,220,248
3041	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
3042	jnz	NEAR $L$ocb_enc_loop6
3043
3044DB	102,15,56,220,209
3045DB	102,15,56,220,217
3046DB	102,15,56,220,225
3047DB	102,15,56,220,233
3048DB	102,15,56,220,241
3049DB	102,15,56,220,249
3050	movups	xmm1,XMMWORD[16+r11]
3051	shl	r14,4
3052
3053DB	102,65,15,56,221,210
3054	movdqu	xmm10,XMMWORD[rbx]
3055	mov	rax,r10
3056DB	102,65,15,56,221,219
3057DB	102,65,15,56,221,228
3058DB	102,65,15,56,221,237
3059DB	102,65,15,56,221,246
3060DB	102,65,15,56,221,255
3061	DB	0F3h,0C3h		;repret
3062
3063
3064
3065ALIGN	32
3066__ocb_encrypt4:
3067	pxor	xmm15,xmm9
3068	movdqu	xmm11,XMMWORD[r12*1+rbx]
3069	movdqa	xmm12,xmm10
3070	movdqu	xmm13,XMMWORD[r13*1+rbx]
3071	pxor	xmm10,xmm15
3072	pxor	xmm11,xmm10
3073	pxor	xmm8,xmm2
3074	pxor	xmm2,xmm10
3075	pxor	xmm12,xmm11
3076	pxor	xmm8,xmm3
3077	pxor	xmm3,xmm11
3078	pxor	xmm13,xmm12
3079	pxor	xmm8,xmm4
3080	pxor	xmm4,xmm12
3081	pxor	xmm8,xmm5
3082	pxor	xmm5,xmm13
3083	movups	xmm0,XMMWORD[32+r11]
3084
3085	pxor	xmm10,xmm9
3086	pxor	xmm11,xmm9
3087	pxor	xmm12,xmm9
3088	pxor	xmm13,xmm9
3089
3090DB	102,15,56,220,209
3091DB	102,15,56,220,217
3092DB	102,15,56,220,225
3093DB	102,15,56,220,233
3094	movups	xmm1,XMMWORD[48+r11]
3095
3096DB	102,15,56,220,208
3097DB	102,15,56,220,216
3098DB	102,15,56,220,224
3099DB	102,15,56,220,232
3100	movups	xmm0,XMMWORD[64+r11]
3101	jmp	NEAR $L$ocb_enc_loop4
3102
3103ALIGN	32
3104$L$ocb_enc_loop4:
3105DB	102,15,56,220,209
3106DB	102,15,56,220,217
3107DB	102,15,56,220,225
3108DB	102,15,56,220,233
3109	movups	xmm1,XMMWORD[rax*1+rcx]
3110	add	rax,32
3111
3112DB	102,15,56,220,208
3113DB	102,15,56,220,216
3114DB	102,15,56,220,224
3115DB	102,15,56,220,232
3116	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
3117	jnz	NEAR $L$ocb_enc_loop4
3118
3119DB	102,15,56,220,209
3120DB	102,15,56,220,217
3121DB	102,15,56,220,225
3122DB	102,15,56,220,233
3123	movups	xmm1,XMMWORD[16+r11]
3124	mov	rax,r10
3125
3126DB	102,65,15,56,221,210
3127DB	102,65,15,56,221,219
3128DB	102,65,15,56,221,228
3129DB	102,65,15,56,221,237
3130	DB	0F3h,0C3h		;repret
3131
3132
3133
3134ALIGN	32
3135__ocb_encrypt1:
3136	pxor	xmm7,xmm15
3137	pxor	xmm7,xmm9
3138	pxor	xmm8,xmm2
3139	pxor	xmm2,xmm7
3140	movups	xmm0,XMMWORD[32+r11]
3141
3142DB	102,15,56,220,209
3143	movups	xmm1,XMMWORD[48+r11]
3144	pxor	xmm7,xmm9
3145
3146DB	102,15,56,220,208
3147	movups	xmm0,XMMWORD[64+r11]
3148	jmp	NEAR $L$ocb_enc_loop1
3149
3150ALIGN	32
3151$L$ocb_enc_loop1:
3152DB	102,15,56,220,209
3153	movups	xmm1,XMMWORD[rax*1+rcx]
3154	add	rax,32
3155
3156DB	102,15,56,220,208
3157	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
3158	jnz	NEAR $L$ocb_enc_loop1
3159
3160DB	102,15,56,220,209
3161	movups	xmm1,XMMWORD[16+r11]
3162	mov	rax,r10
3163
3164DB	102,15,56,221,215
3165	DB	0F3h,0C3h		;repret
3166
3167
3168global	aesni_ocb_decrypt
3169
3170ALIGN	32
3171aesni_ocb_decrypt:
3172	mov	QWORD[8+rsp],rdi	;WIN64 prologue
3173	mov	QWORD[16+rsp],rsi
3174	mov	rax,rsp
3175$L$SEH_begin_aesni_ocb_decrypt:
3176	mov	rdi,rcx
3177	mov	rsi,rdx
3178	mov	rdx,r8
3179	mov	rcx,r9
3180	mov	r8,QWORD[40+rsp]
3181	mov	r9,QWORD[48+rsp]
3182
3183
3184	lea	rax,[rsp]
3185	push	rbx
3186	push	rbp
3187	push	r12
3188	push	r13
3189	push	r14
3190	lea	rsp,[((-160))+rsp]
3191	movaps	XMMWORD[rsp],xmm6
3192	movaps	XMMWORD[16+rsp],xmm7
3193	movaps	XMMWORD[32+rsp],xmm8
3194	movaps	XMMWORD[48+rsp],xmm9
3195	movaps	XMMWORD[64+rsp],xmm10
3196	movaps	XMMWORD[80+rsp],xmm11
3197	movaps	XMMWORD[96+rsp],xmm12
3198	movaps	XMMWORD[112+rsp],xmm13
3199	movaps	XMMWORD[128+rsp],xmm14
3200	movaps	XMMWORD[144+rsp],xmm15
3201$L$ocb_dec_body:
3202	mov	rbx,QWORD[56+rax]
3203	mov	rbp,QWORD[((56+8))+rax]
3204
3205	mov	r10d,DWORD[240+rcx]
3206	mov	r11,rcx
3207	shl	r10d,4
3208	movups	xmm9,XMMWORD[rcx]
3209	movups	xmm1,XMMWORD[16+r10*1+rcx]
3210
3211	movdqu	xmm15,XMMWORD[r9]
3212	pxor	xmm9,xmm1
3213	pxor	xmm15,xmm1
3214
3215	mov	eax,16+32
3216	lea	rcx,[32+r10*1+r11]
3217	movups	xmm1,XMMWORD[16+r11]
3218	sub	rax,r10
3219	mov	r10,rax
3220
3221	movdqu	xmm10,XMMWORD[rbx]
3222	movdqu	xmm8,XMMWORD[rbp]
3223
3224	test	r8,1
3225	jnz	NEAR $L$ocb_dec_odd
3226
3227	bsf	r12,r8
3228	add	r8,1
3229	shl	r12,4
3230	movdqu	xmm7,XMMWORD[r12*1+rbx]
3231	movdqu	xmm2,XMMWORD[rdi]
3232	lea	rdi,[16+rdi]
3233
3234	call	__ocb_decrypt1
3235
3236	movdqa	xmm15,xmm7
3237	movups	XMMWORD[rsi],xmm2
3238	xorps	xmm8,xmm2
3239	lea	rsi,[16+rsi]
3240	sub	rdx,1
3241	jz	NEAR $L$ocb_dec_done
3242
3243$L$ocb_dec_odd:
3244	lea	r12,[1+r8]
3245	lea	r13,[3+r8]
3246	lea	r14,[5+r8]
3247	lea	r8,[6+r8]
3248	bsf	r12,r12
3249	bsf	r13,r13
3250	bsf	r14,r14
3251	shl	r12,4
3252	shl	r13,4
3253	shl	r14,4
3254
3255	sub	rdx,6
3256	jc	NEAR $L$ocb_dec_short
3257	jmp	NEAR $L$ocb_dec_grandloop
3258
3259ALIGN	32
3260$L$ocb_dec_grandloop:
3261	movdqu	xmm2,XMMWORD[rdi]
3262	movdqu	xmm3,XMMWORD[16+rdi]
3263	movdqu	xmm4,XMMWORD[32+rdi]
3264	movdqu	xmm5,XMMWORD[48+rdi]
3265	movdqu	xmm6,XMMWORD[64+rdi]
3266	movdqu	xmm7,XMMWORD[80+rdi]
3267	lea	rdi,[96+rdi]
3268
3269	call	__ocb_decrypt6
3270
3271	movups	XMMWORD[rsi],xmm2
3272	pxor	xmm8,xmm2
3273	movups	XMMWORD[16+rsi],xmm3
3274	pxor	xmm8,xmm3
3275	movups	XMMWORD[32+rsi],xmm4
3276	pxor	xmm8,xmm4
3277	movups	XMMWORD[48+rsi],xmm5
3278	pxor	xmm8,xmm5
3279	movups	XMMWORD[64+rsi],xmm6
3280	pxor	xmm8,xmm6
3281	movups	XMMWORD[80+rsi],xmm7
3282	pxor	xmm8,xmm7
3283	lea	rsi,[96+rsi]
3284	sub	rdx,6
3285	jnc	NEAR $L$ocb_dec_grandloop
3286
3287$L$ocb_dec_short:
3288	add	rdx,6
3289	jz	NEAR $L$ocb_dec_done
3290
3291	movdqu	xmm2,XMMWORD[rdi]
3292	cmp	rdx,2
3293	jb	NEAR $L$ocb_dec_one
3294	movdqu	xmm3,XMMWORD[16+rdi]
3295	je	NEAR $L$ocb_dec_two
3296
3297	movdqu	xmm4,XMMWORD[32+rdi]
3298	cmp	rdx,4
3299	jb	NEAR $L$ocb_dec_three
3300	movdqu	xmm5,XMMWORD[48+rdi]
3301	je	NEAR $L$ocb_dec_four
3302
3303	movdqu	xmm6,XMMWORD[64+rdi]
3304	pxor	xmm7,xmm7
3305
3306	call	__ocb_decrypt6
3307
3308	movdqa	xmm15,xmm14
3309	movups	XMMWORD[rsi],xmm2
3310	pxor	xmm8,xmm2
3311	movups	XMMWORD[16+rsi],xmm3
3312	pxor	xmm8,xmm3
3313	movups	XMMWORD[32+rsi],xmm4
3314	pxor	xmm8,xmm4
3315	movups	XMMWORD[48+rsi],xmm5
3316	pxor	xmm8,xmm5
3317	movups	XMMWORD[64+rsi],xmm6
3318	pxor	xmm8,xmm6
3319
3320	jmp	NEAR $L$ocb_dec_done
3321
3322ALIGN	16
3323$L$ocb_dec_one:
3324	movdqa	xmm7,xmm10
3325
3326	call	__ocb_decrypt1
3327
3328	movdqa	xmm15,xmm7
3329	movups	XMMWORD[rsi],xmm2
3330	xorps	xmm8,xmm2
3331	jmp	NEAR $L$ocb_dec_done
3332
3333ALIGN	16
3334$L$ocb_dec_two:
3335	pxor	xmm4,xmm4
3336	pxor	xmm5,xmm5
3337
3338	call	__ocb_decrypt4
3339
3340	movdqa	xmm15,xmm11
3341	movups	XMMWORD[rsi],xmm2
3342	xorps	xmm8,xmm2
3343	movups	XMMWORD[16+rsi],xmm3
3344	xorps	xmm8,xmm3
3345
3346	jmp	NEAR $L$ocb_dec_done
3347
3348ALIGN	16
3349$L$ocb_dec_three:
3350	pxor	xmm5,xmm5
3351
3352	call	__ocb_decrypt4
3353
3354	movdqa	xmm15,xmm12
3355	movups	XMMWORD[rsi],xmm2
3356	xorps	xmm8,xmm2
3357	movups	XMMWORD[16+rsi],xmm3
3358	xorps	xmm8,xmm3
3359	movups	XMMWORD[32+rsi],xmm4
3360	xorps	xmm8,xmm4
3361
3362	jmp	NEAR $L$ocb_dec_done
3363
3364ALIGN	16
3365$L$ocb_dec_four:
3366	call	__ocb_decrypt4
3367
3368	movdqa	xmm15,xmm13
3369	movups	XMMWORD[rsi],xmm2
3370	pxor	xmm8,xmm2
3371	movups	XMMWORD[16+rsi],xmm3
3372	pxor	xmm8,xmm3
3373	movups	XMMWORD[32+rsi],xmm4
3374	pxor	xmm8,xmm4
3375	movups	XMMWORD[48+rsi],xmm5
3376	pxor	xmm8,xmm5
3377
3378$L$ocb_dec_done:
3379	pxor	xmm15,xmm0
3380	movdqu	XMMWORD[rbp],xmm8
3381	movdqu	XMMWORD[r9],xmm15
3382
3383	xorps	xmm0,xmm0
3384	pxor	xmm1,xmm1
3385	pxor	xmm2,xmm2
3386	pxor	xmm3,xmm3
3387	pxor	xmm4,xmm4
3388	pxor	xmm5,xmm5
3389	movaps	xmm6,XMMWORD[rsp]
3390	movaps	XMMWORD[rsp],xmm0
3391	movaps	xmm7,XMMWORD[16+rsp]
3392	movaps	XMMWORD[16+rsp],xmm0
3393	movaps	xmm8,XMMWORD[32+rsp]
3394	movaps	XMMWORD[32+rsp],xmm0
3395	movaps	xmm9,XMMWORD[48+rsp]
3396	movaps	XMMWORD[48+rsp],xmm0
3397	movaps	xmm10,XMMWORD[64+rsp]
3398	movaps	XMMWORD[64+rsp],xmm0
3399	movaps	xmm11,XMMWORD[80+rsp]
3400	movaps	XMMWORD[80+rsp],xmm0
3401	movaps	xmm12,XMMWORD[96+rsp]
3402	movaps	XMMWORD[96+rsp],xmm0
3403	movaps	xmm13,XMMWORD[112+rsp]
3404	movaps	XMMWORD[112+rsp],xmm0
3405	movaps	xmm14,XMMWORD[128+rsp]
3406	movaps	XMMWORD[128+rsp],xmm0
3407	movaps	xmm15,XMMWORD[144+rsp]
3408	movaps	XMMWORD[144+rsp],xmm0
3409	lea	rax,[((160+40))+rsp]
3410$L$ocb_dec_pop:
3411	mov	r14,QWORD[((-40))+rax]
3412	mov	r13,QWORD[((-32))+rax]
3413	mov	r12,QWORD[((-24))+rax]
3414	mov	rbp,QWORD[((-16))+rax]
3415	mov	rbx,QWORD[((-8))+rax]
3416	lea	rsp,[rax]
3417$L$ocb_dec_epilogue:
3418	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
3419	mov	rsi,QWORD[16+rsp]
3420	DB	0F3h,0C3h		;repret
3421$L$SEH_end_aesni_ocb_decrypt:
3422
3423
3424ALIGN	32
3425__ocb_decrypt6:
3426	pxor	xmm15,xmm9
3427	movdqu	xmm11,XMMWORD[r12*1+rbx]
3428	movdqa	xmm12,xmm10
3429	movdqu	xmm13,XMMWORD[r13*1+rbx]
3430	movdqa	xmm14,xmm10
3431	pxor	xmm10,xmm15
3432	movdqu	xmm15,XMMWORD[r14*1+rbx]
3433	pxor	xmm11,xmm10
3434	pxor	xmm2,xmm10
3435	pxor	xmm12,xmm11
3436	pxor	xmm3,xmm11
3437	pxor	xmm13,xmm12
3438	pxor	xmm4,xmm12
3439	pxor	xmm14,xmm13
3440	pxor	xmm5,xmm13
3441	pxor	xmm15,xmm14
3442	pxor	xmm6,xmm14
3443	pxor	xmm7,xmm15
3444	movups	xmm0,XMMWORD[32+r11]
3445
3446	lea	r12,[1+r8]
3447	lea	r13,[3+r8]
3448	lea	r14,[5+r8]
3449	add	r8,6
3450	pxor	xmm10,xmm9
3451	bsf	r12,r12
3452	bsf	r13,r13
3453	bsf	r14,r14
3454
3455DB	102,15,56,222,209
3456DB	102,15,56,222,217
3457DB	102,15,56,222,225
3458DB	102,15,56,222,233
3459	pxor	xmm11,xmm9
3460	pxor	xmm12,xmm9
3461DB	102,15,56,222,241
3462	pxor	xmm13,xmm9
3463	pxor	xmm14,xmm9
3464DB	102,15,56,222,249
3465	movups	xmm1,XMMWORD[48+r11]
3466	pxor	xmm15,xmm9
3467
3468DB	102,15,56,222,208
3469DB	102,15,56,222,216
3470DB	102,15,56,222,224
3471DB	102,15,56,222,232
3472DB	102,15,56,222,240
3473DB	102,15,56,222,248
3474	movups	xmm0,XMMWORD[64+r11]
3475	shl	r12,4
3476	shl	r13,4
3477	jmp	NEAR $L$ocb_dec_loop6
3478
3479ALIGN	32
3480$L$ocb_dec_loop6:
3481DB	102,15,56,222,209
3482DB	102,15,56,222,217
3483DB	102,15,56,222,225
3484DB	102,15,56,222,233
3485DB	102,15,56,222,241
3486DB	102,15,56,222,249
3487	movups	xmm1,XMMWORD[rax*1+rcx]
3488	add	rax,32
3489
3490DB	102,15,56,222,208
3491DB	102,15,56,222,216
3492DB	102,15,56,222,224
3493DB	102,15,56,222,232
3494DB	102,15,56,222,240
3495DB	102,15,56,222,248
3496	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
3497	jnz	NEAR $L$ocb_dec_loop6
3498
3499DB	102,15,56,222,209
3500DB	102,15,56,222,217
3501DB	102,15,56,222,225
3502DB	102,15,56,222,233
3503DB	102,15,56,222,241
3504DB	102,15,56,222,249
3505	movups	xmm1,XMMWORD[16+r11]
3506	shl	r14,4
3507
3508DB	102,65,15,56,223,210
3509	movdqu	xmm10,XMMWORD[rbx]
3510	mov	rax,r10
3511DB	102,65,15,56,223,219
3512DB	102,65,15,56,223,228
3513DB	102,65,15,56,223,237
3514DB	102,65,15,56,223,246
3515DB	102,65,15,56,223,255
3516	DB	0F3h,0C3h		;repret
3517
3518
3519
3520ALIGN	32
3521__ocb_decrypt4:
3522	pxor	xmm15,xmm9
3523	movdqu	xmm11,XMMWORD[r12*1+rbx]
3524	movdqa	xmm12,xmm10
3525	movdqu	xmm13,XMMWORD[r13*1+rbx]
3526	pxor	xmm10,xmm15
3527	pxor	xmm11,xmm10
3528	pxor	xmm2,xmm10
3529	pxor	xmm12,xmm11
3530	pxor	xmm3,xmm11
3531	pxor	xmm13,xmm12
3532	pxor	xmm4,xmm12
3533	pxor	xmm5,xmm13
3534	movups	xmm0,XMMWORD[32+r11]
3535
3536	pxor	xmm10,xmm9
3537	pxor	xmm11,xmm9
3538	pxor	xmm12,xmm9
3539	pxor	xmm13,xmm9
3540
3541DB	102,15,56,222,209
3542DB	102,15,56,222,217
3543DB	102,15,56,222,225
3544DB	102,15,56,222,233
3545	movups	xmm1,XMMWORD[48+r11]
3546
3547DB	102,15,56,222,208
3548DB	102,15,56,222,216
3549DB	102,15,56,222,224
3550DB	102,15,56,222,232
3551	movups	xmm0,XMMWORD[64+r11]
3552	jmp	NEAR $L$ocb_dec_loop4
3553
3554ALIGN	32
3555$L$ocb_dec_loop4:
3556DB	102,15,56,222,209
3557DB	102,15,56,222,217
3558DB	102,15,56,222,225
3559DB	102,15,56,222,233
3560	movups	xmm1,XMMWORD[rax*1+rcx]
3561	add	rax,32
3562
3563DB	102,15,56,222,208
3564DB	102,15,56,222,216
3565DB	102,15,56,222,224
3566DB	102,15,56,222,232
3567	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
3568	jnz	NEAR $L$ocb_dec_loop4
3569
3570DB	102,15,56,222,209
3571DB	102,15,56,222,217
3572DB	102,15,56,222,225
3573DB	102,15,56,222,233
3574	movups	xmm1,XMMWORD[16+r11]
3575	mov	rax,r10
3576
3577DB	102,65,15,56,223,210
3578DB	102,65,15,56,223,219
3579DB	102,65,15,56,223,228
3580DB	102,65,15,56,223,237
3581	DB	0F3h,0C3h		;repret
3582
3583
3584
3585ALIGN	32
3586__ocb_decrypt1:
3587	pxor	xmm7,xmm15
3588	pxor	xmm7,xmm9
3589	pxor	xmm2,xmm7
3590	movups	xmm0,XMMWORD[32+r11]
3591
3592DB	102,15,56,222,209
3593	movups	xmm1,XMMWORD[48+r11]
3594	pxor	xmm7,xmm9
3595
3596DB	102,15,56,222,208
3597	movups	xmm0,XMMWORD[64+r11]
3598	jmp	NEAR $L$ocb_dec_loop1
3599
3600ALIGN	32
3601$L$ocb_dec_loop1:
3602DB	102,15,56,222,209
3603	movups	xmm1,XMMWORD[rax*1+rcx]
3604	add	rax,32
3605
3606DB	102,15,56,222,208
3607	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
3608	jnz	NEAR $L$ocb_dec_loop1
3609
3610DB	102,15,56,222,209
3611	movups	xmm1,XMMWORD[16+r11]
3612	mov	rax,r10
3613
3614DB	102,15,56,223,215
3615	DB	0F3h,0C3h		;repret
3616
3617global	aesni_cbc_encrypt
3618
3619ALIGN	16
3620aesni_cbc_encrypt:
3621	mov	QWORD[8+rsp],rdi	;WIN64 prologue
3622	mov	QWORD[16+rsp],rsi
3623	mov	rax,rsp
3624$L$SEH_begin_aesni_cbc_encrypt:
3625	mov	rdi,rcx
3626	mov	rsi,rdx
3627	mov	rdx,r8
3628	mov	rcx,r9
3629	mov	r8,QWORD[40+rsp]
3630	mov	r9,QWORD[48+rsp]
3631
3632
3633	test	rdx,rdx
3634	jz	NEAR $L$cbc_ret
3635
3636	mov	r10d,DWORD[240+rcx]
3637	mov	r11,rcx
3638	test	r9d,r9d
3639	jz	NEAR $L$cbc_decrypt
3640
3641	movups	xmm2,XMMWORD[r8]
3642	mov	eax,r10d
3643	cmp	rdx,16
3644	jb	NEAR $L$cbc_enc_tail
3645	sub	rdx,16
3646	jmp	NEAR $L$cbc_enc_loop
3647ALIGN	16
3648$L$cbc_enc_loop:
3649	movups	xmm3,XMMWORD[rdi]
3650	lea	rdi,[16+rdi]
3651
3652	movups	xmm0,XMMWORD[rcx]
3653	movups	xmm1,XMMWORD[16+rcx]
3654	xorps	xmm3,xmm0
3655	lea	rcx,[32+rcx]
3656	xorps	xmm2,xmm3
3657$L$oop_enc1_15:
3658DB	102,15,56,220,209
3659	dec	eax
3660	movups	xmm1,XMMWORD[rcx]
3661	lea	rcx,[16+rcx]
3662	jnz	NEAR $L$oop_enc1_15
3663DB	102,15,56,221,209
3664	mov	eax,r10d
3665	mov	rcx,r11
3666	movups	XMMWORD[rsi],xmm2
3667	lea	rsi,[16+rsi]
3668	sub	rdx,16
3669	jnc	NEAR $L$cbc_enc_loop
3670	add	rdx,16
3671	jnz	NEAR $L$cbc_enc_tail
3672	pxor	xmm0,xmm0
3673	pxor	xmm1,xmm1
3674	movups	XMMWORD[r8],xmm2
3675	pxor	xmm2,xmm2
3676	pxor	xmm3,xmm3
3677	jmp	NEAR $L$cbc_ret
3678
3679$L$cbc_enc_tail:
3680	mov	rcx,rdx
3681	xchg	rsi,rdi
3682	DD	0x9066A4F3
3683	mov	ecx,16
3684	sub	rcx,rdx
3685	xor	eax,eax
3686	DD	0x9066AAF3
3687	lea	rdi,[((-16))+rdi]
3688	mov	eax,r10d
3689	mov	rsi,rdi
3690	mov	rcx,r11
3691	xor	rdx,rdx
3692	jmp	NEAR $L$cbc_enc_loop
3693
3694ALIGN	16
3695$L$cbc_decrypt:
3696	cmp	rdx,16
3697	jne	NEAR $L$cbc_decrypt_bulk
3698
3699
3700
3701	movdqu	xmm2,XMMWORD[rdi]
3702	movdqu	xmm3,XMMWORD[r8]
3703	movdqa	xmm4,xmm2
3704	movups	xmm0,XMMWORD[rcx]
3705	movups	xmm1,XMMWORD[16+rcx]
3706	lea	rcx,[32+rcx]
3707	xorps	xmm2,xmm0
3708$L$oop_dec1_16:
3709DB	102,15,56,222,209
3710	dec	r10d
3711	movups	xmm1,XMMWORD[rcx]
3712	lea	rcx,[16+rcx]
3713	jnz	NEAR $L$oop_dec1_16
3714DB	102,15,56,223,209
3715	pxor	xmm0,xmm0
3716	pxor	xmm1,xmm1
3717	movdqu	XMMWORD[r8],xmm4
3718	xorps	xmm2,xmm3
3719	pxor	xmm3,xmm3
3720	movups	XMMWORD[rsi],xmm2
3721	pxor	xmm2,xmm2
3722	jmp	NEAR $L$cbc_ret
3723ALIGN	16
3724$L$cbc_decrypt_bulk:
3725	lea	r11,[rsp]
3726	push	rbp
3727	sub	rsp,176
3728	and	rsp,-16
3729	movaps	XMMWORD[16+rsp],xmm6
3730	movaps	XMMWORD[32+rsp],xmm7
3731	movaps	XMMWORD[48+rsp],xmm8
3732	movaps	XMMWORD[64+rsp],xmm9
3733	movaps	XMMWORD[80+rsp],xmm10
3734	movaps	XMMWORD[96+rsp],xmm11
3735	movaps	XMMWORD[112+rsp],xmm12
3736	movaps	XMMWORD[128+rsp],xmm13
3737	movaps	XMMWORD[144+rsp],xmm14
3738	movaps	XMMWORD[160+rsp],xmm15
3739$L$cbc_decrypt_body:
3740	mov	rbp,rcx
3741	movups	xmm10,XMMWORD[r8]
3742	mov	eax,r10d
3743	cmp	rdx,0x50
3744	jbe	NEAR $L$cbc_dec_tail
3745
3746	movups	xmm0,XMMWORD[rcx]
3747	movdqu	xmm2,XMMWORD[rdi]
3748	movdqu	xmm3,XMMWORD[16+rdi]
3749	movdqa	xmm11,xmm2
3750	movdqu	xmm4,XMMWORD[32+rdi]
3751	movdqa	xmm12,xmm3
3752	movdqu	xmm5,XMMWORD[48+rdi]
3753	movdqa	xmm13,xmm4
3754	movdqu	xmm6,XMMWORD[64+rdi]
3755	movdqa	xmm14,xmm5
3756	movdqu	xmm7,XMMWORD[80+rdi]
3757	movdqa	xmm15,xmm6
3758	mov	r9d,DWORD[((OPENSSL_ia32cap_P+4))]
3759	cmp	rdx,0x70
3760	jbe	NEAR $L$cbc_dec_six_or_seven
3761
3762	and	r9d,71303168
3763	sub	rdx,0x50
3764	cmp	r9d,4194304
3765	je	NEAR $L$cbc_dec_loop6_enter
3766	sub	rdx,0x20
3767	lea	rcx,[112+rcx]
3768	jmp	NEAR $L$cbc_dec_loop8_enter
3769ALIGN	16
3770$L$cbc_dec_loop8:
3771	movups	XMMWORD[rsi],xmm9
3772	lea	rsi,[16+rsi]
3773$L$cbc_dec_loop8_enter:
3774	movdqu	xmm8,XMMWORD[96+rdi]
3775	pxor	xmm2,xmm0
3776	movdqu	xmm9,XMMWORD[112+rdi]
3777	pxor	xmm3,xmm0
3778	movups	xmm1,XMMWORD[((16-112))+rcx]
3779	pxor	xmm4,xmm0
3780	mov	rbp,-1
3781	cmp	rdx,0x70
3782	pxor	xmm5,xmm0
3783	pxor	xmm6,xmm0
3784	pxor	xmm7,xmm0
3785	pxor	xmm8,xmm0
3786
3787DB	102,15,56,222,209
3788	pxor	xmm9,xmm0
3789	movups	xmm0,XMMWORD[((32-112))+rcx]
3790DB	102,15,56,222,217
3791DB	102,15,56,222,225
3792DB	102,15,56,222,233
3793DB	102,15,56,222,241
3794DB	102,15,56,222,249
3795DB	102,68,15,56,222,193
3796	adc	rbp,0
3797	and	rbp,128
3798DB	102,68,15,56,222,201
3799	add	rbp,rdi
3800	movups	xmm1,XMMWORD[((48-112))+rcx]
3801DB	102,15,56,222,208
3802DB	102,15,56,222,216
3803DB	102,15,56,222,224
3804DB	102,15,56,222,232
3805DB	102,15,56,222,240
3806DB	102,15,56,222,248
3807DB	102,68,15,56,222,192
3808DB	102,68,15,56,222,200
3809	movups	xmm0,XMMWORD[((64-112))+rcx]
3810	nop
3811DB	102,15,56,222,209
3812DB	102,15,56,222,217
3813DB	102,15,56,222,225
3814DB	102,15,56,222,233
3815DB	102,15,56,222,241
3816DB	102,15,56,222,249
3817DB	102,68,15,56,222,193
3818DB	102,68,15,56,222,201
3819	movups	xmm1,XMMWORD[((80-112))+rcx]
3820	nop
3821DB	102,15,56,222,208
3822DB	102,15,56,222,216
3823DB	102,15,56,222,224
3824DB	102,15,56,222,232
3825DB	102,15,56,222,240
3826DB	102,15,56,222,248
3827DB	102,68,15,56,222,192
3828DB	102,68,15,56,222,200
3829	movups	xmm0,XMMWORD[((96-112))+rcx]
3830	nop
3831DB	102,15,56,222,209
3832DB	102,15,56,222,217
3833DB	102,15,56,222,225
3834DB	102,15,56,222,233
3835DB	102,15,56,222,241
3836DB	102,15,56,222,249
3837DB	102,68,15,56,222,193
3838DB	102,68,15,56,222,201
3839	movups	xmm1,XMMWORD[((112-112))+rcx]
3840	nop
3841DB	102,15,56,222,208
3842DB	102,15,56,222,216
3843DB	102,15,56,222,224
3844DB	102,15,56,222,232
3845DB	102,15,56,222,240
3846DB	102,15,56,222,248
3847DB	102,68,15,56,222,192
3848DB	102,68,15,56,222,200
3849	movups	xmm0,XMMWORD[((128-112))+rcx]
3850	nop
3851DB	102,15,56,222,209
3852DB	102,15,56,222,217
3853DB	102,15,56,222,225
3854DB	102,15,56,222,233
3855DB	102,15,56,222,241
3856DB	102,15,56,222,249
3857DB	102,68,15,56,222,193
3858DB	102,68,15,56,222,201
3859	movups	xmm1,XMMWORD[((144-112))+rcx]
3860	cmp	eax,11
3861DB	102,15,56,222,208
3862DB	102,15,56,222,216
3863DB	102,15,56,222,224
3864DB	102,15,56,222,232
3865DB	102,15,56,222,240
3866DB	102,15,56,222,248
3867DB	102,68,15,56,222,192
3868DB	102,68,15,56,222,200
3869	movups	xmm0,XMMWORD[((160-112))+rcx]
3870	jb	NEAR $L$cbc_dec_done
3871DB	102,15,56,222,209
3872DB	102,15,56,222,217
3873DB	102,15,56,222,225
3874DB	102,15,56,222,233
3875DB	102,15,56,222,241
3876DB	102,15,56,222,249
3877DB	102,68,15,56,222,193
3878DB	102,68,15,56,222,201
3879	movups	xmm1,XMMWORD[((176-112))+rcx]
3880	nop
3881DB	102,15,56,222,208
3882DB	102,15,56,222,216
3883DB	102,15,56,222,224
3884DB	102,15,56,222,232
3885DB	102,15,56,222,240
3886DB	102,15,56,222,248
3887DB	102,68,15,56,222,192
3888DB	102,68,15,56,222,200
3889	movups	xmm0,XMMWORD[((192-112))+rcx]
3890	je	NEAR $L$cbc_dec_done
3891DB	102,15,56,222,209
3892DB	102,15,56,222,217
3893DB	102,15,56,222,225
3894DB	102,15,56,222,233
3895DB	102,15,56,222,241
3896DB	102,15,56,222,249
3897DB	102,68,15,56,222,193
3898DB	102,68,15,56,222,201
3899	movups	xmm1,XMMWORD[((208-112))+rcx]
3900	nop
3901DB	102,15,56,222,208
3902DB	102,15,56,222,216
3903DB	102,15,56,222,224
3904DB	102,15,56,222,232
3905DB	102,15,56,222,240
3906DB	102,15,56,222,248
3907DB	102,68,15,56,222,192
3908DB	102,68,15,56,222,200
3909	movups	xmm0,XMMWORD[((224-112))+rcx]
3910	jmp	NEAR $L$cbc_dec_done
3911ALIGN	16
3912$L$cbc_dec_done:
3913DB	102,15,56,222,209
3914DB	102,15,56,222,217
3915	pxor	xmm10,xmm0
3916	pxor	xmm11,xmm0
3917DB	102,15,56,222,225
3918DB	102,15,56,222,233
3919	pxor	xmm12,xmm0
3920	pxor	xmm13,xmm0
3921DB	102,15,56,222,241
3922DB	102,15,56,222,249
3923	pxor	xmm14,xmm0
3924	pxor	xmm15,xmm0
3925DB	102,68,15,56,222,193
3926DB	102,68,15,56,222,201
3927	movdqu	xmm1,XMMWORD[80+rdi]
3928
3929DB	102,65,15,56,223,210
3930	movdqu	xmm10,XMMWORD[96+rdi]
3931	pxor	xmm1,xmm0
3932DB	102,65,15,56,223,219
3933	pxor	xmm10,xmm0
3934	movdqu	xmm0,XMMWORD[112+rdi]
3935DB	102,65,15,56,223,228
3936	lea	rdi,[128+rdi]
3937	movdqu	xmm11,XMMWORD[rbp]
3938DB	102,65,15,56,223,237
3939DB	102,65,15,56,223,246
3940	movdqu	xmm12,XMMWORD[16+rbp]
3941	movdqu	xmm13,XMMWORD[32+rbp]
3942DB	102,65,15,56,223,255
3943DB	102,68,15,56,223,193
3944	movdqu	xmm14,XMMWORD[48+rbp]
3945	movdqu	xmm15,XMMWORD[64+rbp]
3946DB	102,69,15,56,223,202
3947	movdqa	xmm10,xmm0
3948	movdqu	xmm1,XMMWORD[80+rbp]
3949	movups	xmm0,XMMWORD[((-112))+rcx]
3950
3951	movups	XMMWORD[rsi],xmm2
3952	movdqa	xmm2,xmm11
3953	movups	XMMWORD[16+rsi],xmm3
3954	movdqa	xmm3,xmm12
3955	movups	XMMWORD[32+rsi],xmm4
3956	movdqa	xmm4,xmm13
3957	movups	XMMWORD[48+rsi],xmm5
3958	movdqa	xmm5,xmm14
3959	movups	XMMWORD[64+rsi],xmm6
3960	movdqa	xmm6,xmm15
3961	movups	XMMWORD[80+rsi],xmm7
3962	movdqa	xmm7,xmm1
3963	movups	XMMWORD[96+rsi],xmm8
3964	lea	rsi,[112+rsi]
3965
3966	sub	rdx,0x80
3967	ja	NEAR $L$cbc_dec_loop8
3968
3969	movaps	xmm2,xmm9
3970	lea	rcx,[((-112))+rcx]
3971	add	rdx,0x70
3972	jle	NEAR $L$cbc_dec_clear_tail_collected
3973	movups	XMMWORD[rsi],xmm9
3974	lea	rsi,[16+rsi]
3975	cmp	rdx,0x50
3976	jbe	NEAR $L$cbc_dec_tail
3977
3978	movaps	xmm2,xmm11
3979$L$cbc_dec_six_or_seven:
3980	cmp	rdx,0x60
3981	ja	NEAR $L$cbc_dec_seven
3982
3983	movaps	xmm8,xmm7
3984	call	_aesni_decrypt6
3985	pxor	xmm2,xmm10
3986	movaps	xmm10,xmm8
3987	pxor	xmm3,xmm11
3988	movdqu	XMMWORD[rsi],xmm2
3989	pxor	xmm4,xmm12
3990	movdqu	XMMWORD[16+rsi],xmm3
3991	pxor	xmm3,xmm3
3992	pxor	xmm5,xmm13
3993	movdqu	XMMWORD[32+rsi],xmm4
3994	pxor	xmm4,xmm4
3995	pxor	xmm6,xmm14
3996	movdqu	XMMWORD[48+rsi],xmm5
3997	pxor	xmm5,xmm5
3998	pxor	xmm7,xmm15
3999	movdqu	XMMWORD[64+rsi],xmm6
4000	pxor	xmm6,xmm6
4001	lea	rsi,[80+rsi]
4002	movdqa	xmm2,xmm7
4003	pxor	xmm7,xmm7
4004	jmp	NEAR $L$cbc_dec_tail_collected
4005
4006ALIGN	16
4007$L$cbc_dec_seven:
4008	movups	xmm8,XMMWORD[96+rdi]
4009	xorps	xmm9,xmm9
4010	call	_aesni_decrypt8
4011	movups	xmm9,XMMWORD[80+rdi]
4012	pxor	xmm2,xmm10
4013	movups	xmm10,XMMWORD[96+rdi]
4014	pxor	xmm3,xmm11
4015	movdqu	XMMWORD[rsi],xmm2
4016	pxor	xmm4,xmm12
4017	movdqu	XMMWORD[16+rsi],xmm3
4018	pxor	xmm3,xmm3
4019	pxor	xmm5,xmm13
4020	movdqu	XMMWORD[32+rsi],xmm4
4021	pxor	xmm4,xmm4
4022	pxor	xmm6,xmm14
4023	movdqu	XMMWORD[48+rsi],xmm5
4024	pxor	xmm5,xmm5
4025	pxor	xmm7,xmm15
4026	movdqu	XMMWORD[64+rsi],xmm6
4027	pxor	xmm6,xmm6
4028	pxor	xmm8,xmm9
4029	movdqu	XMMWORD[80+rsi],xmm7
4030	pxor	xmm7,xmm7
4031	lea	rsi,[96+rsi]
4032	movdqa	xmm2,xmm8
4033	pxor	xmm8,xmm8
4034	pxor	xmm9,xmm9
4035	jmp	NEAR $L$cbc_dec_tail_collected
4036
4037ALIGN	16
4038$L$cbc_dec_loop6:
4039	movups	XMMWORD[rsi],xmm7
4040	lea	rsi,[16+rsi]
4041	movdqu	xmm2,XMMWORD[rdi]
4042	movdqu	xmm3,XMMWORD[16+rdi]
4043	movdqa	xmm11,xmm2
4044	movdqu	xmm4,XMMWORD[32+rdi]
4045	movdqa	xmm12,xmm3
4046	movdqu	xmm5,XMMWORD[48+rdi]
4047	movdqa	xmm13,xmm4
4048	movdqu	xmm6,XMMWORD[64+rdi]
4049	movdqa	xmm14,xmm5
4050	movdqu	xmm7,XMMWORD[80+rdi]
4051	movdqa	xmm15,xmm6
4052$L$cbc_dec_loop6_enter:
4053	lea	rdi,[96+rdi]
4054	movdqa	xmm8,xmm7
4055
4056	call	_aesni_decrypt6
4057
4058	pxor	xmm2,xmm10
4059	movdqa	xmm10,xmm8
4060	pxor	xmm3,xmm11
4061	movdqu	XMMWORD[rsi],xmm2
4062	pxor	xmm4,xmm12
4063	movdqu	XMMWORD[16+rsi],xmm3
4064	pxor	xmm5,xmm13
4065	movdqu	XMMWORD[32+rsi],xmm4
4066	pxor	xmm6,xmm14
4067	mov	rcx,rbp
4068	movdqu	XMMWORD[48+rsi],xmm5
4069	pxor	xmm7,xmm15
4070	mov	eax,r10d
4071	movdqu	XMMWORD[64+rsi],xmm6
4072	lea	rsi,[80+rsi]
4073	sub	rdx,0x60
4074	ja	NEAR $L$cbc_dec_loop6
4075
4076	movdqa	xmm2,xmm7
4077	add	rdx,0x50
4078	jle	NEAR $L$cbc_dec_clear_tail_collected
4079	movups	XMMWORD[rsi],xmm7
4080	lea	rsi,[16+rsi]
4081
4082$L$cbc_dec_tail:
4083	movups	xmm2,XMMWORD[rdi]
4084	sub	rdx,0x10
4085	jbe	NEAR $L$cbc_dec_one
4086
4087	movups	xmm3,XMMWORD[16+rdi]
4088	movaps	xmm11,xmm2
4089	sub	rdx,0x10
4090	jbe	NEAR $L$cbc_dec_two
4091
4092	movups	xmm4,XMMWORD[32+rdi]
4093	movaps	xmm12,xmm3
4094	sub	rdx,0x10
4095	jbe	NEAR $L$cbc_dec_three
4096
4097	movups	xmm5,XMMWORD[48+rdi]
4098	movaps	xmm13,xmm4
4099	sub	rdx,0x10
4100	jbe	NEAR $L$cbc_dec_four
4101
4102	movups	xmm6,XMMWORD[64+rdi]
4103	movaps	xmm14,xmm5
4104	movaps	xmm15,xmm6
4105	xorps	xmm7,xmm7
4106	call	_aesni_decrypt6
4107	pxor	xmm2,xmm10
4108	movaps	xmm10,xmm15
4109	pxor	xmm3,xmm11
4110	movdqu	XMMWORD[rsi],xmm2
4111	pxor	xmm4,xmm12
4112	movdqu	XMMWORD[16+rsi],xmm3
4113	pxor	xmm3,xmm3
4114	pxor	xmm5,xmm13
4115	movdqu	XMMWORD[32+rsi],xmm4
4116	pxor	xmm4,xmm4
4117	pxor	xmm6,xmm14
4118	movdqu	XMMWORD[48+rsi],xmm5
4119	pxor	xmm5,xmm5
4120	lea	rsi,[64+rsi]
4121	movdqa	xmm2,xmm6
4122	pxor	xmm6,xmm6
4123	pxor	xmm7,xmm7
4124	sub	rdx,0x10
4125	jmp	NEAR $L$cbc_dec_tail_collected
4126
4127ALIGN	16
4128$L$cbc_dec_one:
4129	movaps	xmm11,xmm2
4130	movups	xmm0,XMMWORD[rcx]
4131	movups	xmm1,XMMWORD[16+rcx]
4132	lea	rcx,[32+rcx]
4133	xorps	xmm2,xmm0
4134$L$oop_dec1_17:
4135DB	102,15,56,222,209
4136	dec	eax
4137	movups	xmm1,XMMWORD[rcx]
4138	lea	rcx,[16+rcx]
4139	jnz	NEAR $L$oop_dec1_17
4140DB	102,15,56,223,209
4141	xorps	xmm2,xmm10
4142	movaps	xmm10,xmm11
4143	jmp	NEAR $L$cbc_dec_tail_collected
4144ALIGN	16
4145$L$cbc_dec_two:
4146	movaps	xmm12,xmm3
4147	call	_aesni_decrypt2
4148	pxor	xmm2,xmm10
4149	movaps	xmm10,xmm12
4150	pxor	xmm3,xmm11
4151	movdqu	XMMWORD[rsi],xmm2
4152	movdqa	xmm2,xmm3
4153	pxor	xmm3,xmm3
4154	lea	rsi,[16+rsi]
4155	jmp	NEAR $L$cbc_dec_tail_collected
4156ALIGN	16
4157$L$cbc_dec_three:
4158	movaps	xmm13,xmm4
4159	call	_aesni_decrypt3
4160	pxor	xmm2,xmm10
4161	movaps	xmm10,xmm13
4162	pxor	xmm3,xmm11
4163	movdqu	XMMWORD[rsi],xmm2
4164	pxor	xmm4,xmm12
4165	movdqu	XMMWORD[16+rsi],xmm3
4166	pxor	xmm3,xmm3
4167	movdqa	xmm2,xmm4
4168	pxor	xmm4,xmm4
4169	lea	rsi,[32+rsi]
4170	jmp	NEAR $L$cbc_dec_tail_collected
4171ALIGN	16
4172$L$cbc_dec_four:
4173	movaps	xmm14,xmm5
4174	call	_aesni_decrypt4
4175	pxor	xmm2,xmm10
4176	movaps	xmm10,xmm14
4177	pxor	xmm3,xmm11
4178	movdqu	XMMWORD[rsi],xmm2
4179	pxor	xmm4,xmm12
4180	movdqu	XMMWORD[16+rsi],xmm3
4181	pxor	xmm3,xmm3
4182	pxor	xmm5,xmm13
4183	movdqu	XMMWORD[32+rsi],xmm4
4184	pxor	xmm4,xmm4
4185	movdqa	xmm2,xmm5
4186	pxor	xmm5,xmm5
4187	lea	rsi,[48+rsi]
4188	jmp	NEAR $L$cbc_dec_tail_collected
4189
4190ALIGN	16
4191$L$cbc_dec_clear_tail_collected:
4192	pxor	xmm3,xmm3
4193	pxor	xmm4,xmm4
4194	pxor	xmm5,xmm5
4195$L$cbc_dec_tail_collected:
4196	movups	XMMWORD[r8],xmm10
4197	and	rdx,15
4198	jnz	NEAR $L$cbc_dec_tail_partial
4199	movups	XMMWORD[rsi],xmm2
4200	pxor	xmm2,xmm2
4201	jmp	NEAR $L$cbc_dec_ret
4202ALIGN	16
4203$L$cbc_dec_tail_partial:
4204	movaps	XMMWORD[rsp],xmm2
4205	pxor	xmm2,xmm2
4206	mov	rcx,16
4207	mov	rdi,rsi
4208	sub	rcx,rdx
4209	lea	rsi,[rsp]
4210	DD	0x9066A4F3
4211	movdqa	XMMWORD[rsp],xmm2
4212
4213$L$cbc_dec_ret:
4214	xorps	xmm0,xmm0
4215	pxor	xmm1,xmm1
4216	movaps	xmm6,XMMWORD[16+rsp]
4217	movaps	XMMWORD[16+rsp],xmm0
4218	movaps	xmm7,XMMWORD[32+rsp]
4219	movaps	XMMWORD[32+rsp],xmm0
4220	movaps	xmm8,XMMWORD[48+rsp]
4221	movaps	XMMWORD[48+rsp],xmm0
4222	movaps	xmm9,XMMWORD[64+rsp]
4223	movaps	XMMWORD[64+rsp],xmm0
4224	movaps	xmm10,XMMWORD[80+rsp]
4225	movaps	XMMWORD[80+rsp],xmm0
4226	movaps	xmm11,XMMWORD[96+rsp]
4227	movaps	XMMWORD[96+rsp],xmm0
4228	movaps	xmm12,XMMWORD[112+rsp]
4229	movaps	XMMWORD[112+rsp],xmm0
4230	movaps	xmm13,XMMWORD[128+rsp]
4231	movaps	XMMWORD[128+rsp],xmm0
4232	movaps	xmm14,XMMWORD[144+rsp]
4233	movaps	XMMWORD[144+rsp],xmm0
4234	movaps	xmm15,XMMWORD[160+rsp]
4235	movaps	XMMWORD[160+rsp],xmm0
4236	mov	rbp,QWORD[((-8))+r11]
4237	lea	rsp,[r11]
4238$L$cbc_ret:
4239	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
4240	mov	rsi,QWORD[16+rsp]
4241	DB	0F3h,0C3h		;repret
4242$L$SEH_end_aesni_cbc_encrypt:
4243global	aesni_set_decrypt_key
4244
4245ALIGN	16
4246aesni_set_decrypt_key:
4247DB	0x48,0x83,0xEC,0x08
4248	call	__aesni_set_encrypt_key
4249	shl	edx,4
4250	test	eax,eax
4251	jnz	NEAR $L$dec_key_ret
4252	lea	rcx,[16+rdx*1+r8]
4253
4254	movups	xmm0,XMMWORD[r8]
4255	movups	xmm1,XMMWORD[rcx]
4256	movups	XMMWORD[rcx],xmm0
4257	movups	XMMWORD[r8],xmm1
4258	lea	r8,[16+r8]
4259	lea	rcx,[((-16))+rcx]
4260
4261$L$dec_key_inverse:
4262	movups	xmm0,XMMWORD[r8]
4263	movups	xmm1,XMMWORD[rcx]
4264DB	102,15,56,219,192
4265DB	102,15,56,219,201
4266	lea	r8,[16+r8]
4267	lea	rcx,[((-16))+rcx]
4268	movups	XMMWORD[16+rcx],xmm0
4269	movups	XMMWORD[(-16)+r8],xmm1
4270	cmp	rcx,r8
4271	ja	NEAR $L$dec_key_inverse
4272
4273	movups	xmm0,XMMWORD[r8]
4274DB	102,15,56,219,192
4275	pxor	xmm1,xmm1
4276	movups	XMMWORD[rcx],xmm0
4277	pxor	xmm0,xmm0
4278$L$dec_key_ret:
4279	add	rsp,8
4280	DB	0F3h,0C3h		;repret
4281$L$SEH_end_set_decrypt_key:
4282
4283global	aesni_set_encrypt_key
4284
4285ALIGN	16
4286aesni_set_encrypt_key:
4287__aesni_set_encrypt_key:
4288DB	0x48,0x83,0xEC,0x08
4289	mov	rax,-1
4290	test	rcx,rcx
4291	jz	NEAR $L$enc_key_ret
4292	test	r8,r8
4293	jz	NEAR $L$enc_key_ret
4294
4295	mov	r10d,268437504
4296	movups	xmm0,XMMWORD[rcx]
4297	xorps	xmm4,xmm4
4298	and	r10d,DWORD[((OPENSSL_ia32cap_P+4))]
4299	lea	rax,[16+r8]
4300	cmp	edx,256
4301	je	NEAR $L$14rounds
4302	cmp	edx,192
4303	je	NEAR $L$12rounds
4304	cmp	edx,128
4305	jne	NEAR $L$bad_keybits
4306
4307$L$10rounds:
4308	mov	edx,9
4309	cmp	r10d,268435456
4310	je	NEAR $L$10rounds_alt
4311
4312	movups	XMMWORD[r8],xmm0
4313DB	102,15,58,223,200,1
4314	call	$L$key_expansion_128_cold
4315DB	102,15,58,223,200,2
4316	call	$L$key_expansion_128
4317DB	102,15,58,223,200,4
4318	call	$L$key_expansion_128
4319DB	102,15,58,223,200,8
4320	call	$L$key_expansion_128
4321DB	102,15,58,223,200,16
4322	call	$L$key_expansion_128
4323DB	102,15,58,223,200,32
4324	call	$L$key_expansion_128
4325DB	102,15,58,223,200,64
4326	call	$L$key_expansion_128
4327DB	102,15,58,223,200,128
4328	call	$L$key_expansion_128
4329DB	102,15,58,223,200,27
4330	call	$L$key_expansion_128
4331DB	102,15,58,223,200,54
4332	call	$L$key_expansion_128
4333	movups	XMMWORD[rax],xmm0
4334	mov	DWORD[80+rax],edx
4335	xor	eax,eax
4336	jmp	NEAR $L$enc_key_ret
4337
4338ALIGN	16
4339$L$10rounds_alt:
4340	movdqa	xmm5,XMMWORD[$L$key_rotate]
4341	mov	r10d,8
4342	movdqa	xmm4,XMMWORD[$L$key_rcon1]
4343	movdqa	xmm2,xmm0
4344	movdqu	XMMWORD[r8],xmm0
4345	jmp	NEAR $L$oop_key128
4346
4347ALIGN	16
4348$L$oop_key128:
4349DB	102,15,56,0,197
4350DB	102,15,56,221,196
4351	pslld	xmm4,1
4352	lea	rax,[16+rax]
4353
4354	movdqa	xmm3,xmm2
4355	pslldq	xmm2,4
4356	pxor	xmm3,xmm2
4357	pslldq	xmm2,4
4358	pxor	xmm3,xmm2
4359	pslldq	xmm2,4
4360	pxor	xmm2,xmm3
4361
4362	pxor	xmm0,xmm2
4363	movdqu	XMMWORD[(-16)+rax],xmm0
4364	movdqa	xmm2,xmm0
4365
4366	dec	r10d
4367	jnz	NEAR $L$oop_key128
4368
4369	movdqa	xmm4,XMMWORD[$L$key_rcon1b]
4370
4371DB	102,15,56,0,197
4372DB	102,15,56,221,196
4373	pslld	xmm4,1
4374
4375	movdqa	xmm3,xmm2
4376	pslldq	xmm2,4
4377	pxor	xmm3,xmm2
4378	pslldq	xmm2,4
4379	pxor	xmm3,xmm2
4380	pslldq	xmm2,4
4381	pxor	xmm2,xmm3
4382
4383	pxor	xmm0,xmm2
4384	movdqu	XMMWORD[rax],xmm0
4385
4386	movdqa	xmm2,xmm0
4387DB	102,15,56,0,197
4388DB	102,15,56,221,196
4389
4390	movdqa	xmm3,xmm2
4391	pslldq	xmm2,4
4392	pxor	xmm3,xmm2
4393	pslldq	xmm2,4
4394	pxor	xmm3,xmm2
4395	pslldq	xmm2,4
4396	pxor	xmm2,xmm3
4397
4398	pxor	xmm0,xmm2
4399	movdqu	XMMWORD[16+rax],xmm0
4400
4401	mov	DWORD[96+rax],edx
4402	xor	eax,eax
4403	jmp	NEAR $L$enc_key_ret
4404
4405ALIGN	16
4406$L$12rounds:
4407	movq	xmm2,QWORD[16+rcx]
4408	mov	edx,11
4409	cmp	r10d,268435456
4410	je	NEAR $L$12rounds_alt
4411
4412	movups	XMMWORD[r8],xmm0
4413DB	102,15,58,223,202,1
4414	call	$L$key_expansion_192a_cold
4415DB	102,15,58,223,202,2
4416	call	$L$key_expansion_192b
4417DB	102,15,58,223,202,4
4418	call	$L$key_expansion_192a
4419DB	102,15,58,223,202,8
4420	call	$L$key_expansion_192b
4421DB	102,15,58,223,202,16
4422	call	$L$key_expansion_192a
4423DB	102,15,58,223,202,32
4424	call	$L$key_expansion_192b
4425DB	102,15,58,223,202,64
4426	call	$L$key_expansion_192a
4427DB	102,15,58,223,202,128
4428	call	$L$key_expansion_192b
4429	movups	XMMWORD[rax],xmm0
4430	mov	DWORD[48+rax],edx
4431	xor	rax,rax
4432	jmp	NEAR $L$enc_key_ret
4433
4434ALIGN	16
4435$L$12rounds_alt:
4436	movdqa	xmm5,XMMWORD[$L$key_rotate192]
4437	movdqa	xmm4,XMMWORD[$L$key_rcon1]
4438	mov	r10d,8
4439	movdqu	XMMWORD[r8],xmm0
4440	jmp	NEAR $L$oop_key192
4441
4442ALIGN	16
4443$L$oop_key192:
4444	movq	QWORD[rax],xmm2
4445	movdqa	xmm1,xmm2
4446DB	102,15,56,0,213
4447DB	102,15,56,221,212
4448	pslld	xmm4,1
4449	lea	rax,[24+rax]
4450
4451	movdqa	xmm3,xmm0
4452	pslldq	xmm0,4
4453	pxor	xmm3,xmm0
4454	pslldq	xmm0,4
4455	pxor	xmm3,xmm0
4456	pslldq	xmm0,4
4457	pxor	xmm0,xmm3
4458
4459	pshufd	xmm3,xmm0,0xff
4460	pxor	xmm3,xmm1
4461	pslldq	xmm1,4
4462	pxor	xmm3,xmm1
4463
4464	pxor	xmm0,xmm2
4465	pxor	xmm2,xmm3
4466	movdqu	XMMWORD[(-16)+rax],xmm0
4467
4468	dec	r10d
4469	jnz	NEAR $L$oop_key192
4470
4471	mov	DWORD[32+rax],edx
4472	xor	eax,eax
4473	jmp	NEAR $L$enc_key_ret
4474
4475ALIGN	16
4476$L$14rounds:
4477	movups	xmm2,XMMWORD[16+rcx]
4478	mov	edx,13
4479	lea	rax,[16+rax]
4480	cmp	r10d,268435456
4481	je	NEAR $L$14rounds_alt
4482
4483	movups	XMMWORD[r8],xmm0
4484	movups	XMMWORD[16+r8],xmm2
4485DB	102,15,58,223,202,1
4486	call	$L$key_expansion_256a_cold
4487DB	102,15,58,223,200,1
4488	call	$L$key_expansion_256b
4489DB	102,15,58,223,202,2
4490	call	$L$key_expansion_256a
4491DB	102,15,58,223,200,2
4492	call	$L$key_expansion_256b
4493DB	102,15,58,223,202,4
4494	call	$L$key_expansion_256a
4495DB	102,15,58,223,200,4
4496	call	$L$key_expansion_256b
4497DB	102,15,58,223,202,8
4498	call	$L$key_expansion_256a
4499DB	102,15,58,223,200,8
4500	call	$L$key_expansion_256b
4501DB	102,15,58,223,202,16
4502	call	$L$key_expansion_256a
4503DB	102,15,58,223,200,16
4504	call	$L$key_expansion_256b
4505DB	102,15,58,223,202,32
4506	call	$L$key_expansion_256a
4507DB	102,15,58,223,200,32
4508	call	$L$key_expansion_256b
4509DB	102,15,58,223,202,64
4510	call	$L$key_expansion_256a
4511	movups	XMMWORD[rax],xmm0
4512	mov	DWORD[16+rax],edx
4513	xor	rax,rax
4514	jmp	NEAR $L$enc_key_ret
4515
4516ALIGN	16
4517$L$14rounds_alt:
4518	movdqa	xmm5,XMMWORD[$L$key_rotate]
4519	movdqa	xmm4,XMMWORD[$L$key_rcon1]
4520	mov	r10d,7
4521	movdqu	XMMWORD[r8],xmm0
4522	movdqa	xmm1,xmm2
4523	movdqu	XMMWORD[16+r8],xmm2
4524	jmp	NEAR $L$oop_key256
4525
4526ALIGN	16
4527$L$oop_key256:
4528DB	102,15,56,0,213
4529DB	102,15,56,221,212
4530
4531	movdqa	xmm3,xmm0
4532	pslldq	xmm0,4
4533	pxor	xmm3,xmm0
4534	pslldq	xmm0,4
4535	pxor	xmm3,xmm0
4536	pslldq	xmm0,4
4537	pxor	xmm0,xmm3
4538	pslld	xmm4,1
4539
4540	pxor	xmm0,xmm2
4541	movdqu	XMMWORD[rax],xmm0
4542
4543	dec	r10d
4544	jz	NEAR $L$done_key256
4545
4546	pshufd	xmm2,xmm0,0xff
4547	pxor	xmm3,xmm3
4548DB	102,15,56,221,211
4549
4550	movdqa	xmm3,xmm1
4551	pslldq	xmm1,4
4552	pxor	xmm3,xmm1
4553	pslldq	xmm1,4
4554	pxor	xmm3,xmm1
4555	pslldq	xmm1,4
4556	pxor	xmm1,xmm3
4557
4558	pxor	xmm2,xmm1
4559	movdqu	XMMWORD[16+rax],xmm2
4560	lea	rax,[32+rax]
4561	movdqa	xmm1,xmm2
4562
4563	jmp	NEAR $L$oop_key256
4564
4565$L$done_key256:
4566	mov	DWORD[16+rax],edx
4567	xor	eax,eax
4568	jmp	NEAR $L$enc_key_ret
4569
4570ALIGN	16
4571$L$bad_keybits:
4572	mov	rax,-2
4573$L$enc_key_ret:
4574	pxor	xmm0,xmm0
4575	pxor	xmm1,xmm1
4576	pxor	xmm2,xmm2
4577	pxor	xmm3,xmm3
4578	pxor	xmm4,xmm4
4579	pxor	xmm5,xmm5
4580	add	rsp,8
4581	DB	0F3h,0C3h		;repret
4582$L$SEH_end_set_encrypt_key:
4583
4584ALIGN	16
4585$L$key_expansion_128:
4586	movups	XMMWORD[rax],xmm0
4587	lea	rax,[16+rax]
4588$L$key_expansion_128_cold:
4589	shufps	xmm4,xmm0,16
4590	xorps	xmm0,xmm4
4591	shufps	xmm4,xmm0,140
4592	xorps	xmm0,xmm4
4593	shufps	xmm1,xmm1,255
4594	xorps	xmm0,xmm1
4595	DB	0F3h,0C3h		;repret
4596
4597ALIGN	16
4598$L$key_expansion_192a:
4599	movups	XMMWORD[rax],xmm0
4600	lea	rax,[16+rax]
4601$L$key_expansion_192a_cold:
4602	movaps	xmm5,xmm2
4603$L$key_expansion_192b_warm:
4604	shufps	xmm4,xmm0,16
4605	movdqa	xmm3,xmm2
4606	xorps	xmm0,xmm4
4607	shufps	xmm4,xmm0,140
4608	pslldq	xmm3,4
4609	xorps	xmm0,xmm4
4610	pshufd	xmm1,xmm1,85
4611	pxor	xmm2,xmm3
4612	pxor	xmm0,xmm1
4613	pshufd	xmm3,xmm0,255
4614	pxor	xmm2,xmm3
4615	DB	0F3h,0C3h		;repret
4616
4617ALIGN	16
4618$L$key_expansion_192b:
4619	movaps	xmm3,xmm0
4620	shufps	xmm5,xmm0,68
4621	movups	XMMWORD[rax],xmm5
4622	shufps	xmm3,xmm2,78
4623	movups	XMMWORD[16+rax],xmm3
4624	lea	rax,[32+rax]
4625	jmp	NEAR $L$key_expansion_192b_warm
4626
4627ALIGN	16
4628$L$key_expansion_256a:
4629	movups	XMMWORD[rax],xmm2
4630	lea	rax,[16+rax]
4631$L$key_expansion_256a_cold:
4632	shufps	xmm4,xmm0,16
4633	xorps	xmm0,xmm4
4634	shufps	xmm4,xmm0,140
4635	xorps	xmm0,xmm4
4636	shufps	xmm1,xmm1,255
4637	xorps	xmm0,xmm1
4638	DB	0F3h,0C3h		;repret
4639
4640ALIGN	16
4641$L$key_expansion_256b:
4642	movups	XMMWORD[rax],xmm0
4643	lea	rax,[16+rax]
4644
4645	shufps	xmm4,xmm2,16
4646	xorps	xmm2,xmm4
4647	shufps	xmm4,xmm2,140
4648	xorps	xmm2,xmm4
4649	shufps	xmm1,xmm1,170
4650	xorps	xmm2,xmm1
4651	DB	0F3h,0C3h		;repret
4652
4653
4654ALIGN	64
4655$L$bswap_mask:
4656DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
4657$L$increment32:
4658	DD	6,6,6,0
4659$L$increment64:
4660	DD	1,0,0,0
4661$L$xts_magic:
4662	DD	0x87,0,1,0
4663$L$increment1:
4664DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4665$L$key_rotate:
4666	DD	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
4667$L$key_rotate192:
4668	DD	0x04070605,0x04070605,0x04070605,0x04070605
4669$L$key_rcon1:
4670	DD	1,1,1,1
4671$L$key_rcon1b:
4672	DD	0x1b,0x1b,0x1b,0x1b
4673
4674DB	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
4675DB	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
4676DB	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
4677DB	115,108,46,111,114,103,62,0
4678ALIGN	64
4679EXTERN	__imp_RtlVirtualUnwind
4680
4681ALIGN	16
4682ecb_ccm64_se_handler:
4683	push	rsi
4684	push	rdi
4685	push	rbx
4686	push	rbp
4687	push	r12
4688	push	r13
4689	push	r14
4690	push	r15
4691	pushfq
4692	sub	rsp,64
4693
4694	mov	rax,QWORD[120+r8]
4695	mov	rbx,QWORD[248+r8]
4696
4697	mov	rsi,QWORD[8+r9]
4698	mov	r11,QWORD[56+r9]
4699
4700	mov	r10d,DWORD[r11]
4701	lea	r10,[r10*1+rsi]
4702	cmp	rbx,r10
4703	jb	NEAR $L$common_seh_tail
4704
4705	mov	rax,QWORD[152+r8]
4706
4707	mov	r10d,DWORD[4+r11]
4708	lea	r10,[r10*1+rsi]
4709	cmp	rbx,r10
4710	jae	NEAR $L$common_seh_tail
4711
4712	lea	rsi,[rax]
4713	lea	rdi,[512+r8]
4714	mov	ecx,8
4715	DD	0xa548f3fc
4716	lea	rax,[88+rax]
4717
4718	jmp	NEAR $L$common_seh_tail
4719
4720
4721
4722ALIGN	16
4723ctr_xts_se_handler:
4724	push	rsi
4725	push	rdi
4726	push	rbx
4727	push	rbp
4728	push	r12
4729	push	r13
4730	push	r14
4731	push	r15
4732	pushfq
4733	sub	rsp,64
4734
4735	mov	rax,QWORD[120+r8]
4736	mov	rbx,QWORD[248+r8]
4737
4738	mov	rsi,QWORD[8+r9]
4739	mov	r11,QWORD[56+r9]
4740
4741	mov	r10d,DWORD[r11]
4742	lea	r10,[r10*1+rsi]
4743	cmp	rbx,r10
4744	jb	NEAR $L$common_seh_tail
4745
4746	mov	rax,QWORD[152+r8]
4747
4748	mov	r10d,DWORD[4+r11]
4749	lea	r10,[r10*1+rsi]
4750	cmp	rbx,r10
4751	jae	NEAR $L$common_seh_tail
4752
4753	mov	rax,QWORD[208+r8]
4754
4755	lea	rsi,[((-168))+rax]
4756	lea	rdi,[512+r8]
4757	mov	ecx,20
4758	DD	0xa548f3fc
4759
4760	mov	rbp,QWORD[((-8))+rax]
4761	mov	QWORD[160+r8],rbp
4762	jmp	NEAR $L$common_seh_tail
4763
4764
4765
4766ALIGN	16
4767ocb_se_handler:
4768	push	rsi
4769	push	rdi
4770	push	rbx
4771	push	rbp
4772	push	r12
4773	push	r13
4774	push	r14
4775	push	r15
4776	pushfq
4777	sub	rsp,64
4778
4779	mov	rax,QWORD[120+r8]
4780	mov	rbx,QWORD[248+r8]
4781
4782	mov	rsi,QWORD[8+r9]
4783	mov	r11,QWORD[56+r9]
4784
4785	mov	r10d,DWORD[r11]
4786	lea	r10,[r10*1+rsi]
4787	cmp	rbx,r10
4788	jb	NEAR $L$common_seh_tail
4789
4790	mov	r10d,DWORD[4+r11]
4791	lea	r10,[r10*1+rsi]
4792	cmp	rbx,r10
4793	jae	NEAR $L$common_seh_tail
4794
4795	mov	r10d,DWORD[8+r11]
4796	lea	r10,[r10*1+rsi]
4797	cmp	rbx,r10
4798	jae	NEAR $L$ocb_no_xmm
4799
4800	mov	rax,QWORD[152+r8]
4801
4802	lea	rsi,[rax]
4803	lea	rdi,[512+r8]
4804	mov	ecx,20
4805	DD	0xa548f3fc
4806	lea	rax,[((160+40))+rax]
4807
4808$L$ocb_no_xmm:
4809	mov	rbx,QWORD[((-8))+rax]
4810	mov	rbp,QWORD[((-16))+rax]
4811	mov	r12,QWORD[((-24))+rax]
4812	mov	r13,QWORD[((-32))+rax]
4813	mov	r14,QWORD[((-40))+rax]
4814
4815	mov	QWORD[144+r8],rbx
4816	mov	QWORD[160+r8],rbp
4817	mov	QWORD[216+r8],r12
4818	mov	QWORD[224+r8],r13
4819	mov	QWORD[232+r8],r14
4820
4821	jmp	NEAR $L$common_seh_tail
4822
4823
4824ALIGN	16
4825cbc_se_handler:
4826	push	rsi
4827	push	rdi
4828	push	rbx
4829	push	rbp
4830	push	r12
4831	push	r13
4832	push	r14
4833	push	r15
4834	pushfq
4835	sub	rsp,64
4836
4837	mov	rax,QWORD[152+r8]
4838	mov	rbx,QWORD[248+r8]
4839
4840	lea	r10,[$L$cbc_decrypt_bulk]
4841	cmp	rbx,r10
4842	jb	NEAR $L$common_seh_tail
4843
4844	mov	rax,QWORD[120+r8]
4845
4846	lea	r10,[$L$cbc_decrypt_body]
4847	cmp	rbx,r10
4848	jb	NEAR $L$common_seh_tail
4849
4850	mov	rax,QWORD[152+r8]
4851
4852	lea	r10,[$L$cbc_ret]
4853	cmp	rbx,r10
4854	jae	NEAR $L$common_seh_tail
4855
4856	lea	rsi,[16+rax]
4857	lea	rdi,[512+r8]
4858	mov	ecx,20
4859	DD	0xa548f3fc
4860
4861	mov	rax,QWORD[208+r8]
4862
4863	mov	rbp,QWORD[((-8))+rax]
4864	mov	QWORD[160+r8],rbp
4865
4866$L$common_seh_tail:
4867	mov	rdi,QWORD[8+rax]
4868	mov	rsi,QWORD[16+rax]
4869	mov	QWORD[152+r8],rax
4870	mov	QWORD[168+r8],rsi
4871	mov	QWORD[176+r8],rdi
4872
4873	mov	rdi,QWORD[40+r9]
4874	mov	rsi,r8
4875	mov	ecx,154
4876	DD	0xa548f3fc
4877
4878	mov	rsi,r9
4879	xor	rcx,rcx
4880	mov	rdx,QWORD[8+rsi]
4881	mov	r8,QWORD[rsi]
4882	mov	r9,QWORD[16+rsi]
4883	mov	r10,QWORD[40+rsi]
4884	lea	r11,[56+rsi]
4885	lea	r12,[24+rsi]
4886	mov	QWORD[32+rsp],r10
4887	mov	QWORD[40+rsp],r11
4888	mov	QWORD[48+rsp],r12
4889	mov	QWORD[56+rsp],rcx
4890	call	QWORD[__imp_RtlVirtualUnwind]
4891
4892	mov	eax,1
4893	add	rsp,64
4894	popfq
4895	pop	r15
4896	pop	r14
4897	pop	r13
4898	pop	r12
4899	pop	rbp
4900	pop	rbx
4901	pop	rdi
4902	pop	rsi
4903	DB	0F3h,0C3h		;repret
4904
4905
4906section	.pdata rdata align=4
4907ALIGN	4
4908	DD	$L$SEH_begin_aesni_ecb_encrypt wrt ..imagebase
4909	DD	$L$SEH_end_aesni_ecb_encrypt wrt ..imagebase
4910	DD	$L$SEH_info_ecb wrt ..imagebase
4911
4912	DD	$L$SEH_begin_aesni_ccm64_encrypt_blocks wrt ..imagebase
4913	DD	$L$SEH_end_aesni_ccm64_encrypt_blocks wrt ..imagebase
4914	DD	$L$SEH_info_ccm64_enc wrt ..imagebase
4915
4916	DD	$L$SEH_begin_aesni_ccm64_decrypt_blocks wrt ..imagebase
4917	DD	$L$SEH_end_aesni_ccm64_decrypt_blocks wrt ..imagebase
4918	DD	$L$SEH_info_ccm64_dec wrt ..imagebase
4919
4920	DD	$L$SEH_begin_aesni_ctr32_encrypt_blocks wrt ..imagebase
4921	DD	$L$SEH_end_aesni_ctr32_encrypt_blocks wrt ..imagebase
4922	DD	$L$SEH_info_ctr32 wrt ..imagebase
4923
4924	DD	$L$SEH_begin_aesni_xts_encrypt wrt ..imagebase
4925	DD	$L$SEH_end_aesni_xts_encrypt wrt ..imagebase
4926	DD	$L$SEH_info_xts_enc wrt ..imagebase
4927
4928	DD	$L$SEH_begin_aesni_xts_decrypt wrt ..imagebase
4929	DD	$L$SEH_end_aesni_xts_decrypt wrt ..imagebase
4930	DD	$L$SEH_info_xts_dec wrt ..imagebase
4931
4932	DD	$L$SEH_begin_aesni_ocb_encrypt wrt ..imagebase
4933	DD	$L$SEH_end_aesni_ocb_encrypt wrt ..imagebase
4934	DD	$L$SEH_info_ocb_enc wrt ..imagebase
4935
4936	DD	$L$SEH_begin_aesni_ocb_decrypt wrt ..imagebase
4937	DD	$L$SEH_end_aesni_ocb_decrypt wrt ..imagebase
4938	DD	$L$SEH_info_ocb_dec wrt ..imagebase
4939	DD	$L$SEH_begin_aesni_cbc_encrypt wrt ..imagebase
4940	DD	$L$SEH_end_aesni_cbc_encrypt wrt ..imagebase
4941	DD	$L$SEH_info_cbc wrt ..imagebase
4942
4943	DD	aesni_set_decrypt_key wrt ..imagebase
4944	DD	$L$SEH_end_set_decrypt_key wrt ..imagebase
4945	DD	$L$SEH_info_key wrt ..imagebase
4946
4947	DD	aesni_set_encrypt_key wrt ..imagebase
4948	DD	$L$SEH_end_set_encrypt_key wrt ..imagebase
4949	DD	$L$SEH_info_key wrt ..imagebase
4950section	.xdata rdata align=8
4951ALIGN	8
4952$L$SEH_info_ecb:
4953DB	9,0,0,0
4954	DD	ecb_ccm64_se_handler wrt ..imagebase
4955	DD	$L$ecb_enc_body wrt ..imagebase,$L$ecb_enc_ret wrt ..imagebase
4956$L$SEH_info_ccm64_enc:
4957DB	9,0,0,0
4958	DD	ecb_ccm64_se_handler wrt ..imagebase
4959	DD	$L$ccm64_enc_body wrt ..imagebase,$L$ccm64_enc_ret wrt ..imagebase
4960$L$SEH_info_ccm64_dec:
4961DB	9,0,0,0
4962	DD	ecb_ccm64_se_handler wrt ..imagebase
4963	DD	$L$ccm64_dec_body wrt ..imagebase,$L$ccm64_dec_ret wrt ..imagebase
4964$L$SEH_info_ctr32:
4965DB	9,0,0,0
4966	DD	ctr_xts_se_handler wrt ..imagebase
4967	DD	$L$ctr32_body wrt ..imagebase,$L$ctr32_epilogue wrt ..imagebase
4968$L$SEH_info_xts_enc:
4969DB	9,0,0,0
4970	DD	ctr_xts_se_handler wrt ..imagebase
4971	DD	$L$xts_enc_body wrt ..imagebase,$L$xts_enc_epilogue wrt ..imagebase
4972$L$SEH_info_xts_dec:
4973DB	9,0,0,0
4974	DD	ctr_xts_se_handler wrt ..imagebase
4975	DD	$L$xts_dec_body wrt ..imagebase,$L$xts_dec_epilogue wrt ..imagebase
4976$L$SEH_info_ocb_enc:
4977DB	9,0,0,0
4978	DD	ocb_se_handler wrt ..imagebase
4979	DD	$L$ocb_enc_body wrt ..imagebase,$L$ocb_enc_epilogue wrt ..imagebase
4980	DD	$L$ocb_enc_pop wrt ..imagebase
4981	DD	0
4982$L$SEH_info_ocb_dec:
4983DB	9,0,0,0
4984	DD	ocb_se_handler wrt ..imagebase
4985	DD	$L$ocb_dec_body wrt ..imagebase,$L$ocb_dec_epilogue wrt ..imagebase
4986	DD	$L$ocb_dec_pop wrt ..imagebase
4987	DD	0
4988$L$SEH_info_cbc:
4989DB	9,0,0,0
4990	DD	cbc_se_handler wrt ..imagebase
4991$L$SEH_info_key:
4992DB	0x01,0x04,0x01,0x00
4993DB	0x04,0x02,0x00,0x00
4994