1.text
# aesni_encrypt -- encrypt a single 16-byte block.
#   %rdi  pointer to the input block (unaligned load)
#   %rsi  pointer to the output block (unaligned store)
#   %rdx  pointer to the key structure: round keys from offset 0 in
#         16-byte steps, 32-bit loop count at offset 240
# Clobbers %eax, %rdx, %xmm0-%xmm2 and flags.
# NOTE(review): the loop runs AESENC exactly (count) times and then one
# AESENCLAST, so the value at offset 240 is presumably "rounds - 1";
# confirm against the key-setup routine, which is not in this view.
2.globl	aesni_encrypt
3.type	aesni_encrypt,@function
4.align	16
5aesni_encrypt:
6	movups	(%rdi),%xmm2
7	movl	240(%rdx),%eax
8	movups	(%rdx),%xmm0
9	movups	16(%rdx),%xmm1
10	leaq	32(%rdx),%rdx
# Whitening: state ^= round key at offset 0.
11	xorps	%xmm0,%xmm2
12.Loop_enc1_1:
# Encoded "aesenc %xmm1,%xmm2" (bytes 66 0F 38 DC D1).
13.byte	102,15,56,220,209
# decl sets ZF for the jnz below; movups and leaq leave the flags untouched.
14	decl	%eax
15	movups	(%rdx),%xmm1
16	leaq	16(%rdx),%rdx
17	jnz	.Loop_enc1_1
# Encoded "aesenclast %xmm1,%xmm2" (bytes 66 0F 38 DD D1).
18.byte	102,15,56,221,209
19	movups	%xmm2,(%rsi)
# Encoded "rep ret" (bytes F3 C3).
20	.byte	0xf3,0xc3
21.size	aesni_encrypt,.-aesni_encrypt
22
# aesni_decrypt -- decrypt a single 16-byte block.
#   %rdi  pointer to the input block (unaligned load)
#   %rsi  pointer to the output block (unaligned store)
#   %rdx  pointer to the key structure: round keys from offset 0 in
#         16-byte steps, 32-bit loop count at offset 240
# Clobbers %eax, %rdx, %xmm0-%xmm2 and flags.
# Same shape as the single-block encrypt loop, using AESDEC/AESDECLAST.
# NOTE(review): the round keys are consumed in ascending address order, so the
# schedule is presumably already prepared for decryption -- confirm with the
# key-setup routine, which is not in this view.
23.globl	aesni_decrypt
24.type	aesni_decrypt,@function
25.align	16
26aesni_decrypt:
27	movups	(%rdi),%xmm2
28	movl	240(%rdx),%eax
29	movups	(%rdx),%xmm0
30	movups	16(%rdx),%xmm1
31	leaq	32(%rdx),%rdx
# Whitening: state ^= round key at offset 0.
32	xorps	%xmm0,%xmm2
33.Loop_dec1_2:
# Encoded "aesdec %xmm1,%xmm2" (bytes 66 0F 38 DE D1).
34.byte	102,15,56,222,209
# decl sets ZF for the jnz below; movups and leaq leave the flags untouched.
35	decl	%eax
36	movups	(%rdx),%xmm1
37	leaq	16(%rdx),%rdx
38	jnz	.Loop_dec1_2
# Encoded "aesdeclast %xmm1,%xmm2" (bytes 66 0F 38 DF D1).
39.byte	102,15,56,223,209
40	movups	%xmm2,(%rsi)
# Encoded "rep ret" (bytes F3 C3).
41	.byte	0xf3,0xc3
42.size	aesni_decrypt, .-aesni_decrypt
# _aesni_encrypt3 -- file-local helper: encrypt three blocks in parallel.
#   In:   %xmm2,%xmm3,%xmm4  the three blocks
#         %rcx               pointer to the round keys
#         %eax               loop count (halved below: two rounds per iteration)
#   Out:  %xmm2,%xmm3,%xmm4  results, in place
# Clobbers %eax, %rcx, %xmm0, %xmm1 and flags.
# Round keys alternate between %xmm1 and %xmm0; each one is reloaded while the
# other is being used. Per lane the code executes 2*(count>>1)+1 AESENC steps
# followed by one AESENCLAST.
# The .byte rows encode "aesenc" (third byte from the end = 220) or
# "aesenclast" (221); the last byte selects source key (%xmm1 or %xmm0) and
# destination lane.
43.type	_aesni_encrypt3,@function
44.align	16
45_aesni_encrypt3:
46	movups	(%rcx),%xmm0
47	shrl	$1,%eax
48	movups	16(%rcx),%xmm1
49	leaq	32(%rcx),%rcx
# Whitening of all three lanes with the key at offset 0.
50	xorps	%xmm0,%xmm2
51	xorps	%xmm0,%xmm3
52	xorps	%xmm0,%xmm4
53	movups	(%rcx),%xmm0
54
55.Lenc_loop3:
# aesenc %xmm1 into %xmm2, %xmm3, %xmm4 (decl interleaved; its ZF feeds jnz).
56.byte	102,15,56,220,209
57.byte	102,15,56,220,217
58	decl	%eax
59.byte	102,15,56,220,225
60	movups	16(%rcx),%xmm1
# aesenc %xmm0 into %xmm2, %xmm3, %xmm4.
61.byte	102,15,56,220,208
62.byte	102,15,56,220,216
63	leaq	32(%rcx),%rcx
64.byte	102,15,56,220,224
65	movups	(%rcx),%xmm0
66	jnz	.Lenc_loop3
67
# Final pair: aesenc with %xmm1, then aesenclast with %xmm0, on each lane.
68.byte	102,15,56,220,209
69.byte	102,15,56,220,217
70.byte	102,15,56,220,225
71.byte	102,15,56,221,208
72.byte	102,15,56,221,216
73.byte	102,15,56,221,224
# Encoded "rep ret".
74	.byte	0xf3,0xc3
75.size	_aesni_encrypt3,.-_aesni_encrypt3
# _aesni_decrypt3 -- file-local helper: decrypt three blocks in parallel.
#   In:   %xmm2,%xmm3,%xmm4  the three blocks
#         %rcx               pointer to the round keys
#         %eax               loop count (halved below: two rounds per iteration)
#   Out:  %xmm2,%xmm3,%xmm4  results, in place
# Clobbers %eax, %rcx, %xmm0, %xmm1 and flags.
# Mirror image of the three-way encrypt helper: the .byte rows encode "aesdec"
# (third byte from the end = 222) or "aesdeclast" (223); the last byte selects
# source key (%xmm1 or %xmm0) and destination lane.
76.type	_aesni_decrypt3,@function
77.align	16
78_aesni_decrypt3:
79	movups	(%rcx),%xmm0
80	shrl	$1,%eax
81	movups	16(%rcx),%xmm1
82	leaq	32(%rcx),%rcx
# Whitening of all three lanes with the key at offset 0.
83	xorps	%xmm0,%xmm2
84	xorps	%xmm0,%xmm3
85	xorps	%xmm0,%xmm4
86	movups	(%rcx),%xmm0
87
88.Ldec_loop3:
# aesdec %xmm1 into %xmm2, %xmm3, %xmm4 (decl interleaved; its ZF feeds jnz).
89.byte	102,15,56,222,209
90.byte	102,15,56,222,217
91	decl	%eax
92.byte	102,15,56,222,225
93	movups	16(%rcx),%xmm1
# aesdec %xmm0 into %xmm2, %xmm3, %xmm4.
94.byte	102,15,56,222,208
95.byte	102,15,56,222,216
96	leaq	32(%rcx),%rcx
97.byte	102,15,56,222,224
98	movups	(%rcx),%xmm0
99	jnz	.Ldec_loop3
100
# Final pair: aesdec with %xmm1, then aesdeclast with %xmm0, on each lane.
101.byte	102,15,56,222,209
102.byte	102,15,56,222,217
103.byte	102,15,56,222,225
104.byte	102,15,56,223,208
105.byte	102,15,56,223,216
106.byte	102,15,56,223,224
# Encoded "rep ret".
107	.byte	0xf3,0xc3
108.size	_aesni_decrypt3,.-_aesni_decrypt3
# _aesni_encrypt4 -- file-local helper: encrypt four blocks in parallel.
#   In:   %xmm2-%xmm5  the four blocks
#         %rcx         pointer to the round keys
#         %eax         loop count (halved below: two rounds per iteration)
#   Out:  %xmm2-%xmm5  results, in place
# Clobbers %eax, %rcx, %xmm0, %xmm1 and flags.
# Same structure as the three-way helper with one extra lane (%xmm5).
# .byte rows: 220 = aesenc, 221 = aesenclast; final byte picks key and lane.
109.type	_aesni_encrypt4,@function
110.align	16
111_aesni_encrypt4:
112	movups	(%rcx),%xmm0
113	shrl	$1,%eax
114	movups	16(%rcx),%xmm1
115	leaq	32(%rcx),%rcx
# Whitening of all four lanes with the key at offset 0.
116	xorps	%xmm0,%xmm2
117	xorps	%xmm0,%xmm3
118	xorps	%xmm0,%xmm4
119	xorps	%xmm0,%xmm5
120	movups	(%rcx),%xmm0
121
122.Lenc_loop4:
# aesenc %xmm1 into %xmm2..%xmm5 (decl interleaved; its ZF feeds jnz).
123.byte	102,15,56,220,209
124.byte	102,15,56,220,217
125	decl	%eax
126.byte	102,15,56,220,225
127.byte	102,15,56,220,233
128	movups	16(%rcx),%xmm1
# aesenc %xmm0 into %xmm2..%xmm5.
129.byte	102,15,56,220,208
130.byte	102,15,56,220,216
131	leaq	32(%rcx),%rcx
132.byte	102,15,56,220,224
133.byte	102,15,56,220,232
134	movups	(%rcx),%xmm0
135	jnz	.Lenc_loop4
136
# Final pair: aesenc with %xmm1, then aesenclast with %xmm0, on each lane.
137.byte	102,15,56,220,209
138.byte	102,15,56,220,217
139.byte	102,15,56,220,225
140.byte	102,15,56,220,233
141.byte	102,15,56,221,208
142.byte	102,15,56,221,216
143.byte	102,15,56,221,224
144.byte	102,15,56,221,232
# Encoded "rep ret".
145	.byte	0xf3,0xc3
146.size	_aesni_encrypt4,.-_aesni_encrypt4
# _aesni_decrypt4 -- file-local helper: decrypt four blocks in parallel.
#   In:   %xmm2-%xmm5  the four blocks
#         %rcx         pointer to the round keys
#         %eax         loop count (halved below: two rounds per iteration)
#   Out:  %xmm2-%xmm5  results, in place
# Clobbers %eax, %rcx, %xmm0, %xmm1 and flags.
# .byte rows: 222 = aesdec, 223 = aesdeclast; final byte picks key and lane.
147.type	_aesni_decrypt4,@function
148.align	16
149_aesni_decrypt4:
150	movups	(%rcx),%xmm0
151	shrl	$1,%eax
152	movups	16(%rcx),%xmm1
153	leaq	32(%rcx),%rcx
# Whitening of all four lanes with the key at offset 0.
154	xorps	%xmm0,%xmm2
155	xorps	%xmm0,%xmm3
156	xorps	%xmm0,%xmm4
157	xorps	%xmm0,%xmm5
158	movups	(%rcx),%xmm0
159
160.Ldec_loop4:
# aesdec %xmm1 into %xmm2..%xmm5 (decl interleaved; its ZF feeds jnz).
161.byte	102,15,56,222,209
162.byte	102,15,56,222,217
163	decl	%eax
164.byte	102,15,56,222,225
165.byte	102,15,56,222,233
166	movups	16(%rcx),%xmm1
# aesdec %xmm0 into %xmm2..%xmm5.
167.byte	102,15,56,222,208
168.byte	102,15,56,222,216
169	leaq	32(%rcx),%rcx
170.byte	102,15,56,222,224
171.byte	102,15,56,222,232
172	movups	(%rcx),%xmm0
173	jnz	.Ldec_loop4
174
# Final pair: aesdec with %xmm1, then aesdeclast with %xmm0, on each lane.
175.byte	102,15,56,222,209
176.byte	102,15,56,222,217
177.byte	102,15,56,222,225
178.byte	102,15,56,222,233
179.byte	102,15,56,223,208
180.byte	102,15,56,223,216
181.byte	102,15,56,223,224
182.byte	102,15,56,223,232
# Encoded "rep ret".
183	.byte	0xf3,0xc3
184.size	_aesni_decrypt4,.-_aesni_decrypt4
# _aesni_encrypt6 -- file-local helper: encrypt six blocks in parallel.
#   In:   %xmm2-%xmm7  the six blocks
#         %rcx         pointer to the round keys
#         %eax         loop count (halved below: two rounds per iteration)
#   Out:  %xmm2-%xmm7  results, in place
# Clobbers %eax, %rcx, %xmm0, %xmm1 and flags.
# The prologue interleaves the whitening XOR of lane N+1 with the first AESENC
# of lane N, then jumps into the middle of the loop. The flags written by the
# decl in the prologue are still live at the jnz at the loop bottom: nothing in
# between (pxor, movups, leaq, jmp, the AES .byte rows) modifies them.
# .byte rows: 220 = aesenc, 221 = aesenclast; final byte picks key and lane.
185.type	_aesni_encrypt6,@function
186.align	16
187_aesni_encrypt6:
188	movups	(%rcx),%xmm0
189	shrl	$1,%eax
190	movups	16(%rcx),%xmm1
191	leaq	32(%rcx),%rcx
# Whitening with %xmm0 interleaved with the first aesenc (%xmm1) per lane.
192	xorps	%xmm0,%xmm2
193	pxor	%xmm0,%xmm3
194.byte	102,15,56,220,209
195	pxor	%xmm0,%xmm4
196.byte	102,15,56,220,217
197	pxor	%xmm0,%xmm5
198.byte	102,15,56,220,225
199	pxor	%xmm0,%xmm6
200.byte	102,15,56,220,233
201	pxor	%xmm0,%xmm7
202	decl	%eax
203.byte	102,15,56,220,241
204	movups	(%rcx),%xmm0
205.byte	102,15,56,220,249
206	jmp	.Lenc_loop6_enter
207.align	16
208.Lenc_loop6:
# aesenc %xmm1 into %xmm2..%xmm7.
209.byte	102,15,56,220,209
210.byte	102,15,56,220,217
211	decl	%eax
212.byte	102,15,56,220,225
213.byte	102,15,56,220,233
214.byte	102,15,56,220,241
215.byte	102,15,56,220,249
216.Lenc_loop6_enter:
217	movups	16(%rcx),%xmm1
# aesenc %xmm0 into %xmm2..%xmm7.
218.byte	102,15,56,220,208
219.byte	102,15,56,220,216
220	leaq	32(%rcx),%rcx
221.byte	102,15,56,220,224
222.byte	102,15,56,220,232
223.byte	102,15,56,220,240
224.byte	102,15,56,220,248
225	movups	(%rcx),%xmm0
226	jnz	.Lenc_loop6
227
# Final pair: aesenc with %xmm1, then aesenclast with %xmm0, on each lane.
228.byte	102,15,56,220,209
229.byte	102,15,56,220,217
230.byte	102,15,56,220,225
231.byte	102,15,56,220,233
232.byte	102,15,56,220,241
233.byte	102,15,56,220,249
234.byte	102,15,56,221,208
235.byte	102,15,56,221,216
236.byte	102,15,56,221,224
237.byte	102,15,56,221,232
238.byte	102,15,56,221,240
239.byte	102,15,56,221,248
# Encoded "rep ret".
240	.byte	0xf3,0xc3
241.size	_aesni_encrypt6,.-_aesni_encrypt6
# _aesni_decrypt6 -- file-local helper: decrypt six blocks in parallel.
#   In:   %xmm2-%xmm7  the six blocks
#         %rcx         pointer to the round keys
#         %eax         loop count (halved below: two rounds per iteration)
#   Out:  %xmm2-%xmm7  results, in place
# Clobbers %eax, %rcx, %xmm0, %xmm1 and flags.
# The prologue interleaves the whitening XOR of lane N+1 with the first AESDEC
# of lane N, then jumps into the middle of the loop. The flags written by the
# decl in the prologue are still live at the jnz at the loop bottom: nothing in
# between (pxor, movups, leaq, jmp, the AES .byte rows) modifies them.
# .byte rows: 222 = aesdec, 223 = aesdeclast; final byte picks key and lane.
242.type	_aesni_decrypt6,@function
243.align	16
244_aesni_decrypt6:
245	movups	(%rcx),%xmm0
246	shrl	$1,%eax
247	movups	16(%rcx),%xmm1
248	leaq	32(%rcx),%rcx
# Whitening with %xmm0 interleaved with the first aesdec (%xmm1) per lane.
249	xorps	%xmm0,%xmm2
250	pxor	%xmm0,%xmm3
251.byte	102,15,56,222,209
252	pxor	%xmm0,%xmm4
253.byte	102,15,56,222,217
254	pxor	%xmm0,%xmm5
255.byte	102,15,56,222,225
256	pxor	%xmm0,%xmm6
257.byte	102,15,56,222,233
258	pxor	%xmm0,%xmm7
259	decl	%eax
260.byte	102,15,56,222,241
261	movups	(%rcx),%xmm0
262.byte	102,15,56,222,249
263	jmp	.Ldec_loop6_enter
264.align	16
265.Ldec_loop6:
# aesdec %xmm1 into %xmm2..%xmm7.
266.byte	102,15,56,222,209
267.byte	102,15,56,222,217
268	decl	%eax
269.byte	102,15,56,222,225
270.byte	102,15,56,222,233
271.byte	102,15,56,222,241
272.byte	102,15,56,222,249
273.Ldec_loop6_enter:
274	movups	16(%rcx),%xmm1
# aesdec %xmm0 into %xmm2..%xmm7.
275.byte	102,15,56,222,208
276.byte	102,15,56,222,216
277	leaq	32(%rcx),%rcx
278.byte	102,15,56,222,224
279.byte	102,15,56,222,232
280.byte	102,15,56,222,240
281.byte	102,15,56,222,248
282	movups	(%rcx),%xmm0
283	jnz	.Ldec_loop6
284
# Final pair: aesdec with %xmm1, then aesdeclast with %xmm0, on each lane.
285.byte	102,15,56,222,209
286.byte	102,15,56,222,217
287.byte	102,15,56,222,225
288.byte	102,15,56,222,233
289.byte	102,15,56,222,241
290.byte	102,15,56,222,249
291.byte	102,15,56,223,208
292.byte	102,15,56,223,216
293.byte	102,15,56,223,224
294.byte	102,15,56,223,232
295.byte	102,15,56,223,240
296.byte	102,15,56,223,248
# Encoded "rep ret".
297	.byte	0xf3,0xc3
298.size	_aesni_decrypt6,.-_aesni_decrypt6
# _aesni_encrypt8 -- file-local helper: encrypt eight blocks in parallel.
#   In:   %xmm2-%xmm9  the eight blocks
#         %rcx         pointer to the round keys
#         %eax         loop count (halved below: two rounds per iteration)
#   Out:  %xmm2-%xmm9  results, in place
# Clobbers %eax, %rcx, %xmm0, %xmm1 and flags.
# Same scheme as the six-way helper. Lanes %xmm8/%xmm9 need a REX prefix, so
# their .byte rows carry an extra 68 (0x44) after the 102 (0x66) prefix.
# The loop is entered at .Lenc_loop8_enter with the flags from the prologue
# decl still intact (no flag-writing instruction in between).
# .byte rows: 220 = aesenc, 221 = aesenclast; final byte picks key and lane.
299.type	_aesni_encrypt8,@function
300.align	16
301_aesni_encrypt8:
302	movups	(%rcx),%xmm0
303	shrl	$1,%eax
304	movups	16(%rcx),%xmm1
305	leaq	32(%rcx),%rcx
# Whitening with %xmm0 interleaved with the first aesenc (%xmm1) per lane.
306	xorps	%xmm0,%xmm2
307	xorps	%xmm0,%xmm3
308.byte	102,15,56,220,209
309	pxor	%xmm0,%xmm4
310.byte	102,15,56,220,217
311	pxor	%xmm0,%xmm5
312.byte	102,15,56,220,225
313	pxor	%xmm0,%xmm6
314.byte	102,15,56,220,233
315	pxor	%xmm0,%xmm7
316	decl	%eax
317.byte	102,15,56,220,241
318	pxor	%xmm0,%xmm8
319.byte	102,15,56,220,249
320	pxor	%xmm0,%xmm9
321	movups	(%rcx),%xmm0
# aesenc %xmm1 into %xmm8 and %xmm9.
322.byte	102,68,15,56,220,193
323.byte	102,68,15,56,220,201
324	movups	16(%rcx),%xmm1
325	jmp	.Lenc_loop8_enter
326.align	16
327.Lenc_loop8:
# aesenc %xmm1 into %xmm2..%xmm9.
328.byte	102,15,56,220,209
329.byte	102,15,56,220,217
330	decl	%eax
331.byte	102,15,56,220,225
332.byte	102,15,56,220,233
333.byte	102,15,56,220,241
334.byte	102,15,56,220,249
335.byte	102,68,15,56,220,193
336.byte	102,68,15,56,220,201
337	movups	16(%rcx),%xmm1
338.Lenc_loop8_enter:
# aesenc %xmm0 into %xmm2..%xmm9.
339.byte	102,15,56,220,208
340.byte	102,15,56,220,216
341	leaq	32(%rcx),%rcx
342.byte	102,15,56,220,224
343.byte	102,15,56,220,232
344.byte	102,15,56,220,240
345.byte	102,15,56,220,248
346.byte	102,68,15,56,220,192
347.byte	102,68,15,56,220,200
348	movups	(%rcx),%xmm0
349	jnz	.Lenc_loop8
350
# Final pair: aesenc with %xmm1, then aesenclast with %xmm0, on each lane.
351.byte	102,15,56,220,209
352.byte	102,15,56,220,217
353.byte	102,15,56,220,225
354.byte	102,15,56,220,233
355.byte	102,15,56,220,241
356.byte	102,15,56,220,249
357.byte	102,68,15,56,220,193
358.byte	102,68,15,56,220,201
359.byte	102,15,56,221,208
360.byte	102,15,56,221,216
361.byte	102,15,56,221,224
362.byte	102,15,56,221,232
363.byte	102,15,56,221,240
364.byte	102,15,56,221,248
365.byte	102,68,15,56,221,192
366.byte	102,68,15,56,221,200
# Encoded "rep ret".
367	.byte	0xf3,0xc3
368.size	_aesni_encrypt8,.-_aesni_encrypt8
# _aesni_decrypt8 -- file-local helper: decrypt eight blocks in parallel.
#   In:   %xmm2-%xmm9  the eight blocks
#         %rcx         pointer to the round keys
#         %eax         loop count (halved below: two rounds per iteration)
#   Out:  %xmm2-%xmm9  results, in place
# Clobbers %eax, %rcx, %xmm0, %xmm1 and flags.
# Same scheme as the six-way helper. Lanes %xmm8/%xmm9 need a REX prefix, so
# their .byte rows carry an extra 68 (0x44) after the 102 (0x66) prefix.
# The loop is entered at .Ldec_loop8_enter with the flags from the prologue
# decl still intact (no flag-writing instruction in between).
# .byte rows: 222 = aesdec, 223 = aesdeclast; final byte picks key and lane.
369.type	_aesni_decrypt8,@function
370.align	16
371_aesni_decrypt8:
372	movups	(%rcx),%xmm0
373	shrl	$1,%eax
374	movups	16(%rcx),%xmm1
375	leaq	32(%rcx),%rcx
# Whitening with %xmm0 interleaved with the first aesdec (%xmm1) per lane.
376	xorps	%xmm0,%xmm2
377	xorps	%xmm0,%xmm3
378.byte	102,15,56,222,209
379	pxor	%xmm0,%xmm4
380.byte	102,15,56,222,217
381	pxor	%xmm0,%xmm5
382.byte	102,15,56,222,225
383	pxor	%xmm0,%xmm6
384.byte	102,15,56,222,233
385	pxor	%xmm0,%xmm7
386	decl	%eax
387.byte	102,15,56,222,241
388	pxor	%xmm0,%xmm8
389.byte	102,15,56,222,249
390	pxor	%xmm0,%xmm9
391	movups	(%rcx),%xmm0
# aesdec %xmm1 into %xmm8 and %xmm9.
392.byte	102,68,15,56,222,193
393.byte	102,68,15,56,222,201
394	movups	16(%rcx),%xmm1
395	jmp	.Ldec_loop8_enter
396.align	16
397.Ldec_loop8:
# aesdec %xmm1 into %xmm2..%xmm9.
398.byte	102,15,56,222,209
399.byte	102,15,56,222,217
400	decl	%eax
401.byte	102,15,56,222,225
402.byte	102,15,56,222,233
403.byte	102,15,56,222,241
404.byte	102,15,56,222,249
405.byte	102,68,15,56,222,193
406.byte	102,68,15,56,222,201
407	movups	16(%rcx),%xmm1
408.Ldec_loop8_enter:
# aesdec %xmm0 into %xmm2..%xmm9.
409.byte	102,15,56,222,208
410.byte	102,15,56,222,216
411	leaq	32(%rcx),%rcx
412.byte	102,15,56,222,224
413.byte	102,15,56,222,232
414.byte	102,15,56,222,240
415.byte	102,15,56,222,248
416.byte	102,68,15,56,222,192
417.byte	102,68,15,56,222,200
418	movups	(%rcx),%xmm0
419	jnz	.Ldec_loop8
420
# Final pair: aesdec with %xmm1, then aesdeclast with %xmm0, on each lane.
421.byte	102,15,56,222,209
422.byte	102,15,56,222,217
423.byte	102,15,56,222,225
424.byte	102,15,56,222,233
425.byte	102,15,56,222,241
426.byte	102,15,56,222,249
427.byte	102,68,15,56,222,193
428.byte	102,68,15,56,222,201
429.byte	102,15,56,223,208
430.byte	102,15,56,223,216
431.byte	102,15,56,223,224
432.byte	102,15,56,223,232
433.byte	102,15,56,223,240
434.byte	102,15,56,223,248
435.byte	102,68,15,56,223,192
436.byte	102,68,15,56,223,200
# Encoded "rep ret".
437	.byte	0xf3,0xc3
438.size	_aesni_decrypt8,.-_aesni_decrypt8
# aesni_ecb_encrypt -- process a buffer one independent block at a time.
#   %rdi  input pointer
#   %rsi  output pointer
#   %rdx  length in bytes; rounded down to a multiple of 16 and the routine
#         returns immediately if the result is zero
#   %rcx  key structure (round keys from offset 0, loop count at offset 240)
#   %r8d  direction flag: non-zero takes the encrypt path, zero the decrypt path
# Internal register roles:
#   %r11  saved key pointer, %r10d saved loop count -- the _aesni_* helpers
#         advance %rcx and consume %eax, so both are restored before reuse.
# Bulk data is pushed through the eight-way helper 128 bytes per call; the
# remaining one to seven blocks are dispatched to the single-block loop or
# to the three/four/six/eight-way helpers.
# Clobbers %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm9 and flags.
439.globl	aesni_ecb_encrypt
440.type	aesni_ecb_encrypt,@function
441.align	16
442aesni_ecb_encrypt:
443	andq	$-16,%rdx
444	jz	.Lecb_ret
445
446	movl	240(%rcx),%eax
447	movups	(%rcx),%xmm0
448	movq	%rcx,%r11
449	movl	%eax,%r10d
450	testl	%r8d,%r8d
451	jz	.Lecb_decrypt
452
# ---- encrypt path ----
453	cmpq	$128,%rdx
454	jb	.Lecb_enc_tail
455
# Preload the first eight blocks, then enter the loop at the helper call.
456	movdqu	(%rdi),%xmm2
457	movdqu	16(%rdi),%xmm3
458	movdqu	32(%rdi),%xmm4
459	movdqu	48(%rdi),%xmm5
460	movdqu	64(%rdi),%xmm6
461	movdqu	80(%rdi),%xmm7
462	movdqu	96(%rdi),%xmm8
463	movdqu	112(%rdi),%xmm9
464	leaq	128(%rdi),%rdi
465	subq	$128,%rdx
466	jmp	.Lecb_enc_loop8_enter
467.align	16
468.Lecb_enc_loop8:
# Store the eight results of the previous call while loading the next eight
# inputs into the same registers; key pointer and count are restored too.
469	movups	%xmm2,(%rsi)
470	movq	%r11,%rcx
471	movdqu	(%rdi),%xmm2
472	movl	%r10d,%eax
473	movups	%xmm3,16(%rsi)
474	movdqu	16(%rdi),%xmm3
475	movups	%xmm4,32(%rsi)
476	movdqu	32(%rdi),%xmm4
477	movups	%xmm5,48(%rsi)
478	movdqu	48(%rdi),%xmm5
479	movups	%xmm6,64(%rsi)
480	movdqu	64(%rdi),%xmm6
481	movups	%xmm7,80(%rsi)
482	movdqu	80(%rdi),%xmm7
483	movups	%xmm8,96(%rsi)
484	movdqu	96(%rdi),%xmm8
485	movups	%xmm9,112(%rsi)
486	leaq	128(%rsi),%rsi
487	movdqu	112(%rdi),%xmm9
488	leaq	128(%rdi),%rdi
489.Lecb_enc_loop8_enter:
490
491	call	_aesni_encrypt8
492
# Keep looping while at least 128 more bytes remain (no borrow from the sub).
493	subq	$128,%rdx
494	jnc	.Lecb_enc_loop8
495
# Flush the last eight results and restore key pointer / count for the tail.
496	movups	%xmm2,(%rsi)
497	movq	%r11,%rcx
498	movups	%xmm3,16(%rsi)
499	movl	%r10d,%eax
500	movups	%xmm4,32(%rsi)
501	movups	%xmm5,48(%rsi)
502	movups	%xmm6,64(%rsi)
503	movups	%xmm7,80(%rsi)
504	movups	%xmm8,96(%rsi)
505	movups	%xmm9,112(%rsi)
506	leaq	128(%rsi),%rsi
# Undo the over-subtraction; zero means the length was a multiple of 128.
507	addq	$128,%rdx
508	jz	.Lecb_ret
509
510.Lecb_enc_tail:
# Tail dispatch on the remaining byte count (16..112). Each cmpq feeds both
# the jb and the following je; the movups loads in between do not touch flags.
511	movups	(%rdi),%xmm2
512	cmpq	$32,%rdx
513	jb	.Lecb_enc_one
514	movups	16(%rdi),%xmm3
515	je	.Lecb_enc_two
516	movups	32(%rdi),%xmm4
517	cmpq	$64,%rdx
518	jb	.Lecb_enc_three
519	movups	48(%rdi),%xmm5
520	je	.Lecb_enc_four
521	movups	64(%rdi),%xmm6
522	cmpq	$96,%rdx
523	jb	.Lecb_enc_five
524	movups	80(%rdi),%xmm7
525	je	.Lecb_enc_six
# Seven blocks left: run the eight-way helper; %xmm9 is not loaded here and
# its result is not stored.
526	movdqu	96(%rdi),%xmm8
527	call	_aesni_encrypt8
528	movups	%xmm2,(%rsi)
529	movups	%xmm3,16(%rsi)
530	movups	%xmm4,32(%rsi)
531	movups	%xmm5,48(%rsi)
532	movups	%xmm6,64(%rsi)
533	movups	%xmm7,80(%rsi)
534	movups	%xmm8,96(%rsi)
535	jmp	.Lecb_ret
536.align	16
537.Lecb_enc_one:
# One block: inline single-block loop (aesenc per count, then aesenclast).
538	movups	(%rcx),%xmm0
539	movups	16(%rcx),%xmm1
540	leaq	32(%rcx),%rcx
541	xorps	%xmm0,%xmm2
542.Loop_enc1_3:
543.byte	102,15,56,220,209
544	decl	%eax
545	movups	(%rcx),%xmm1
546	leaq	16(%rcx),%rcx
547	jnz	.Loop_enc1_3
548.byte	102,15,56,221,209
549	movups	%xmm2,(%rsi)
550	jmp	.Lecb_ret
551.align	16
552.Lecb_enc_two:
# Two blocks: three-way helper with the unused third lane zeroed.
553	xorps	%xmm4,%xmm4
554	call	_aesni_encrypt3
555	movups	%xmm2,(%rsi)
556	movups	%xmm3,16(%rsi)
557	jmp	.Lecb_ret
558.align	16
559.Lecb_enc_three:
560	call	_aesni_encrypt3
561	movups	%xmm2,(%rsi)
562	movups	%xmm3,16(%rsi)
563	movups	%xmm4,32(%rsi)
564	jmp	.Lecb_ret
565.align	16
566.Lecb_enc_four:
567	call	_aesni_encrypt4
568	movups	%xmm2,(%rsi)
569	movups	%xmm3,16(%rsi)
570	movups	%xmm4,32(%rsi)
571	movups	%xmm5,48(%rsi)
572	jmp	.Lecb_ret
573.align	16
574.Lecb_enc_five:
# Five blocks: six-way helper with the unused sixth lane zeroed.
575	xorps	%xmm7,%xmm7
576	call	_aesni_encrypt6
577	movups	%xmm2,(%rsi)
578	movups	%xmm3,16(%rsi)
579	movups	%xmm4,32(%rsi)
580	movups	%xmm5,48(%rsi)
581	movups	%xmm6,64(%rsi)
582	jmp	.Lecb_ret
583.align	16
584.Lecb_enc_six:
585	call	_aesni_encrypt6
586	movups	%xmm2,(%rsi)
587	movups	%xmm3,16(%rsi)
588	movups	%xmm4,32(%rsi)
589	movups	%xmm5,48(%rsi)
590	movups	%xmm6,64(%rsi)
591	movups	%xmm7,80(%rsi)
592	jmp	.Lecb_ret
593
594.align	16
595.Lecb_decrypt:
# ---- decrypt path: same structure, using the _aesni_decrypt* helpers ----
596	cmpq	$128,%rdx
597	jb	.Lecb_dec_tail
598
599	movdqu	(%rdi),%xmm2
600	movdqu	16(%rdi),%xmm3
601	movdqu	32(%rdi),%xmm4
602	movdqu	48(%rdi),%xmm5
603	movdqu	64(%rdi),%xmm6
604	movdqu	80(%rdi),%xmm7
605	movdqu	96(%rdi),%xmm8
606	movdqu	112(%rdi),%xmm9
607	leaq	128(%rdi),%rdi
608	subq	$128,%rdx
609	jmp	.Lecb_dec_loop8_enter
610.align	16
611.Lecb_dec_loop8:
# Store previous results / load next inputs, restoring %rcx and %eax.
612	movups	%xmm2,(%rsi)
613	movq	%r11,%rcx
614	movdqu	(%rdi),%xmm2
615	movl	%r10d,%eax
616	movups	%xmm3,16(%rsi)
617	movdqu	16(%rdi),%xmm3
618	movups	%xmm4,32(%rsi)
619	movdqu	32(%rdi),%xmm4
620	movups	%xmm5,48(%rsi)
621	movdqu	48(%rdi),%xmm5
622	movups	%xmm6,64(%rsi)
623	movdqu	64(%rdi),%xmm6
624	movups	%xmm7,80(%rsi)
625	movdqu	80(%rdi),%xmm7
626	movups	%xmm8,96(%rsi)
627	movdqu	96(%rdi),%xmm8
628	movups	%xmm9,112(%rsi)
629	leaq	128(%rsi),%rsi
630	movdqu	112(%rdi),%xmm9
631	leaq	128(%rdi),%rdi
632.Lecb_dec_loop8_enter:
633
634	call	_aesni_decrypt8
635
# Reloads the first round key into %xmm0 (movups does not affect the flags
# used by the jnc below). The helpers load %xmm0 themselves on entry.
636	movups	(%r11),%xmm0
637	subq	$128,%rdx
638	jnc	.Lecb_dec_loop8
639
640	movups	%xmm2,(%rsi)
641	movq	%r11,%rcx
642	movups	%xmm3,16(%rsi)
643	movl	%r10d,%eax
644	movups	%xmm4,32(%rsi)
645	movups	%xmm5,48(%rsi)
646	movups	%xmm6,64(%rsi)
647	movups	%xmm7,80(%rsi)
648	movups	%xmm8,96(%rsi)
649	movups	%xmm9,112(%rsi)
650	leaq	128(%rsi),%rsi
651	addq	$128,%rdx
652	jz	.Lecb_ret
653
654.Lecb_dec_tail:
# Tail dispatch on the remaining byte count, as on the encrypt side.
655	movups	(%rdi),%xmm2
656	cmpq	$32,%rdx
657	jb	.Lecb_dec_one
658	movups	16(%rdi),%xmm3
659	je	.Lecb_dec_two
660	movups	32(%rdi),%xmm4
661	cmpq	$64,%rdx
662	jb	.Lecb_dec_three
663	movups	48(%rdi),%xmm5
664	je	.Lecb_dec_four
665	movups	64(%rdi),%xmm6
666	cmpq	$96,%rdx
667	jb	.Lecb_dec_five
668	movups	80(%rdi),%xmm7
669	je	.Lecb_dec_six
# Seven blocks left: eight-way helper; %xmm9 is not loaded and not stored.
670	movups	96(%rdi),%xmm8
671	movups	(%rcx),%xmm0
672	call	_aesni_decrypt8
673	movups	%xmm2,(%rsi)
674	movups	%xmm3,16(%rsi)
675	movups	%xmm4,32(%rsi)
676	movups	%xmm5,48(%rsi)
677	movups	%xmm6,64(%rsi)
678	movups	%xmm7,80(%rsi)
679	movups	%xmm8,96(%rsi)
680	jmp	.Lecb_ret
681.align	16
682.Lecb_dec_one:
# One block: inline single-block loop (aesdec per count, then aesdeclast).
683	movups	(%rcx),%xmm0
684	movups	16(%rcx),%xmm1
685	leaq	32(%rcx),%rcx
686	xorps	%xmm0,%xmm2
687.Loop_dec1_4:
688.byte	102,15,56,222,209
689	decl	%eax
690	movups	(%rcx),%xmm1
691	leaq	16(%rcx),%rcx
692	jnz	.Loop_dec1_4
693.byte	102,15,56,223,209
694	movups	%xmm2,(%rsi)
695	jmp	.Lecb_ret
696.align	16
697.Lecb_dec_two:
# Two blocks: three-way helper with the unused third lane zeroed.
698	xorps	%xmm4,%xmm4
699	call	_aesni_decrypt3
700	movups	%xmm2,(%rsi)
701	movups	%xmm3,16(%rsi)
702	jmp	.Lecb_ret
703.align	16
704.Lecb_dec_three:
705	call	_aesni_decrypt3
706	movups	%xmm2,(%rsi)
707	movups	%xmm3,16(%rsi)
708	movups	%xmm4,32(%rsi)
709	jmp	.Lecb_ret
710.align	16
711.Lecb_dec_four:
712	call	_aesni_decrypt4
713	movups	%xmm2,(%rsi)
714	movups	%xmm3,16(%rsi)
715	movups	%xmm4,32(%rsi)
716	movups	%xmm5,48(%rsi)
717	jmp	.Lecb_ret
718.align	16
719.Lecb_dec_five:
# Five blocks: six-way helper with the unused sixth lane zeroed.
720	xorps	%xmm7,%xmm7
721	call	_aesni_decrypt6
722	movups	%xmm2,(%rsi)
723	movups	%xmm3,16(%rsi)
724	movups	%xmm4,32(%rsi)
725	movups	%xmm5,48(%rsi)
726	movups	%xmm6,64(%rsi)
727	jmp	.Lecb_ret
728.align	16
729.Lecb_dec_six:
# Six blocks: last case, falls through into the common return.
730	call	_aesni_decrypt6
731	movups	%xmm2,(%rsi)
732	movups	%xmm3,16(%rsi)
733	movups	%xmm4,32(%rsi)
734	movups	%xmm5,48(%rsi)
735	movups	%xmm6,64(%rsi)
736	movups	%xmm7,80(%rsi)
737
738.Lecb_ret:
# Encoded "rep ret".
739	.byte	0xf3,0xc3
740.size	aesni_ecb_encrypt,.-aesni_ecb_encrypt
# aesni_ccm64_encrypt_blocks -- counter-mode encryption fused with a running
# 16-byte accumulator update, one block per outer iteration.
#   %rdi  input pointer
#   %rsi  output pointer
#   %rdx  number of 16-byte blocks (decremented once per iteration; the body
#         runs before the count is tested, so a count of zero is not handled)
#   %rcx  key structure (round keys from offset 0, loop count at offset 240)
#   %r8   pointer to the 16-byte counter block (read only here)
#   %r9   pointer to the 16-byte accumulator (read at entry, written at exit)
#         NOTE(review): presumably the CBC-MAC state, going by "ccm" in the
#         symbol name -- confirm with the caller.
# Register roles: %xmm9 counter (kept byte-shuffled so paddq can step it),
# %xmm2 counter block being encrypted, %xmm3 accumulator, %xmm8 current input,
# %xmm6 = .Lincrement64, %xmm7 = .Lbswap_mask (both constants are defined
# outside this view and are loaded with aligned moves), %r11 saved key
# pointer, %r10d saved halved loop count.
# Clobbers %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm3, %xmm6-%xmm9.
741.globl	aesni_ccm64_encrypt_blocks
742.type	aesni_ccm64_encrypt_blocks,@function
743.align	16
744aesni_ccm64_encrypt_blocks:
745	movl	240(%rcx),%eax
746	movdqu	(%r8),%xmm9
747	movdqa	.Lincrement64(%rip),%xmm6
748	movdqa	.Lbswap_mask(%rip),%xmm7
749
# Two rounds per inner-loop iteration, hence the halved count.
750	shrl	$1,%eax
751	leaq	0(%rcx),%r11
752	movdqu	(%r9),%xmm3
753	movdqa	%xmm9,%xmm2
754	movl	%eax,%r10d
# Encoded "pshufb %xmm7,%xmm9": put the counter into the shuffled form.
755.byte	102,68,15,56,0,207
756	jmp	.Lccm64_enc_outer
757.align	16
758.Lccm64_enc_outer:
759	movups	(%r11),%xmm0
760	movl	%r10d,%eax
761	movups	(%rdi),%xmm8
762
# Whitening: %xmm2 ^= key0; %xmm3 ^= (input ^ key0), i.e. the input block is
# folded into the accumulator and whitened in one step.
763	xorps	%xmm0,%xmm2
764	movups	16(%r11),%xmm1
765	xorps	%xmm8,%xmm0
766	leaq	32(%r11),%rcx
767	xorps	%xmm0,%xmm3
768	movups	(%rcx),%xmm0
769
770.Lccm64_enc2_loop:
# Two-lane round loop: aesenc on %xmm2 and %xmm3 with %xmm1, then with %xmm0.
771.byte	102,15,56,220,209
772	decl	%eax
773.byte	102,15,56,220,217
774	movups	16(%rcx),%xmm1
775.byte	102,15,56,220,208
776	leaq	32(%rcx),%rcx
777.byte	102,15,56,220,216
778	movups	0(%rcx),%xmm0
779	jnz	.Lccm64_enc2_loop
# Last aesenc (%xmm1) and aesenclast (%xmm0) for both lanes, with the counter
# step (paddq) slotted in between.
780.byte	102,15,56,220,209
781.byte	102,15,56,220,217
782	paddq	%xmm6,%xmm9
783.byte	102,15,56,221,208
784.byte	102,15,56,221,216
785
# decq sets ZF for the jnz at the bottom; leaq, xorps, movdqa, movups and the
# pshufb row in between leave the flags unchanged.
786	decq	%rdx
787	leaq	16(%rdi),%rdi
# Output = input ^ encrypted counter block.
788	xorps	%xmm2,%xmm8
789	movdqa	%xmm9,%xmm2
790	movups	%xmm8,(%rsi)
791	leaq	16(%rsi),%rsi
# Encoded "pshufb %xmm7,%xmm2": shuffle the stepped counter back for the next
# block.
792.byte	102,15,56,0,215
793	jnz	.Lccm64_enc_outer
794
# Write the updated accumulator back.
795	movups	%xmm3,(%r9)
# Encoded "rep ret".
796	.byte	0xf3,0xc3
797.size	aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
# aesni_ccm64_decrypt_blocks -- counter-mode decryption fused with a running
# 16-byte accumulator update.
#   %rdi  input pointer
#   %rsi  output pointer
#   %rdx  number of 16-byte blocks (tested after the first block is produced,
#         so a count of zero is not handled)
#   %rcx  key structure (round keys from offset 0, loop count at offset 240)
#   %r8   pointer to the 16-byte counter block (read only here)
#   %r9   pointer to the 16-byte accumulator (read at entry, written at exit)
#         NOTE(review): presumably the CBC-MAC state, going by "ccm" in the
#         symbol name -- confirm with the caller.
# Flow: the first counter block is encrypted alone; afterwards each outer
# iteration emits one output block and, unless it was the last, encrypts the
# next counter block together with (accumulator ^ block just produced) in a
# two-lane loop. The accumulator update for the final block is done by a
# single-lane loop at .Lccm64_dec_break.
# Register roles: %xmm9 shuffled counter, %xmm2 counter block / keystream,
# %xmm3 accumulator, %xmm8 current block, %xmm6 = .Lincrement64,
# %xmm7 = .Lbswap_mask (constants defined outside this view), %r11 saved key
# pointer, %r10d saved (un-halved) loop count.
# Clobbers %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm3, %xmm6-%xmm9.
798.globl	aesni_ccm64_decrypt_blocks
799.type	aesni_ccm64_decrypt_blocks,@function
800.align	16
801aesni_ccm64_decrypt_blocks:
802	movl	240(%rcx),%eax
803	movups	(%r8),%xmm9
804	movdqu	(%r9),%xmm3
805	movdqa	.Lincrement64(%rip),%xmm6
806	movdqa	.Lbswap_mask(%rip),%xmm7
807
808	movaps	%xmm9,%xmm2
809	movl	%eax,%r10d
810	movq	%rcx,%r11
# Encoded "pshufb %xmm7,%xmm9".
811.byte	102,68,15,56,0,207
# Encrypt the first counter block on its own (single-lane loop on %xmm2).
812	movups	(%rcx),%xmm0
813	movups	16(%rcx),%xmm1
814	leaq	32(%rcx),%rcx
815	xorps	%xmm0,%xmm2
816.Loop_enc1_5:
817.byte	102,15,56,220,209
818	decl	%eax
819	movups	(%rcx),%xmm1
820	leaq	16(%rcx),%rcx
821	jnz	.Loop_enc1_5
822.byte	102,15,56,221,209
823	movups	(%rdi),%xmm8
824	paddq	%xmm6,%xmm9
825	leaq	16(%rdi),%rdi
826	jmp	.Lccm64_dec_outer
827.align	16
828.Lccm64_dec_outer:
# Output = input ^ keystream; then prepare the next counter block in %xmm2.
829	xorps	%xmm2,%xmm8
830	movdqa	%xmm9,%xmm2
831	movl	%r10d,%eax
832	movups	%xmm8,(%rsi)
833	leaq	16(%rsi),%rsi
# Encoded "pshufb %xmm7,%xmm2".
834.byte	102,15,56,0,215
835
836	subq	$1,%rdx
837	jz	.Lccm64_dec_break
838
# More blocks follow: two-lane pass. %xmm3 ^= (output block ^ key0) folds the
# block just produced into the accumulator together with the whitening.
839	movups	(%r11),%xmm0
840	shrl	$1,%eax
841	movups	16(%r11),%xmm1
842	xorps	%xmm0,%xmm8
843	leaq	32(%r11),%rcx
844	xorps	%xmm0,%xmm2
845	xorps	%xmm8,%xmm3
846	movups	(%rcx),%xmm0
847
848.Lccm64_dec2_loop:
# aesenc on %xmm2 and %xmm3 with %xmm1, then with %xmm0.
849.byte	102,15,56,220,209
850	decl	%eax
851.byte	102,15,56,220,217
852	movups	16(%rcx),%xmm1
853.byte	102,15,56,220,208
854	leaq	32(%rcx),%rcx
855.byte	102,15,56,220,216
856	movups	0(%rcx),%xmm0
857	jnz	.Lccm64_dec2_loop
# Load the next input and step the counter while the last rounds complete.
858	movups	(%rdi),%xmm8
859	paddq	%xmm6,%xmm9
860.byte	102,15,56,220,209
861.byte	102,15,56,220,217
862	leaq	16(%rdi),%rdi
863.byte	102,15,56,221,208
864.byte	102,15,56,221,216
865	jmp	.Lccm64_dec_outer
866
867.align	16
868.Lccm64_dec_break:
869
# Last block: fold it into the accumulator and run the single-lane loop on
# %xmm3. %eax still holds the full count copied from %r10d above; %r11 is
# advanced directly because the key pointer is not needed again.
870	movups	(%r11),%xmm0
871	movups	16(%r11),%xmm1
872	xorps	%xmm0,%xmm8
873	leaq	32(%r11),%r11
874	xorps	%xmm8,%xmm3
875.Loop_enc1_6:
# Encoded "aesenc %xmm1,%xmm3".
876.byte	102,15,56,220,217
877	decl	%eax
878	movups	(%r11),%xmm1
879	leaq	16(%r11),%r11
880	jnz	.Loop_enc1_6
# Encoded "aesenclast %xmm1,%xmm3".
881.byte	102,15,56,221,217
882	movups	%xmm3,(%r9)
# Encoded "rep ret".
883	.byte	0xf3,0xc3
884.size	aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
# aesni_ctr32_encrypt_blocks -- counter mode with a 32-bit counter held in the
# last dword of the 16-byte counter block.
#   %rdi  input pointer
#   %rsi  output pointer
#   %rdx  number of 16-byte blocks (a count of exactly one takes a shortcut
#         that encrypts the counter block as given)
#   %rcx  key structure (round keys from offset 0, loop count at offset 240)
#   %r8   pointer to the 16-byte counter block (read only here)
# Register roles in the bulk path:
#   %xmm14      counter block with its last dword cleared
#   %xmm15      .Lbswap_mask (constant defined outside this view)
#   %xmm12/13   two triples of consecutive counter values; integer-order
#               copies live at -40(%rsp) and -24(%rsp), byte-shuffled copies
#               in the registers
#   %xmm2-%xmm7 six counter blocks / keystream, %xmm8-%xmm11,%xmm1,%xmm0 input
#   %r11 saved key pointer, %r10d saved halved loop count
# Stack: the two 16-byte slots sit below %rsp without %rsp being adjusted
# (System V red zone) and are accessed with aligned moves. The helper calls in
# the tail push their return address at -8(%rsp), which does not overlap them.
# NOTE(review): the pshufd selectors (192/128/64) copy one dword of
# %xmm12/%xmm13 into the top lane and dword 0 into the others; this yields a
# clean counter word only if .Lbswap_mask reverses all 16 bytes -- confirm
# against the constant's definition.
# Clobbers %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm15 and flags.
885.globl	aesni_ctr32_encrypt_blocks
886.type	aesni_ctr32_encrypt_blocks,@function
887.align	16
888aesni_ctr32_encrypt_blocks:
889	cmpq	$1,%rdx
890	je	.Lctr32_one_shortcut
891
892	movdqu	(%r8),%xmm14
893	movdqa	.Lbswap_mask(%rip),%xmm15
894	xorl	%eax,%eax
# Encoded "pextrd $3,%xmm14,%r10d" then "pinsrd $3,%eax,%xmm14": pull the
# counter dword out into %r10d and zero it inside %xmm14.
895.byte	102,69,15,58,22,242,3
896.byte	102,68,15,58,34,240,3
897
898	movl	240(%rcx),%eax
# Convert the counter to native integer order so it can be incremented.
899	bswapl	%r10d
900	pxor	%xmm12,%xmm12
901	pxor	%xmm13,%xmm13
# The pinsrd rows below build %xmm12 = {c, c+1, c+2, 0} from %r10d and
# %xmm13 = {c+3, c+4, c+5, 0} from %r11d, one dword per row.
902.byte	102,69,15,58,34,226,0
903	leaq	3(%r10),%r11
904.byte	102,69,15,58,34,235,0
905	incl	%r10d
906.byte	102,69,15,58,34,226,1
907	incq	%r11
908.byte	102,69,15,58,34,235,1
909	incl	%r10d
910.byte	102,69,15,58,34,226,2
911	incq	%r11
912.byte	102,69,15,58,34,235,2
# Save the integer-order triples, then byte-shuffle the register copies
# (encoded "pshufb %xmm15,%xmm12" and "pshufb %xmm15,%xmm13").
913	movdqa	%xmm12,-40(%rsp)
914.byte	102,69,15,56,0,231
915	movdqa	%xmm13,-24(%rsp)
916.byte	102,69,15,56,0,239
917
# Spread the first three counter values into separate registers.
918	pshufd	$192,%xmm12,%xmm2
919	pshufd	$128,%xmm12,%xmm3
920	pshufd	$64,%xmm12,%xmm4
921	cmpq	$6,%rdx
922	jb	.Lctr32_tail
923	shrl	$1,%eax
924	movq	%rcx,%r11
925	movl	%eax,%r10d
926	subq	$6,%rdx
927	jmp	.Lctr32_loop6
928
929.align	16
930.Lctr32_loop6:
# Six blocks per iteration. Finish building the six counter blocks (por with
# %xmm14 merges the fixed part), whiten them and start the first round.
931	pshufd	$192,%xmm13,%xmm5
932	por	%xmm14,%xmm2
933	movups	(%r11),%xmm0
934	pshufd	$128,%xmm13,%xmm6
935	por	%xmm14,%xmm3
936	movups	16(%r11),%xmm1
937	pshufd	$64,%xmm13,%xmm7
938	por	%xmm14,%xmm4
939	por	%xmm14,%xmm5
940	xorps	%xmm0,%xmm2
941	por	%xmm14,%xmm6
942	por	%xmm14,%xmm7
943
944
945
946
# Whitening of lanes 3..7 interleaved with the first aesenc (%xmm1); %xmm13
# and %xmm12 are reloaded here for the counter update done after the loop.
947	pxor	%xmm0,%xmm3
948.byte	102,15,56,220,209
949	leaq	32(%r11),%rcx
950	pxor	%xmm0,%xmm4
951.byte	102,15,56,220,217
952	movdqa	.Lincrement32(%rip),%xmm13
953	pxor	%xmm0,%xmm5
954.byte	102,15,56,220,225
955	movdqa	-40(%rsp),%xmm12
956	pxor	%xmm0,%xmm6
957.byte	102,15,56,220,233
958	pxor	%xmm0,%xmm7
959	movups	(%rcx),%xmm0
# Flags from this decl are consumed by the jnz at the bottom of the round loop.
960	decl	%eax
961.byte	102,15,56,220,241
962.byte	102,15,56,220,249
963	jmp	.Lctr32_enc_loop6_enter
964.align	16
965.Lctr32_enc_loop6:
# aesenc %xmm1 into %xmm2..%xmm7.
966.byte	102,15,56,220,209
967.byte	102,15,56,220,217
968	decl	%eax
969.byte	102,15,56,220,225
970.byte	102,15,56,220,233
971.byte	102,15,56,220,241
972.byte	102,15,56,220,249
973.Lctr32_enc_loop6_enter:
974	movups	16(%rcx),%xmm1
# aesenc %xmm0 into %xmm2..%xmm7.
975.byte	102,15,56,220,208
976.byte	102,15,56,220,216
977	leaq	32(%rcx),%rcx
978.byte	102,15,56,220,224
979.byte	102,15,56,220,232
980.byte	102,15,56,220,240
981.byte	102,15,56,220,248
982	movups	(%rcx),%xmm0
983	jnz	.Lctr32_enc_loop6
984
# Last aesenc round (%xmm1) interleaved with the counter update: both saved
# triples are advanced by .Lincrement32, stored back in integer order, and
# the register copies are byte-shuffled again.
985.byte	102,15,56,220,209
986	paddd	%xmm13,%xmm12
987.byte	102,15,56,220,217
988	paddd	-24(%rsp),%xmm13
989.byte	102,15,56,220,225
990	movdqa	%xmm12,-40(%rsp)
991.byte	102,15,56,220,233
992	movdqa	%xmm13,-24(%rsp)
993.byte	102,15,56,220,241
994.byte	102,69,15,56,0,231
995.byte	102,15,56,220,249
996.byte	102,69,15,56,0,239
997
# aesenclast (%xmm0) per lane, interleaved with loading six input blocks.
# %xmm1 and %xmm0 are reused for input once the last key has been consumed.
998.byte	102,15,56,221,208
999	movups	(%rdi),%xmm8
1000.byte	102,15,56,221,216
1001	movups	16(%rdi),%xmm9
1002.byte	102,15,56,221,224
1003	movups	32(%rdi),%xmm10
1004.byte	102,15,56,221,232
1005	movups	48(%rdi),%xmm11
1006.byte	102,15,56,221,240
1007	movups	64(%rdi),%xmm1
1008.byte	102,15,56,221,248
1009	movups	80(%rdi),%xmm0
1010	leaq	96(%rdi),%rdi
1011
# output = input ^ keystream; the pshufd rows already stage the next three
# counter values in %xmm2..%xmm4.
1012	xorps	%xmm2,%xmm8
1013	pshufd	$192,%xmm12,%xmm2
1014	xorps	%xmm3,%xmm9
1015	pshufd	$128,%xmm12,%xmm3
1016	movups	%xmm8,(%rsi)
1017	xorps	%xmm4,%xmm10
1018	pshufd	$64,%xmm12,%xmm4
1019	movups	%xmm9,16(%rsi)
1020	xorps	%xmm5,%xmm11
1021	movups	%xmm10,32(%rsi)
1022	xorps	%xmm6,%xmm1
1023	movups	%xmm11,48(%rsi)
1024	xorps	%xmm7,%xmm0
1025	movups	%xmm1,64(%rsi)
1026	movups	%xmm0,80(%rsi)
1027	leaq	96(%rsi),%rsi
1028	movl	%r10d,%eax
1029	subq	$6,%rdx
1030	jnc	.Lctr32_loop6
1031
# Fewer than six blocks remain. Undo the over-subtraction; zero means done.
1032	addq	$6,%rdx
1033	jz	.Lctr32_done
1034	movq	%r11,%rcx
# %eax = 2*%eax + 1: rebuilds the un-halved loop count expected by the tail
# code (exact only if the original count was odd -- NOTE(review): confirm
# against the values the key setup stores at offset 240).
1035	leal	1(%rax,%rax,1),%eax
1036
1037.Lctr32_tail:
# Tail of one to five blocks. Counter blocks are completed lazily as the
# dispatch proceeds; each cmpq feeds both the jb and the following je (the
# por/pshufd/movups in between do not change flags).
1038	por	%xmm14,%xmm2
1039	movups	(%rdi),%xmm8
1040	cmpq	$2,%rdx
1041	jb	.Lctr32_one
1042
1043	por	%xmm14,%xmm3
1044	movups	16(%rdi),%xmm9
1045	je	.Lctr32_two
1046
1047	pshufd	$192,%xmm13,%xmm5
1048	por	%xmm14,%xmm4
1049	movups	32(%rdi),%xmm10
1050	cmpq	$4,%rdx
1051	jb	.Lctr32_three
1052
1053	pshufd	$128,%xmm13,%xmm6
1054	por	%xmm14,%xmm5
1055	movups	48(%rdi),%xmm11
1056	je	.Lctr32_four
1057
# Five blocks: six-way helper with the unused sixth lane zeroed.
1058	por	%xmm14,%xmm6
1059	xorps	%xmm7,%xmm7
1060
1061	call	_aesni_encrypt6
1062
1063	movups	64(%rdi),%xmm1
1064	xorps	%xmm2,%xmm8
1065	xorps	%xmm3,%xmm9
1066	movups	%xmm8,(%rsi)
1067	xorps	%xmm4,%xmm10
1068	movups	%xmm9,16(%rsi)
1069	xorps	%xmm5,%xmm11
1070	movups	%xmm10,32(%rsi)
1071	xorps	%xmm6,%xmm1
1072	movups	%xmm11,48(%rsi)
1073	movups	%xmm1,64(%rsi)
1074	jmp	.Lctr32_done
1075
1076.align	16
1077.Lctr32_one_shortcut:
# Exactly one block requested: use the caller's counter block unchanged.
1078	movups	(%r8),%xmm2
1079	movups	(%rdi),%xmm8
1080	movl	240(%rcx),%eax
1081.Lctr32_one:
# Single-lane loop on %xmm2, then output = input ^ keystream.
1082	movups	(%rcx),%xmm0
1083	movups	16(%rcx),%xmm1
1084	leaq	32(%rcx),%rcx
1085	xorps	%xmm0,%xmm2
1086.Loop_enc1_7:
1087.byte	102,15,56,220,209
1088	decl	%eax
1089	movups	(%rcx),%xmm1
1090	leaq	16(%rcx),%rcx
1091	jnz	.Loop_enc1_7
1092.byte	102,15,56,221,209
1093	xorps	%xmm2,%xmm8
1094	movups	%xmm8,(%rsi)
1095	jmp	.Lctr32_done
1096
1097.align	16
1098.Lctr32_two:
# Two blocks: three-way helper with the unused third lane zeroed.
1099	xorps	%xmm4,%xmm4
1100	call	_aesni_encrypt3
1101	xorps	%xmm2,%xmm8
1102	xorps	%xmm3,%xmm9
1103	movups	%xmm8,(%rsi)
1104	movups	%xmm9,16(%rsi)
1105	jmp	.Lctr32_done
1106
1107.align	16
1108.Lctr32_three:
1109	call	_aesni_encrypt3
1110	xorps	%xmm2,%xmm8
1111	xorps	%xmm3,%xmm9
1112	movups	%xmm8,(%rsi)
1113	xorps	%xmm4,%xmm10
1114	movups	%xmm9,16(%rsi)
1115	movups	%xmm10,32(%rsi)
1116	jmp	.Lctr32_done
1117
1118.align	16
1119.Lctr32_four:
# Four blocks: last case, falls through into the common return.
1120	call	_aesni_encrypt4
1121	xorps	%xmm2,%xmm8
1122	xorps	%xmm3,%xmm9
1123	movups	%xmm8,(%rsi)
1124	xorps	%xmm4,%xmm10
1125	movups	%xmm9,16(%rsi)
1126	xorps	%xmm5,%xmm11
1127	movups	%xmm10,32(%rsi)
1128	movups	%xmm11,48(%rsi)
1129
1130.Lctr32_done:
# Encoded "rep ret".
1131	.byte	0xf3,0xc3
1132.size	aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
1133.globl	aesni_xts_encrypt
1134.type	aesni_xts_encrypt,@function
1135.align	16
1136aesni_xts_encrypt:
1137	leaq	-104(%rsp),%rsp
1138	movups	(%r9),%xmm15
1139	movl	240(%r8),%eax
1140	movl	240(%rcx),%r10d
1141	movups	(%r8),%xmm0
1142	movups	16(%r8),%xmm1
1143	leaq	32(%r8),%r8
1144	xorps	%xmm0,%xmm15
1145.Loop_enc1_8:
1146.byte	102,68,15,56,220,249
1147	decl	%eax
1148	movups	(%r8),%xmm1
1149	leaq	16(%r8),%r8
1150	jnz	.Loop_enc1_8
1151.byte	102,68,15,56,221,249
1152	movq	%rcx,%r11
1153	movl	%r10d,%eax
1154	movq	%rdx,%r9
1155	andq	$-16,%rdx
1156
1157	movdqa	.Lxts_magic(%rip),%xmm8
1158	pxor	%xmm14,%xmm14
1159	pcmpgtd	%xmm15,%xmm14
1160	pshufd	$19,%xmm14,%xmm9
1161	pxor	%xmm14,%xmm14
1162	movdqa	%xmm15,%xmm10
1163	paddq	%xmm15,%xmm15
1164	pand	%xmm8,%xmm9
1165	pcmpgtd	%xmm15,%xmm14
1166	pxor	%xmm9,%xmm15
1167	pshufd	$19,%xmm14,%xmm9
1168	pxor	%xmm14,%xmm14
1169	movdqa	%xmm15,%xmm11
1170	paddq	%xmm15,%xmm15
1171	pand	%xmm8,%xmm9
1172	pcmpgtd	%xmm15,%xmm14
1173	pxor	%xmm9,%xmm15
1174	pshufd	$19,%xmm14,%xmm9
1175	pxor	%xmm14,%xmm14
1176	movdqa	%xmm15,%xmm12
1177	paddq	%xmm15,%xmm15
1178	pand	%xmm8,%xmm9
1179	pcmpgtd	%xmm15,%xmm14
1180	pxor	%xmm9,%xmm15
1181	pshufd	$19,%xmm14,%xmm9
1182	pxor	%xmm14,%xmm14
1183	movdqa	%xmm15,%xmm13
1184	paddq	%xmm15,%xmm15
1185	pand	%xmm8,%xmm9
1186	pcmpgtd	%xmm15,%xmm14
1187	pxor	%xmm9,%xmm15
1188	subq	$96,%rdx
1189	jc	.Lxts_enc_short
1190
1191	shrl	$1,%eax
1192	subl	$1,%eax
1193	movl	%eax,%r10d
1194	jmp	.Lxts_enc_grandloop
1195
1196.align	16
1197.Lxts_enc_grandloop:
1198	pshufd	$19,%xmm14,%xmm9
1199	movdqa	%xmm15,%xmm14
1200	paddq	%xmm15,%xmm15
1201	movdqu	0(%rdi),%xmm2
1202	pand	%xmm8,%xmm9
1203	movdqu	16(%rdi),%xmm3
1204	pxor	%xmm9,%xmm15
1205
1206	movdqu	32(%rdi),%xmm4
1207	pxor	%xmm10,%xmm2
1208	movdqu	48(%rdi),%xmm5
1209	pxor	%xmm11,%xmm3
1210	movdqu	64(%rdi),%xmm6
1211	pxor	%xmm12,%xmm4
1212	movdqu	80(%rdi),%xmm7
1213	leaq	96(%rdi),%rdi
1214	pxor	%xmm13,%xmm5
1215	movups	(%r11),%xmm0
1216	pxor	%xmm14,%xmm6
1217	pxor	%xmm15,%xmm7
1218
1219
1220
1221	movups	16(%r11),%xmm1
1222	pxor	%xmm0,%xmm2
1223	pxor	%xmm0,%xmm3
1224	movdqa	%xmm10,0(%rsp)
1225.byte	102,15,56,220,209
1226	leaq	32(%r11),%rcx
1227	pxor	%xmm0,%xmm4
1228	movdqa	%xmm11,16(%rsp)
1229.byte	102,15,56,220,217
1230	pxor	%xmm0,%xmm5
1231	movdqa	%xmm12,32(%rsp)
1232.byte	102,15,56,220,225
1233	pxor	%xmm0,%xmm6
1234	movdqa	%xmm13,48(%rsp)
1235.byte	102,15,56,220,233
1236	pxor	%xmm0,%xmm7
1237	movups	(%rcx),%xmm0
1238	decl	%eax
1239	movdqa	%xmm14,64(%rsp)
1240.byte	102,15,56,220,241
1241	movdqa	%xmm15,80(%rsp)
1242.byte	102,15,56,220,249
1243	pxor	%xmm14,%xmm14
1244	pcmpgtd	%xmm15,%xmm14
1245	jmp	.Lxts_enc_loop6_enter
1246
1247.align	16
1248.Lxts_enc_loop6:
1249.byte	102,15,56,220,209
1250.byte	102,15,56,220,217
1251	decl	%eax
1252.byte	102,15,56,220,225
1253.byte	102,15,56,220,233
1254.byte	102,15,56,220,241
1255.byte	102,15,56,220,249
1256.Lxts_enc_loop6_enter:
1257	movups	16(%rcx),%xmm1
1258.byte	102,15,56,220,208
1259.byte	102,15,56,220,216
1260	leaq	32(%rcx),%rcx
1261.byte	102,15,56,220,224
1262.byte	102,15,56,220,232
1263.byte	102,15,56,220,240
1264.byte	102,15,56,220,248
1265	movups	(%rcx),%xmm0
1266	jnz	.Lxts_enc_loop6
1267
1268	pshufd	$19,%xmm14,%xmm9
1269	pxor	%xmm14,%xmm14
1270	paddq	%xmm15,%xmm15
1271.byte	102,15,56,220,209
1272	pand	%xmm8,%xmm9
1273.byte	102,15,56,220,217
1274	pcmpgtd	%xmm15,%xmm14
1275.byte	102,15,56,220,225
1276	pxor	%xmm9,%xmm15
1277.byte	102,15,56,220,233
1278.byte	102,15,56,220,241
1279.byte	102,15,56,220,249
1280	movups	16(%rcx),%xmm1
1281
1282	pshufd	$19,%xmm14,%xmm9
1283	pxor	%xmm14,%xmm14
1284	movdqa	%xmm15,%xmm10
1285	paddq	%xmm15,%xmm15
1286.byte	102,15,56,220,208
1287	pand	%xmm8,%xmm9
1288.byte	102,15,56,220,216
1289	pcmpgtd	%xmm15,%xmm14
1290.byte	102,15,56,220,224
1291	pxor	%xmm9,%xmm15
1292.byte	102,15,56,220,232
1293.byte	102,15,56,220,240
1294.byte	102,15,56,220,248
1295	movups	32(%rcx),%xmm0
1296
1297	pshufd	$19,%xmm14,%xmm9
1298	pxor	%xmm14,%xmm14
1299	movdqa	%xmm15,%xmm11
1300	paddq	%xmm15,%xmm15
1301.byte	102,15,56,220,209
1302	pand	%xmm8,%xmm9
1303.byte	102,15,56,220,217
1304	pcmpgtd	%xmm15,%xmm14
1305.byte	102,15,56,220,225
1306	pxor	%xmm9,%xmm15
1307.byte	102,15,56,220,233
1308.byte	102,15,56,220,241
1309.byte	102,15,56,220,249
1310
1311	pshufd	$19,%xmm14,%xmm9
1312	pxor	%xmm14,%xmm14
1313	movdqa	%xmm15,%xmm12
1314	paddq	%xmm15,%xmm15
1315.byte	102,15,56,221,208
1316	pand	%xmm8,%xmm9
1317.byte	102,15,56,221,216
1318	pcmpgtd	%xmm15,%xmm14
1319.byte	102,15,56,221,224
1320	pxor	%xmm9,%xmm15
1321.byte	102,15,56,221,232
1322.byte	102,15,56,221,240
1323.byte	102,15,56,221,248
1324
1325	pshufd	$19,%xmm14,%xmm9
1326	pxor	%xmm14,%xmm14
1327	movdqa	%xmm15,%xmm13
1328	paddq	%xmm15,%xmm15
1329	xorps	0(%rsp),%xmm2
1330	pand	%xmm8,%xmm9
1331	xorps	16(%rsp),%xmm3
1332	pcmpgtd	%xmm15,%xmm14
1333	pxor	%xmm9,%xmm15
1334
1335	xorps	32(%rsp),%xmm4
1336	movups	%xmm2,0(%rsi)
1337	xorps	48(%rsp),%xmm5
1338	movups	%xmm3,16(%rsi)
1339	xorps	64(%rsp),%xmm6
1340	movups	%xmm4,32(%rsi)
1341	xorps	80(%rsp),%xmm7
1342	movups	%xmm5,48(%rsi)
1343	movl	%r10d,%eax
1344	movups	%xmm6,64(%rsi)
1345	movups	%xmm7,80(%rsi)
1346	leaq	96(%rsi),%rsi
1347	subq	$96,%rdx
1348	jnc	.Lxts_enc_grandloop
1349
1350	leal	3(%rax,%rax,1),%eax
1351	movq	%r11,%rcx
1352	movl	%eax,%r10d
1353
1354.Lxts_enc_short:
1355	addq	$96,%rdx
1356	jz	.Lxts_enc_done
1357
1358	cmpq	$32,%rdx
1359	jb	.Lxts_enc_one
1360	je	.Lxts_enc_two
1361
1362	cmpq	$64,%rdx
1363	jb	.Lxts_enc_three
1364	je	.Lxts_enc_four
1365
1366	pshufd	$19,%xmm14,%xmm9
1367	movdqa	%xmm15,%xmm14
1368	paddq	%xmm15,%xmm15
1369	movdqu	(%rdi),%xmm2
1370	pand	%xmm8,%xmm9
1371	movdqu	16(%rdi),%xmm3
1372	pxor	%xmm9,%xmm15
1373
1374	movdqu	32(%rdi),%xmm4
1375	pxor	%xmm10,%xmm2
1376	movdqu	48(%rdi),%xmm5
1377	pxor	%xmm11,%xmm3
1378	movdqu	64(%rdi),%xmm6
1379	leaq	80(%rdi),%rdi
1380	pxor	%xmm12,%xmm4
1381	pxor	%xmm13,%xmm5
1382	pxor	%xmm14,%xmm6
1383
1384	call	_aesni_encrypt6
1385
1386	xorps	%xmm10,%xmm2
1387	movdqa	%xmm15,%xmm10
1388	xorps	%xmm11,%xmm3
1389	xorps	%xmm12,%xmm4
1390	movdqu	%xmm2,(%rsi)
1391	xorps	%xmm13,%xmm5
1392	movdqu	%xmm3,16(%rsi)
1393	xorps	%xmm14,%xmm6
1394	movdqu	%xmm4,32(%rsi)
1395	movdqu	%xmm5,48(%rsi)
1396	movdqu	%xmm6,64(%rsi)
1397	leaq	80(%rsi),%rsi
1398	jmp	.Lxts_enc_done
1399
1400.align	16
1401.Lxts_enc_one:
1402	movups	(%rdi),%xmm2
1403	leaq	16(%rdi),%rdi
1404	xorps	%xmm10,%xmm2
1405	movups	(%rcx),%xmm0
1406	movups	16(%rcx),%xmm1
1407	leaq	32(%rcx),%rcx
1408	xorps	%xmm0,%xmm2
1409.Loop_enc1_9:
1410.byte	102,15,56,220,209
1411	decl	%eax
1412	movups	(%rcx),%xmm1
1413	leaq	16(%rcx),%rcx
1414	jnz	.Loop_enc1_9
1415.byte	102,15,56,221,209
1416	xorps	%xmm10,%xmm2
1417	movdqa	%xmm11,%xmm10
1418	movups	%xmm2,(%rsi)
1419	leaq	16(%rsi),%rsi
1420	jmp	.Lxts_enc_done
1421
1422.align	16
1423.Lxts_enc_two:
1424	movups	(%rdi),%xmm2
1425	movups	16(%rdi),%xmm3
1426	leaq	32(%rdi),%rdi
1427	xorps	%xmm10,%xmm2
1428	xorps	%xmm11,%xmm3
1429
1430	call	_aesni_encrypt3
1431
1432	xorps	%xmm10,%xmm2
1433	movdqa	%xmm12,%xmm10
1434	xorps	%xmm11,%xmm3
1435	movups	%xmm2,(%rsi)
1436	movups	%xmm3,16(%rsi)
1437	leaq	32(%rsi),%rsi
1438	jmp	.Lxts_enc_done
1439
1440.align	16
1441.Lxts_enc_three:
1442	movups	(%rdi),%xmm2
1443	movups	16(%rdi),%xmm3
1444	movups	32(%rdi),%xmm4
1445	leaq	48(%rdi),%rdi
1446	xorps	%xmm10,%xmm2
1447	xorps	%xmm11,%xmm3
1448	xorps	%xmm12,%xmm4
1449
1450	call	_aesni_encrypt3
1451
1452	xorps	%xmm10,%xmm2
1453	movdqa	%xmm13,%xmm10
1454	xorps	%xmm11,%xmm3
1455	xorps	%xmm12,%xmm4
1456	movups	%xmm2,(%rsi)
1457	movups	%xmm3,16(%rsi)
1458	movups	%xmm4,32(%rsi)
1459	leaq	48(%rsi),%rsi
1460	jmp	.Lxts_enc_done
1461
1462.align	16
1463.Lxts_enc_four:
1464	movups	(%rdi),%xmm2
1465	movups	16(%rdi),%xmm3
1466	movups	32(%rdi),%xmm4
1467	xorps	%xmm10,%xmm2
1468	movups	48(%rdi),%xmm5
1469	leaq	64(%rdi),%rdi
1470	xorps	%xmm11,%xmm3
1471	xorps	%xmm12,%xmm4
1472	xorps	%xmm13,%xmm5
1473
1474	call	_aesni_encrypt4
1475
1476	xorps	%xmm10,%xmm2
1477	movdqa	%xmm15,%xmm10
1478	xorps	%xmm11,%xmm3
1479	xorps	%xmm12,%xmm4
1480	movups	%xmm2,(%rsi)
1481	xorps	%xmm13,%xmm5
1482	movups	%xmm3,16(%rsi)
1483	movups	%xmm4,32(%rsi)
1484	movups	%xmm5,48(%rsi)
1485	leaq	64(%rsi),%rsi
1486	jmp	.Lxts_enc_done
1487
1488.align	16
1489.Lxts_enc_done:
1490	andq	$15,%r9
1491	jz	.Lxts_enc_ret
1492	movq	%r9,%rdx
1493
1494.Lxts_enc_steal:
1495	movzbl	(%rdi),%eax
1496	movzbl	-16(%rsi),%ecx
1497	leaq	1(%rdi),%rdi
1498	movb	%al,-16(%rsi)
1499	movb	%cl,0(%rsi)
1500	leaq	1(%rsi),%rsi
1501	subq	$1,%rdx
1502	jnz	.Lxts_enc_steal
1503
1504	subq	%r9,%rsi
1505	movq	%r11,%rcx
1506	movl	%r10d,%eax
1507
1508	movups	-16(%rsi),%xmm2
1509	xorps	%xmm10,%xmm2
1510	movups	(%rcx),%xmm0
1511	movups	16(%rcx),%xmm1
1512	leaq	32(%rcx),%rcx
1513	xorps	%xmm0,%xmm2
1514.Loop_enc1_10:
1515.byte	102,15,56,220,209
1516	decl	%eax
1517	movups	(%rcx),%xmm1
1518	leaq	16(%rcx),%rcx
1519	jnz	.Loop_enc1_10
1520.byte	102,15,56,221,209
1521	xorps	%xmm10,%xmm2
1522	movups	%xmm2,-16(%rsi)
1523
1524.Lxts_enc_ret:
1525	leaq	104(%rsp),%rsp
1526.Lxts_enc_epilogue:
1527	.byte	0xf3,0xc3
1528.size	aesni_xts_encrypt,.-aesni_xts_encrypt
# aesni_xts_decrypt: XTS-mode decryption using AES-NI (GAS AT&T syntax, x86-64).
#
# Arguments as the code reads them (the C prototype is not visible in this part
# of the file -- presumably (in, out, len, key1, key2, iv); confirm against the
# header):
#   %rdi  input pointer            %rsi  output pointer
#   %rdx  length in bytes          %rcx  key schedule applied to the data
#   %r8   key schedule used once to encrypt the initial tweak
#   %r9   pointer to the 16 bytes that are encrypted to produce the first tweak
# Both key schedules carry a 32-bit round counter at byte offset 240.
#
# AES-NI opcodes are emitted as raw bytes:
#   bytes 102,15,56,222,M = aesdec, bytes 102,15,56,223,M = aesdeclast,
#   bytes 102,68,15,56,220,249 / ...,221,249 = aesenc / aesenclast %xmm1,%xmm15.
#   ModRM M = 209/217/225/233/241/249 applies %xmm1 to %xmm2..%xmm7,
#   ModRM M = 208/216/224/232/240/248 applies %xmm0 to %xmm2..%xmm7.
#   Bytes 0xf3,0xc3 are "rep ret".
#
# Register roles after setup:
#   %xmm8   .Lxts_magic            %xmm10-%xmm13  tweaks for the next four blocks
#   %xmm15  newest tweak           %xmm14  sign mask of the dwords of %xmm15
#   %r11    copy of %rcx           %r10d   saved round counter
#   %r9     adjusted length; its low four bits are the trailing partial-block size
1529.globl	aesni_xts_decrypt
1530.type	aesni_xts_decrypt,@function
1531.align	16
1532aesni_xts_decrypt:
	# Reserve 104 bytes. The movdqa spills to 0..80(%rsp) below need %rsp to be
	# 16-byte aligned, which this adjustment gives for the SysV entry alignment.
1533	leaq	-104(%rsp),%rsp
	# Build the initial tweak: encrypt the block at (%r9) with the %r8 schedule.
1534	movups	(%r9),%xmm15
1535	movl	240(%r8),%eax
1536	movl	240(%rcx),%r10d
1537	movups	(%r8),%xmm0
1538	movups	16(%r8),%xmm1
1539	leaq	32(%r8),%r8
1540	xorps	%xmm0,%xmm15
1541.Loop_enc1_11:
1542.byte	102,68,15,56,220,249
1543	decl	%eax
1544	movups	(%r8),%xmm1
1545	leaq	16(%r8),%r8
	# jnz tests the flags left by decl; movups and leaq do not touch them.
1546	jnz	.Loop_enc1_11
1547.byte	102,68,15,56,221,249
	# When the length is not a multiple of 16, hold back one whole block so it
	# can be handled together with the partial tail after .Lxts_dec_done.
1548	xorl	%eax,%eax
1549	testq	$15,%rdx
1550	setnz	%al
1551	shlq	$4,%rax
1552	subq	%rax,%rdx
1553
1554	movq	%rcx,%r11
1555	movl	%r10d,%eax
1556	movq	%rdx,%r9
1557	andq	$-16,%rdx
1558
	# Tweak generation. Each step saves %xmm15 as a block tweak and then doubles
	# it: paddq shifts each 64-bit half left by one bit, and the sign mask from
	# the previous pcmpgtd, rearranged by pshufd $19 and masked with .Lxts_magic,
	# carries bit 63 into bit 64 and folds bit 127 back in as 0x87.
1559	movdqa	.Lxts_magic(%rip),%xmm8
1560	pxor	%xmm14,%xmm14
1561	pcmpgtd	%xmm15,%xmm14
1562	pshufd	$19,%xmm14,%xmm9
1563	pxor	%xmm14,%xmm14
1564	movdqa	%xmm15,%xmm10
1565	paddq	%xmm15,%xmm15
1566	pand	%xmm8,%xmm9
1567	pcmpgtd	%xmm15,%xmm14
1568	pxor	%xmm9,%xmm15
1569	pshufd	$19,%xmm14,%xmm9
1570	pxor	%xmm14,%xmm14
1571	movdqa	%xmm15,%xmm11
1572	paddq	%xmm15,%xmm15
1573	pand	%xmm8,%xmm9
1574	pcmpgtd	%xmm15,%xmm14
1575	pxor	%xmm9,%xmm15
1576	pshufd	$19,%xmm14,%xmm9
1577	pxor	%xmm14,%xmm14
1578	movdqa	%xmm15,%xmm12
1579	paddq	%xmm15,%xmm15
1580	pand	%xmm8,%xmm9
1581	pcmpgtd	%xmm15,%xmm14
1582	pxor	%xmm9,%xmm15
1583	pshufd	$19,%xmm14,%xmm9
1584	pxor	%xmm14,%xmm14
1585	movdqa	%xmm15,%xmm13
1586	paddq	%xmm15,%xmm15
1587	pand	%xmm8,%xmm9
1588	pcmpgtd	%xmm15,%xmm14
1589	pxor	%xmm9,%xmm15
	# Fewer than six whole blocks left: skip the 6-way loop.
1590	subq	$96,%rdx
1591	jc	.Lxts_dec_short
1592
	# The 6-way loop performs two rounds per iteration, so its counter is
	# (round counter >> 1) - 1; the other rounds are unrolled around the loop.
1593	shrl	$1,%eax
1594	subl	$1,%eax
1595	movl	%eax,%r10d
1596	jmp	.Lxts_dec_grandloop
1597
1598.align	16
1599.Lxts_dec_grandloop:
	# Derive the fifth and sixth tweaks (%xmm14, %xmm15) while loading six input
	# blocks into %xmm2..%xmm7 and XORing each with its tweak.
1600	pshufd	$19,%xmm14,%xmm9
1601	movdqa	%xmm15,%xmm14
1602	paddq	%xmm15,%xmm15
1603	movdqu	0(%rdi),%xmm2
1604	pand	%xmm8,%xmm9
1605	movdqu	16(%rdi),%xmm3
1606	pxor	%xmm9,%xmm15
1607
1608	movdqu	32(%rdi),%xmm4
1609	pxor	%xmm10,%xmm2
1610	movdqu	48(%rdi),%xmm5
1611	pxor	%xmm11,%xmm3
1612	movdqu	64(%rdi),%xmm6
1613	pxor	%xmm12,%xmm4
1614	movdqu	80(%rdi),%xmm7
1615	leaq	96(%rdi),%rdi
1616	pxor	%xmm13,%xmm5
1617	movups	(%r11),%xmm0
1618	pxor	%xmm14,%xmm6
1619	pxor	%xmm15,%xmm7
1620
1621
1622
	# XOR in round key 0, spill the six tweaks to the stack, and run the first
	# aesdec round (round key at 16(%r11)) interleaved with those stores.
1623	movups	16(%r11),%xmm1
1624	pxor	%xmm0,%xmm2
1625	pxor	%xmm0,%xmm3
1626	movdqa	%xmm10,0(%rsp)
1627.byte	102,15,56,222,209
1628	leaq	32(%r11),%rcx
1629	pxor	%xmm0,%xmm4
1630	movdqa	%xmm11,16(%rsp)
1631.byte	102,15,56,222,217
1632	pxor	%xmm0,%xmm5
1633	movdqa	%xmm12,32(%rsp)
1634.byte	102,15,56,222,225
1635	pxor	%xmm0,%xmm6
1636	movdqa	%xmm13,48(%rsp)
1637.byte	102,15,56,222,233
1638	pxor	%xmm0,%xmm7
1639	movups	(%rcx),%xmm0
	# The flags from this decl are consumed by the jnz at the end of
	# .Lxts_dec_loop6_enter; nothing in between modifies EFLAGS.
1640	decl	%eax
1641	movdqa	%xmm14,64(%rsp)
1642.byte	102,15,56,222,241
1643	movdqa	%xmm15,80(%rsp)
1644.byte	102,15,56,222,249
	# %xmm14 was used as a tweak above; rebuild it as the sign mask of %xmm15.
1645	pxor	%xmm14,%xmm14
1646	pcmpgtd	%xmm15,%xmm14
1647	jmp	.Lxts_dec_loop6_enter
1648
1649.align	16
1650.Lxts_dec_loop6:
	# Two aesdec rounds per iteration over %xmm2..%xmm7, alternating the round
	# keys held in %xmm1 and %xmm0; %rcx advances 32 bytes per iteration.
1651.byte	102,15,56,222,209
1652.byte	102,15,56,222,217
1653	decl	%eax
1654.byte	102,15,56,222,225
1655.byte	102,15,56,222,233
1656.byte	102,15,56,222,241
1657.byte	102,15,56,222,249
1658.Lxts_dec_loop6_enter:
1659	movups	16(%rcx),%xmm1
1660.byte	102,15,56,222,208
1661.byte	102,15,56,222,216
1662	leaq	32(%rcx),%rcx
1663.byte	102,15,56,222,224
1664.byte	102,15,56,222,232
1665.byte	102,15,56,222,240
1666.byte	102,15,56,222,248
1667	movups	(%rcx),%xmm0
1668	jnz	.Lxts_dec_loop6
1669
	# Remaining rounds, interleaved with computing the tweaks for the next batch.
	# This first step only doubles %xmm15 (the sixth tweak is already spilled).
1670	pshufd	$19,%xmm14,%xmm9
1671	pxor	%xmm14,%xmm14
1672	paddq	%xmm15,%xmm15
1673.byte	102,15,56,222,209
1674	pand	%xmm8,%xmm9
1675.byte	102,15,56,222,217
1676	pcmpgtd	%xmm15,%xmm14
1677.byte	102,15,56,222,225
1678	pxor	%xmm9,%xmm15
1679.byte	102,15,56,222,233
1680.byte	102,15,56,222,241
1681.byte	102,15,56,222,249
1682	movups	16(%rcx),%xmm1
1683
	# Next batch, first tweak -> %xmm10.
1684	pshufd	$19,%xmm14,%xmm9
1685	pxor	%xmm14,%xmm14
1686	movdqa	%xmm15,%xmm10
1687	paddq	%xmm15,%xmm15
1688.byte	102,15,56,222,208
1689	pand	%xmm8,%xmm9
1690.byte	102,15,56,222,216
1691	pcmpgtd	%xmm15,%xmm14
1692.byte	102,15,56,222,224
1693	pxor	%xmm9,%xmm15
1694.byte	102,15,56,222,232
1695.byte	102,15,56,222,240
1696.byte	102,15,56,222,248
1697	movups	32(%rcx),%xmm0
1698
	# Next batch, second tweak -> %xmm11.
1699	pshufd	$19,%xmm14,%xmm9
1700	pxor	%xmm14,%xmm14
1701	movdqa	%xmm15,%xmm11
1702	paddq	%xmm15,%xmm15
1703.byte	102,15,56,222,209
1704	pand	%xmm8,%xmm9
1705.byte	102,15,56,222,217
1706	pcmpgtd	%xmm15,%xmm14
1707.byte	102,15,56,222,225
1708	pxor	%xmm9,%xmm15
1709.byte	102,15,56,222,233
1710.byte	102,15,56,222,241
1711.byte	102,15,56,222,249
1712
	# Next batch, third tweak -> %xmm12; the six aesdeclast finish the blocks.
1713	pshufd	$19,%xmm14,%xmm9
1714	pxor	%xmm14,%xmm14
1715	movdqa	%xmm15,%xmm12
1716	paddq	%xmm15,%xmm15
1717.byte	102,15,56,223,208
1718	pand	%xmm8,%xmm9
1719.byte	102,15,56,223,216
1720	pcmpgtd	%xmm15,%xmm14
1721.byte	102,15,56,223,224
1722	pxor	%xmm9,%xmm15
1723.byte	102,15,56,223,232
1724.byte	102,15,56,223,240
1725.byte	102,15,56,223,248
1726
	# Next batch, fourth tweak -> %xmm13, while the decrypted blocks are XORed
	# with the tweaks spilled at 0..80(%rsp) and written to the output.
1727	pshufd	$19,%xmm14,%xmm9
1728	pxor	%xmm14,%xmm14
1729	movdqa	%xmm15,%xmm13
1730	paddq	%xmm15,%xmm15
1731	xorps	0(%rsp),%xmm2
1732	pand	%xmm8,%xmm9
1733	xorps	16(%rsp),%xmm3
1734	pcmpgtd	%xmm15,%xmm14
1735	pxor	%xmm9,%xmm15
1736
1737	xorps	32(%rsp),%xmm4
1738	movups	%xmm2,0(%rsi)
1739	xorps	48(%rsp),%xmm5
1740	movups	%xmm3,16(%rsi)
1741	xorps	64(%rsp),%xmm6
1742	movups	%xmm4,32(%rsi)
1743	xorps	80(%rsp),%xmm7
1744	movups	%xmm5,48(%rsi)
	# Reload the loop counter for the next batch.
1745	movl	%r10d,%eax
1746	movups	%xmm6,64(%rsi)
1747	movups	%xmm7,80(%rsi)
1748	leaq	96(%rsi),%rsi
1749	subq	$96,%rdx
1750	jnc	.Lxts_dec_grandloop
1751
	# Undo the counter transform: 2*eax+3 restores the round counter (exact for
	# the odd counters 9/11/13 that aesni_set_encrypt_key stores), and %rcx is
	# pointed back at the start of the key schedule.
1752	leal	3(%rax,%rax,1),%eax
1753	movq	%r11,%rcx
1754	movl	%eax,%r10d
1755
1756.Lxts_dec_short:
	# Restore %rdx to the bytes of whole blocks still pending (0..80) and
	# dispatch on the block count; the fall-through case handles five blocks.
1757	addq	$96,%rdx
1758	jz	.Lxts_dec_done
1759
1760	cmpq	$32,%rdx
1761	jb	.Lxts_dec_one
1762	je	.Lxts_dec_two
1763
1764	cmpq	$64,%rdx
1765	jb	.Lxts_dec_three
1766	je	.Lxts_dec_four
1767
	# Five blocks: %xmm14 takes the fifth tweak and %xmm15 advances by one.
1768	pshufd	$19,%xmm14,%xmm9
1769	movdqa	%xmm15,%xmm14
1770	paddq	%xmm15,%xmm15
1771	movdqu	(%rdi),%xmm2
1772	pand	%xmm8,%xmm9
1773	movdqu	16(%rdi),%xmm3
1774	pxor	%xmm9,%xmm15
1775
1776	movdqu	32(%rdi),%xmm4
1777	pxor	%xmm10,%xmm2
1778	movdqu	48(%rdi),%xmm5
1779	pxor	%xmm11,%xmm3
1780	movdqu	64(%rdi),%xmm6
1781	leaq	80(%rdi),%rdi
1782	pxor	%xmm12,%xmm4
1783	pxor	%xmm13,%xmm5
1784	pxor	%xmm14,%xmm6
1785
	# _aesni_decrypt6 is defined outside this section; presumably it decrypts
	# %xmm2..%xmm7 using %rcx/%eax. Only %xmm2..%xmm6 are stored afterwards.
1786	call	_aesni_decrypt6
1787
1788	xorps	%xmm10,%xmm2
1789	xorps	%xmm11,%xmm3
1790	xorps	%xmm12,%xmm4
1791	movdqu	%xmm2,(%rsi)
1792	xorps	%xmm13,%xmm5
1793	movdqu	%xmm3,16(%rsi)
1794	xorps	%xmm14,%xmm6
1795	movdqu	%xmm4,32(%rsi)
1796	pxor	%xmm14,%xmm14
1797	movdqu	%xmm5,48(%rsi)
1798	pcmpgtd	%xmm15,%xmm14
1799	movdqu	%xmm6,64(%rsi)
1800	leaq	80(%rsi),%rsi
1801	pshufd	$19,%xmm14,%xmm11
1802	andq	$15,%r9
1803	jz	.Lxts_dec_ret
1804
	# A partial tail exists: %xmm10 = current tweak, %xmm11 = the one after it.
1805	movdqa	%xmm15,%xmm10
1806	paddq	%xmm15,%xmm15
1807	pand	%xmm8,%xmm11
1808	pxor	%xmm15,%xmm11
1809	jmp	.Lxts_dec_done2
1810
1811.align	16
1812.Lxts_dec_one:
	# One block, decrypted inline with tweak %xmm10. Afterwards the tweaks are
	# shifted down so %xmm10/%xmm11 are the next two unused ones.
1813	movups	(%rdi),%xmm2
1814	leaq	16(%rdi),%rdi
1815	xorps	%xmm10,%xmm2
1816	movups	(%rcx),%xmm0
1817	movups	16(%rcx),%xmm1
1818	leaq	32(%rcx),%rcx
1819	xorps	%xmm0,%xmm2
1820.Loop_dec1_12:
1821.byte	102,15,56,222,209
1822	decl	%eax
1823	movups	(%rcx),%xmm1
1824	leaq	16(%rcx),%rcx
1825	jnz	.Loop_dec1_12
1826.byte	102,15,56,223,209
1827	xorps	%xmm10,%xmm2
1828	movdqa	%xmm11,%xmm10
1829	movups	%xmm2,(%rsi)
1830	movdqa	%xmm12,%xmm11
1831	leaq	16(%rsi),%rsi
1832	jmp	.Lxts_dec_done
1833
1834.align	16
1835.Lxts_dec_two:
	# Two blocks via _aesni_decrypt3; %xmm4 is not loaded here and its result
	# is not used. Next unused tweaks end up in %xmm10/%xmm11.
1836	movups	(%rdi),%xmm2
1837	movups	16(%rdi),%xmm3
1838	leaq	32(%rdi),%rdi
1839	xorps	%xmm10,%xmm2
1840	xorps	%xmm11,%xmm3
1841
1842	call	_aesni_decrypt3
1843
1844	xorps	%xmm10,%xmm2
1845	movdqa	%xmm12,%xmm10
1846	xorps	%xmm11,%xmm3
1847	movdqa	%xmm13,%xmm11
1848	movups	%xmm2,(%rsi)
1849	movups	%xmm3,16(%rsi)
1850	leaq	32(%rsi),%rsi
1851	jmp	.Lxts_dec_done
1852
1853.align	16
1854.Lxts_dec_three:
	# Three blocks via _aesni_decrypt3. Next unused tweaks: %xmm13 and %xmm15.
1855	movups	(%rdi),%xmm2
1856	movups	16(%rdi),%xmm3
1857	movups	32(%rdi),%xmm4
1858	leaq	48(%rdi),%rdi
1859	xorps	%xmm10,%xmm2
1860	xorps	%xmm11,%xmm3
1861	xorps	%xmm12,%xmm4
1862
1863	call	_aesni_decrypt3
1864
1865	xorps	%xmm10,%xmm2
1866	movdqa	%xmm13,%xmm10
1867	xorps	%xmm11,%xmm3
1868	movdqa	%xmm15,%xmm11
1869	xorps	%xmm12,%xmm4
1870	movups	%xmm2,(%rsi)
1871	movups	%xmm3,16(%rsi)
1872	movups	%xmm4,32(%rsi)
1873	leaq	48(%rsi),%rsi
1874	jmp	.Lxts_dec_done
1875
1876.align	16
1877.Lxts_dec_four:
	# Four blocks. One extra tweak is generated first so that two unused
	# tweaks (%xmm14, %xmm15) remain for a possible partial tail.
1878	pshufd	$19,%xmm14,%xmm9
1879	movdqa	%xmm15,%xmm14
1880	paddq	%xmm15,%xmm15
1881	movups	(%rdi),%xmm2
1882	pand	%xmm8,%xmm9
1883	movups	16(%rdi),%xmm3
1884	pxor	%xmm9,%xmm15
1885
1886	movups	32(%rdi),%xmm4
1887	xorps	%xmm10,%xmm2
1888	movups	48(%rdi),%xmm5
1889	leaq	64(%rdi),%rdi
1890	xorps	%xmm11,%xmm3
1891	xorps	%xmm12,%xmm4
1892	xorps	%xmm13,%xmm5
1893
1894	call	_aesni_decrypt4
1895
1896	xorps	%xmm10,%xmm2
1897	movdqa	%xmm14,%xmm10
1898	xorps	%xmm11,%xmm3
1899	movdqa	%xmm15,%xmm11
1900	xorps	%xmm12,%xmm4
1901	movups	%xmm2,(%rsi)
1902	xorps	%xmm13,%xmm5
1903	movups	%xmm3,16(%rsi)
1904	movups	%xmm4,32(%rsi)
1905	movups	%xmm5,48(%rsi)
1906	leaq	64(%rsi),%rsi
1907	jmp	.Lxts_dec_done
1908
1909.align	16
1910.Lxts_dec_done:
	# %r9 is reduced to the partial-tail byte count; zero means nothing is left.
1911	andq	$15,%r9
1912	jz	.Lxts_dec_ret
1913.Lxts_dec_done2:
	# Partial tail. First decrypt the held-back whole block at (%rdi) with the
	# later tweak (%xmm11) and write it to (%rsi).
1914	movq	%r9,%rdx
1915	movq	%r11,%rcx
1916	movl	%r10d,%eax
1917
1918	movups	(%rdi),%xmm2
1919	xorps	%xmm11,%xmm2
1920	movups	(%rcx),%xmm0
1921	movups	16(%rcx),%xmm1
1922	leaq	32(%rcx),%rcx
1923	xorps	%xmm0,%xmm2
1924.Loop_dec1_13:
1925.byte	102,15,56,222,209
1926	decl	%eax
1927	movups	(%rcx),%xmm1
1928	leaq	16(%rcx),%rcx
1929	jnz	.Loop_dec1_13
1930.byte	102,15,56,223,209
1931	xorps	%xmm11,%xmm2
1932	movups	%xmm2,(%rsi)
1933
1934.Lxts_dec_steal:
	# Byte-wise exchange for each of the %rdx tail bytes: the input byte at
	# offset 16 replaces the output byte at offset 0, and the displaced output
	# byte is written at output offset 16.
1935	movzbl	16(%rdi),%eax
1936	movzbl	(%rsi),%ecx
1937	leaq	1(%rdi),%rdi
1938	movb	%al,(%rsi)
1939	movb	%cl,16(%rsi)
1940	leaq	1(%rsi),%rsi
1941	subq	$1,%rdx
1942	jnz	.Lxts_dec_steal
1943
	# Rewind %rsi to the start of the patched block and decrypt it in place
	# with the earlier tweak (%xmm10).
1944	subq	%r9,%rsi
1945	movq	%r11,%rcx
1946	movl	%r10d,%eax
1947
1948	movups	(%rsi),%xmm2
1949	xorps	%xmm10,%xmm2
1950	movups	(%rcx),%xmm0
1951	movups	16(%rcx),%xmm1
1952	leaq	32(%rcx),%rcx
1953	xorps	%xmm0,%xmm2
1954.Loop_dec1_14:
1955.byte	102,15,56,222,209
1956	decl	%eax
1957	movups	(%rcx),%xmm1
1958	leaq	16(%rcx),%rcx
1959	jnz	.Loop_dec1_14
1960.byte	102,15,56,223,209
1961	xorps	%xmm10,%xmm2
1962	movups	%xmm2,(%rsi)
1963
1964.Lxts_dec_ret:
	# Release the 104-byte scratch area and return (rep ret).
1965	leaq	104(%rsp),%rsp
1966.Lxts_dec_epilogue:
1967	.byte	0xf3,0xc3
1968.size	aesni_xts_decrypt,.-aesni_xts_decrypt
# aesni_cbc_encrypt: CBC-mode encryption or decryption using AES-NI.
#
# Arguments as the code reads them (the C prototype is not visible here;
# confirm against the header):
#   %rdi  input pointer      %rsi  output pointer     %rdx  length in bytes
#   %rcx  key schedule (32-bit round counter at byte offset 240)
#   %r8   pointer to the 16-byte IV; the new chaining value is written back
#   %r9d  direction flag: non-zero encrypts, zero decrypts
# A zero length returns immediately without touching anything.
#
# Raw opcodes used:
#   bytes 102,15,56,220,M / ...,221,M = aesenc / aesenclast
#   bytes 102,15,56,222,M / ...,223,M = aesdec / aesdeclast
#   bytes 102,68,15,56,222,193 and ...,201 = aesdec %xmm1,%xmm8 and %xmm1,%xmm9
#   .long 0x9066A4F3 = rep movsb followed by a 2-byte nop
#   .long 0x9066AAF3 = rep stosb followed by a 2-byte nop
#   bytes 0xf3,0xc3 = rep ret
#
# No stack frame is allocated. The decrypt path keeps one 16-byte temporary at
# -24(%rsp), below the stack pointer; movaps needs that slot 16-byte aligned,
# which holds for the SysV entry alignment. A call's return address occupies
# -8(%rsp) and so does not overlap the slot.
1969.globl	aesni_cbc_encrypt
1970.type	aesni_cbc_encrypt,@function
1971.align	16
1972aesni_cbc_encrypt:
1973	testq	%rdx,%rdx
1974	jz	.Lcbc_ret
1975
	# %r10d / %r11 keep the round counter and key pointer for reloading.
1976	movl	240(%rcx),%r10d
1977	movq	%rcx,%r11
1978	testl	%r9d,%r9d
1979	jz	.Lcbc_decrypt
1980
	# Encrypt: %xmm2 carries the chaining value, starting with the IV.
1981	movups	(%r8),%xmm2
1982	movl	%r10d,%eax
1983	cmpq	$16,%rdx
1984	jb	.Lcbc_enc_tail
1985	subq	$16,%rdx
1986	jmp	.Lcbc_enc_loop
1987.align	16
1988.Lcbc_enc_loop:
	# One block per iteration: chain ^= (input ^ round key 0), run the rounds,
	# store the result, which is also the chaining value for the next block.
1989	movups	(%rdi),%xmm3
1990	leaq	16(%rdi),%rdi
1991
1992	movups	(%rcx),%xmm0
1993	movups	16(%rcx),%xmm1
1994	xorps	%xmm0,%xmm3
1995	leaq	32(%rcx),%rcx
1996	xorps	%xmm3,%xmm2
1997.Loop_enc1_15:
1998.byte	102,15,56,220,209
1999	decl	%eax
2000	movups	(%rcx),%xmm1
2001	leaq	16(%rcx),%rcx
2002	jnz	.Loop_enc1_15
2003.byte	102,15,56,221,209
2004	movl	%r10d,%eax
2005	movq	%r11,%rcx
2006	movups	%xmm2,0(%rsi)
2007	leaq	16(%rsi),%rsi
2008	subq	$16,%rdx
2009	jnc	.Lcbc_enc_loop
	# %rdx went negative: add 16 back to get the leftover byte count (0..15).
2010	addq	$16,%rdx
2011	jnz	.Lcbc_enc_tail
2012	movups	%xmm2,(%r8)
2013	jmp	.Lcbc_ret
2014
2015.Lcbc_enc_tail:
	# Partial final block: copy the %rdx leftover input bytes to the output
	# buffer (rep movsb), zero-fill up to 16 bytes (rep stosb with %al = 0),
	# then run the loop once more with input and output both pointing at that
	# padded block and %rdx = 0 so the loop exits afterwards.
2016	movq	%rdx,%rcx
2017	xchgq	%rdi,%rsi
2018.long	0x9066A4F3
2019	movl	$16,%ecx
2020	subq	%rdx,%rcx
2021	xorl	%eax,%eax
2022.long	0x9066AAF3
2023	leaq	-16(%rdi),%rdi
2024	movl	%r10d,%eax
2025	movq	%rdi,%rsi
2026	movq	%r11,%rcx
2027	xorq	%rdx,%rdx
2028	jmp	.Lcbc_enc_loop
2029
2030.align	16
2031.Lcbc_decrypt:
	# Decrypt: %xmm9 holds the chaining value (IV). Inputs of 112 bytes or less
	# go straight to the tail code; otherwise the 8-block loop is used with the
	# round counter halved in %r10d.
2032	movups	(%r8),%xmm9
2033	movl	%r10d,%eax
2034	cmpq	$112,%rdx
2035	jbe	.Lcbc_dec_tail
2036	shrl	$1,%r10d
2037	subq	$112,%rdx
2038	movl	%r10d,%eax
2039	movaps	%xmm9,-24(%rsp)
2040	jmp	.Lcbc_dec_loop8_enter
2041.align	16
2042.Lcbc_dec_loop8:
	# Loop back-edge: save the previous batch's last ciphertext block (%xmm0)
	# as the chaining value and store that batch's eighth plaintext block.
2043	movaps	%xmm0,-24(%rsp)
2044	movups	%xmm9,(%rsi)
2045	leaq	16(%rsi),%rsi
2046.Lcbc_dec_loop8_enter:
	# Load eight ciphertext blocks into %xmm2..%xmm9, XOR in round key 0 and
	# run the first aesdec round interleaved with the loads.
2047	movups	(%rcx),%xmm0
2048	movups	(%rdi),%xmm2
2049	movups	16(%rdi),%xmm3
2050	movups	16(%rcx),%xmm1
2051
2052	leaq	32(%rcx),%rcx
2053	movdqu	32(%rdi),%xmm4
2054	xorps	%xmm0,%xmm2
2055	movdqu	48(%rdi),%xmm5
2056	xorps	%xmm0,%xmm3
2057	movdqu	64(%rdi),%xmm6
2058.byte	102,15,56,222,209
2059	pxor	%xmm0,%xmm4
2060	movdqu	80(%rdi),%xmm7
2061.byte	102,15,56,222,217
2062	pxor	%xmm0,%xmm5
2063	movdqu	96(%rdi),%xmm8
2064.byte	102,15,56,222,225
2065	pxor	%xmm0,%xmm6
2066	movdqu	112(%rdi),%xmm9
2067.byte	102,15,56,222,233
2068	pxor	%xmm0,%xmm7
2069	decl	%eax
2070.byte	102,15,56,222,241
2071	pxor	%xmm0,%xmm8
2072.byte	102,15,56,222,249
2073	pxor	%xmm0,%xmm9
2074	movups	(%rcx),%xmm0
2075.byte	102,68,15,56,222,193
2076.byte	102,68,15,56,222,201
2077	movups	16(%rcx),%xmm1
2078
	# .Ldec_loop8_enter is defined outside this section (presumably the
	# mid-loop entry of _aesni_decrypt8, which finishes the remaining rounds).
2079	call	.Ldec_loop8_enter
2080
	# CBC chaining: block 0 is XORed with the saved chaining value, blocks 1..7
	# with the preceding ciphertext blocks reloaded from the input. %xmm0 ends
	# up holding ciphertext block 7, the next chaining value.
2081	movups	(%rdi),%xmm1
2082	movups	16(%rdi),%xmm0
2083	xorps	-24(%rsp),%xmm2
2084	xorps	%xmm1,%xmm3
2085	movups	32(%rdi),%xmm1
2086	xorps	%xmm0,%xmm4
2087	movups	48(%rdi),%xmm0
2088	xorps	%xmm1,%xmm5
2089	movups	64(%rdi),%xmm1
2090	xorps	%xmm0,%xmm6
2091	movups	80(%rdi),%xmm0
2092	xorps	%xmm1,%xmm7
2093	movups	96(%rdi),%xmm1
2094	xorps	%xmm0,%xmm8
2095	movups	112(%rdi),%xmm0
2096	xorps	%xmm1,%xmm9
	# Only seven blocks are stored here; the eighth (%xmm9) is stored at the
	# loop back-edge or after the loop.
2097	movups	%xmm2,(%rsi)
2098	movups	%xmm3,16(%rsi)
2099	movups	%xmm4,32(%rsi)
2100	movups	%xmm5,48(%rsi)
2101	movl	%r10d,%eax
2102	movups	%xmm6,64(%rsi)
2103	movq	%r11,%rcx
2104	movups	%xmm7,80(%rsi)
2105	leaq	128(%rdi),%rdi
2106	movups	%xmm8,96(%rsi)
2107	leaq	112(%rsi),%rsi
2108	subq	$128,%rdx
2109	ja	.Lcbc_dec_loop8
2110
	# Loop finished: %xmm2 = pending last plaintext block, %xmm9 = new chaining
	# value. If input remains, store the pending block, rebuild the full round
	# counter (2*%r10+1, exact for odd counters) and fall into the tail code.
2111	movaps	%xmm9,%xmm2
2112	movaps	%xmm0,%xmm9
2113	addq	$112,%rdx
2114	jle	.Lcbc_dec_tail_collected
2115	movups	%xmm2,(%rsi)
2116	leal	1(%r10,%r10,1),%eax
2117	leaq	16(%rsi),%rsi
2118.Lcbc_dec_tail:
	# Tail: at most 112 bytes remain. Blocks are loaded one at a time and the
	# code branches as soon as %rdx is covered. Copies of the first three
	# ciphertext blocks are kept in %xmm8, %xmm7 and %xmm6 for chaining; on the
	# longer paths those registers are later overwritten with input blocks.
2119	movups	(%rdi),%xmm2
2120	movaps	%xmm2,%xmm8
2121	cmpq	$16,%rdx
2122	jbe	.Lcbc_dec_one
2123
2124	movups	16(%rdi),%xmm3
2125	movaps	%xmm3,%xmm7
2126	cmpq	$32,%rdx
2127	jbe	.Lcbc_dec_two
2128
2129	movups	32(%rdi),%xmm4
2130	movaps	%xmm4,%xmm6
2131	cmpq	$48,%rdx
2132	jbe	.Lcbc_dec_three
2133
2134	movups	48(%rdi),%xmm5
2135	cmpq	$64,%rdx
2136	jbe	.Lcbc_dec_four
2137
2138	movups	64(%rdi),%xmm6
2139	cmpq	$80,%rdx
2140	jbe	.Lcbc_dec_five
2141
2142	movups	80(%rdi),%xmm7
2143	cmpq	$96,%rdx
2144	jbe	.Lcbc_dec_six
2145
	# Seven blocks: run _aesni_decrypt8 (defined outside this section); the
	# chaining value is parked at -24(%rsp) and the result in %xmm9 is not
	# used, since %xmm9 is reloaded with the last ciphertext block below.
2146	movups	96(%rdi),%xmm8
2147	movaps	%xmm9,-24(%rsp)
2148	call	_aesni_decrypt8
2149	movups	(%rdi),%xmm1
2150	movups	16(%rdi),%xmm0
2151	xorps	-24(%rsp),%xmm2
2152	xorps	%xmm1,%xmm3
2153	movups	32(%rdi),%xmm1
2154	xorps	%xmm0,%xmm4
2155	movups	48(%rdi),%xmm0
2156	xorps	%xmm1,%xmm5
2157	movups	64(%rdi),%xmm1
2158	xorps	%xmm0,%xmm6
2159	movups	80(%rdi),%xmm0
2160	xorps	%xmm1,%xmm7
2161	movups	96(%rdi),%xmm9
2162	xorps	%xmm0,%xmm8
2163	movups	%xmm2,(%rsi)
2164	movups	%xmm3,16(%rsi)
2165	movups	%xmm4,32(%rsi)
2166	movups	%xmm5,48(%rsi)
2167	movups	%xmm6,64(%rsi)
2168	movups	%xmm7,80(%rsi)
2169	leaq	96(%rsi),%rsi
2170	movaps	%xmm8,%xmm2
2171	subq	$112,%rdx
2172	jmp	.Lcbc_dec_tail_collected
2173.align	16
2174.Lcbc_dec_one:
	# One block, decrypted inline; %xmm8 (its ciphertext) becomes the new IV.
2175	movups	(%rcx),%xmm0
2176	movups	16(%rcx),%xmm1
2177	leaq	32(%rcx),%rcx
2178	xorps	%xmm0,%xmm2
2179.Loop_dec1_16:
2180.byte	102,15,56,222,209
2181	decl	%eax
2182	movups	(%rcx),%xmm1
2183	leaq	16(%rcx),%rcx
2184	jnz	.Loop_dec1_16
2185.byte	102,15,56,223,209
2186	xorps	%xmm9,%xmm2
2187	movaps	%xmm8,%xmm9
2188	subq	$16,%rdx
2189	jmp	.Lcbc_dec_tail_collected
2190.align	16
2191.Lcbc_dec_two:
	# Two blocks through the 3-way routine; the unused third lane is zeroed.
2192	xorps	%xmm4,%xmm4
2193	call	_aesni_decrypt3
2194	xorps	%xmm9,%xmm2
2195	xorps	%xmm8,%xmm3
2196	movups	%xmm2,(%rsi)
2197	movaps	%xmm7,%xmm9
2198	movaps	%xmm3,%xmm2
2199	leaq	16(%rsi),%rsi
2200	subq	$32,%rdx
2201	jmp	.Lcbc_dec_tail_collected
2202.align	16
2203.Lcbc_dec_three:
2204	call	_aesni_decrypt3
2205	xorps	%xmm9,%xmm2
2206	xorps	%xmm8,%xmm3
2207	movups	%xmm2,(%rsi)
2208	xorps	%xmm7,%xmm4
2209	movups	%xmm3,16(%rsi)
2210	movaps	%xmm6,%xmm9
2211	movaps	%xmm4,%xmm2
2212	leaq	32(%rsi),%rsi
2213	subq	$48,%rdx
2214	jmp	.Lcbc_dec_tail_collected
2215.align	16
2216.Lcbc_dec_four:
	# Four blocks; the new IV (fourth ciphertext block) is reloaded from input.
2217	call	_aesni_decrypt4
2218	xorps	%xmm9,%xmm2
2219	movups	48(%rdi),%xmm9
2220	xorps	%xmm8,%xmm3
2221	movups	%xmm2,(%rsi)
2222	xorps	%xmm7,%xmm4
2223	movups	%xmm3,16(%rsi)
2224	xorps	%xmm6,%xmm5
2225	movups	%xmm4,32(%rsi)
2226	movaps	%xmm5,%xmm2
2227	leaq	48(%rsi),%rsi
2228	subq	$64,%rdx
2229	jmp	.Lcbc_dec_tail_collected
2230.align	16
2231.Lcbc_dec_five:
	# Five blocks through the 6-way routine; the unused sixth lane is zeroed.
	# %xmm6/%xmm7 were overwritten, so ciphertext blocks 1..3 are reloaded.
2232	xorps	%xmm7,%xmm7
2233	call	_aesni_decrypt6
2234	movups	16(%rdi),%xmm1
2235	movups	32(%rdi),%xmm0
2236	xorps	%xmm9,%xmm2
2237	xorps	%xmm8,%xmm3
2238	xorps	%xmm1,%xmm4
2239	movups	48(%rdi),%xmm1
2240	xorps	%xmm0,%xmm5
2241	movups	64(%rdi),%xmm9
2242	xorps	%xmm1,%xmm6
2243	movups	%xmm2,(%rsi)
2244	movups	%xmm3,16(%rsi)
2245	movups	%xmm4,32(%rsi)
2246	movups	%xmm5,48(%rsi)
2247	leaq	64(%rsi),%rsi
2248	movaps	%xmm6,%xmm2
2249	subq	$80,%rdx
2250	jmp	.Lcbc_dec_tail_collected
2251.align	16
2252.Lcbc_dec_six:
2253	call	_aesni_decrypt6
2254	movups	16(%rdi),%xmm1
2255	movups	32(%rdi),%xmm0
2256	xorps	%xmm9,%xmm2
2257	xorps	%xmm8,%xmm3
2258	xorps	%xmm1,%xmm4
2259	movups	48(%rdi),%xmm1
2260	xorps	%xmm0,%xmm5
2261	movups	64(%rdi),%xmm0
2262	xorps	%xmm1,%xmm6
2263	movups	80(%rdi),%xmm9
2264	xorps	%xmm0,%xmm7
2265	movups	%xmm2,(%rsi)
2266	movups	%xmm3,16(%rsi)
2267	movups	%xmm4,32(%rsi)
2268	movups	%xmm5,48(%rsi)
2269	movups	%xmm6,64(%rsi)
2270	leaq	80(%rsi),%rsi
2271	movaps	%xmm7,%xmm2
2272	subq	$96,%rdx
2273	jmp	.Lcbc_dec_tail_collected
2274.align	16
2275.Lcbc_dec_tail_collected:
	# Common exit: %xmm2 = last plaintext block (not yet stored), %xmm9 = new
	# IV, %rdx = length minus the bytes decrypted (zero or negative). The low
	# four bits of %rdx equal length mod 16. movups leaves the flags from andq
	# intact for the jnz.
2276	andq	$15,%rdx
2277	movups	%xmm9,(%r8)
2278	jnz	.Lcbc_dec_tail_partial
2279	movups	%xmm2,(%rsi)
2280	jmp	.Lcbc_dec_ret
2281.align	16
2282.Lcbc_dec_tail_partial:
	# Length was not a multiple of 16: copy part of the last plaintext block
	# from the stack temporary to the output with rep movsb.
	# NOTE(review): the byte count is 16 - (length mod 16), not length mod 16;
	# confirm whether that is intended before relying on this path.
2283	movaps	%xmm2,-24(%rsp)
2284	movq	$16,%rcx
2285	movq	%rsi,%rdi
2286	subq	%rdx,%rcx
2287	leaq	-24(%rsp),%rsi
2288.long	0x9066A4F3
2289
2290.Lcbc_dec_ret:
2291.Lcbc_ret:
2292	.byte	0xf3,0xc3
2293.size	aesni_cbc_encrypt,.-aesni_cbc_encrypt
# aesni_set_decrypt_key: build an AES decryption key schedule.
#
# The incoming argument registers are passed unchanged to
# __aesni_set_encrypt_key. If that returns 0 in %eax, the schedule is converted
# in place: the order of the round keys is reversed and aesimc (bytes
# 102,15,56,219,M) is applied to every round key except the first and last.
# The conversion relies on state left by the encrypt-key routine: %rdx still
# points at the schedule and %esi holds the round counter it stored.
# Returns whatever __aesni_set_encrypt_key returned in %eax.
2294.globl	aesni_set_decrypt_key
2295.type	aesni_set_decrypt_key,@function
2296.align	16
2297aesni_set_decrypt_key:
	# Raw encoding of subq $8,%rsp.
2298.byte	0x48,0x83,0xEC,0x08
2299	call	__aesni_set_encrypt_key
	# %esi = round counter * 16 = byte distance used to find the last round key.
2300	shll	$4,%esi
2301	testl	%eax,%eax
2302	jnz	.Ldec_key_ret
2303	leaq	16(%rdx,%rsi,1),%rdi
2304
	# Swap the first and last round keys as they are (no aesimc).
2305	movups	(%rdx),%xmm0
2306	movups	(%rdi),%xmm1
2307	movups	%xmm0,(%rdi)
2308	movups	%xmm1,(%rdx)
2309	leaq	16(%rdx),%rdx
2310	leaq	-16(%rdi),%rdi
2311
2312.Ldec_key_inverse:
	# Walk inward from both ends: aesimc both keys, then store them swapped.
	# The stores use the already-advanced pointers, hence the +16/-16 offsets.
2313	movups	(%rdx),%xmm0
2314	movups	(%rdi),%xmm1
2315.byte	102,15,56,219,192
2316.byte	102,15,56,219,201
2317	leaq	16(%rdx),%rdx
2318	leaq	-16(%rdi),%rdi
2319	movups	%xmm0,16(%rdi)
2320	movups	%xmm1,-16(%rdx)
2321	cmpq	%rdx,%rdi
2322	ja	.Ldec_key_inverse
2323
	# Remaining round key after the pairwise pass: aesimc only.
2324	movups	(%rdx),%xmm0
2325.byte	102,15,56,219,192
2326	movups	%xmm0,(%rdi)
2327.Ldec_key_ret:
2328	addq	$8,%rsp
2329	.byte	0xf3,0xc3
2330.LSEH_end_set_decrypt_key:
2331.size	aesni_set_decrypt_key,.-aesni_set_decrypt_key
# aesni_set_encrypt_key (alias __aesni_set_encrypt_key): expand a user key into
# an AES round-key schedule using aeskeygenassist.
#
#   %rdi  pointer to the user key bytes
#   %esi  key size in bits: 128, 192 or 256
#   %rdx  output schedule; round keys are stored from offset 0 and a 32-bit
#         round counter is stored at byte offset 240
# Return value in %rax/%eax: 0 on success, -1 if %rdi or %rdx is NULL, -2 for
# any other key size. The round counter written is 9, 11 or 13 for 128-, 192-
# or 256-bit keys. On success %esi holds that counter and %rdx is unchanged,
# which aesni_set_decrypt_key depends on.
#
# Raw opcodes:
#   bytes 102,15,58,223,200,N = aeskeygenassist $N,%xmm0,%xmm1
#   bytes 102,15,58,223,202,N = aeskeygenassist $N,%xmm2,%xmm1
#   bytes 0x48,0x83,0xEC,0x08 = subq $8,%rsp;  bytes 0xf3,0xc3 = rep ret
#
# Register roles: %rax is the write cursor into the schedule, %xmm0 (and %xmm2
# for the longer keys) hold the most recent key material, %xmm1 receives the
# aeskeygenassist result, %xmm4 is zeroed once and then used as scratch by the
# .Lkey_expansion_* helpers, which are entered with call and return with ret.
2332.globl	aesni_set_encrypt_key
2333.type	aesni_set_encrypt_key,@function
2334.align	16
2335aesni_set_encrypt_key:
2336__aesni_set_encrypt_key:
2337.byte	0x48,0x83,0xEC,0x08
2338	movq	$-1,%rax
2339	testq	%rdi,%rdi
2340	jz	.Lenc_key_ret
2341	testq	%rdx,%rdx
2342	jz	.Lenc_key_ret
2343
2344	movups	(%rdi),%xmm0
2345	xorps	%xmm4,%xmm4
2346	leaq	16(%rdx),%rax
2347	cmpl	$256,%esi
2348	je	.L14rounds
2349	cmpl	$192,%esi
2350	je	.L12rounds
2351	cmpl	$128,%esi
2352	jne	.Lbad_keybits
2353
2354.L10rounds:
	# Key size 128: round key 0 is the user key; ten expansion steps follow with
	# round constants 1,2,4,...,128,27,54. The first call uses the _cold entry,
	# which skips the store because round key 0 was just written at (%rdx).
2355	movl	$9,%esi
2356	movups	%xmm0,(%rdx)
2357.byte	102,15,58,223,200,1
2358	call	.Lkey_expansion_128_cold
2359.byte	102,15,58,223,200,2
2360	call	.Lkey_expansion_128
2361.byte	102,15,58,223,200,4
2362	call	.Lkey_expansion_128
2363.byte	102,15,58,223,200,8
2364	call	.Lkey_expansion_128
2365.byte	102,15,58,223,200,16
2366	call	.Lkey_expansion_128
2367.byte	102,15,58,223,200,32
2368	call	.Lkey_expansion_128
2369.byte	102,15,58,223,200,64
2370	call	.Lkey_expansion_128
2371.byte	102,15,58,223,200,128
2372	call	.Lkey_expansion_128
2373.byte	102,15,58,223,200,27
2374	call	.Lkey_expansion_128
2375.byte	102,15,58,223,200,54
2376	call	.Lkey_expansion_128
	# Store the final round key; %rax is now %rdx+160, so 80(%rax) is the
	# round-counter field at offset 240.
2377	movups	%xmm0,(%rax)
2378	movl	%esi,80(%rax)
2379	xorl	%eax,%eax
2380	jmp	.Lenc_key_ret
2381
2382.align	16
2383.L12rounds:
	# Key size 192: the extra 8 key bytes are loaded into the low half of %xmm2.
	# The a/b helpers alternate because 24 bytes of key material do not line up
	# with 16-byte round keys.
2384	movq	16(%rdi),%xmm2
2385	movl	$11,%esi
2386	movups	%xmm0,(%rdx)
2387.byte	102,15,58,223,202,1
2388	call	.Lkey_expansion_192a_cold
2389.byte	102,15,58,223,202,2
2390	call	.Lkey_expansion_192b
2391.byte	102,15,58,223,202,4
2392	call	.Lkey_expansion_192a
2393.byte	102,15,58,223,202,8
2394	call	.Lkey_expansion_192b
2395.byte	102,15,58,223,202,16
2396	call	.Lkey_expansion_192a
2397.byte	102,15,58,223,202,32
2398	call	.Lkey_expansion_192b
2399.byte	102,15,58,223,202,64
2400	call	.Lkey_expansion_192a
2401.byte	102,15,58,223,202,128
2402	call	.Lkey_expansion_192b
	# Final round key at %rdx+192; 48(%rax) is the counter field at offset 240.
2403	movups	%xmm0,(%rax)
2404	movl	%esi,48(%rax)
2405	xorq	%rax,%rax
2406	jmp	.Lenc_key_ret
2407
2408.align	16
2409.L14rounds:
	# Key size 256: both 16-byte halves of the user key are stored up front as
	# round keys 0 and 1, so the cursor is advanced by a further 16 bytes. The
	# 256a steps update %xmm0 and the 256b steps update %xmm2.
2410	movups	16(%rdi),%xmm2
2411	movl	$13,%esi
2412	leaq	16(%rax),%rax
2413	movups	%xmm0,(%rdx)
2414	movups	%xmm2,16(%rdx)
2415.byte	102,15,58,223,202,1
2416	call	.Lkey_expansion_256a_cold
2417.byte	102,15,58,223,200,1
2418	call	.Lkey_expansion_256b
2419.byte	102,15,58,223,202,2
2420	call	.Lkey_expansion_256a
2421.byte	102,15,58,223,200,2
2422	call	.Lkey_expansion_256b
2423.byte	102,15,58,223,202,4
2424	call	.Lkey_expansion_256a
2425.byte	102,15,58,223,200,4
2426	call	.Lkey_expansion_256b
2427.byte	102,15,58,223,202,8
2428	call	.Lkey_expansion_256a
2429.byte	102,15,58,223,200,8
2430	call	.Lkey_expansion_256b
2431.byte	102,15,58,223,202,16
2432	call	.Lkey_expansion_256a
2433.byte	102,15,58,223,200,16
2434	call	.Lkey_expansion_256b
2435.byte	102,15,58,223,202,32
2436	call	.Lkey_expansion_256a
2437.byte	102,15,58,223,200,32
2438	call	.Lkey_expansion_256b
2439.byte	102,15,58,223,202,64
2440	call	.Lkey_expansion_256a
	# Final round key at %rdx+224; 16(%rax) is the counter field at offset 240.
2441	movups	%xmm0,(%rax)
2442	movl	%esi,16(%rax)
2443	xorq	%rax,%rax
2444	jmp	.Lenc_key_ret
2445
2446.align	16
2447.Lbad_keybits:
2448	movq	$-2,%rax
2449.Lenc_key_ret:
2450	addq	$8,%rsp
2451	.byte	0xf3,0xc3
2452.LSEH_end_set_encrypt_key:
2453
2454.align	16
2455.Lkey_expansion_128:
	# Store the current round key (%xmm0) and advance the cursor, then derive
	# the next one. The _cold entry point skips the store.
2456	movups	%xmm0,(%rax)
2457	leaq	16(%rax),%rax
2458.Lkey_expansion_128_cold:
	# %xmm0 is XORed with two shuffles of itself built in %xmm4, then with
	# dword 3 of the aeskeygenassist result broadcast across %xmm1.
2459	shufps	$16,%xmm0,%xmm4
2460	xorps	%xmm4,%xmm0
2461	shufps	$140,%xmm0,%xmm4
2462	xorps	%xmm4,%xmm0
2463	shufps	$255,%xmm1,%xmm1
2464	xorps	%xmm1,%xmm0
2465	.byte	0xf3,0xc3
2466
2467.align	16
2468.Lkey_expansion_192a:
	# Store %xmm0 as a round key and advance; the _cold entry skips the store.
2469	movups	%xmm0,(%rax)
2470	leaq	16(%rax),%rax
2471.Lkey_expansion_192a_cold:
	# Keep a copy of %xmm2 in %xmm5; .Lkey_expansion_192b later combines it
	# with the updated %xmm0 to form a full 16-byte round key.
2472	movaps	%xmm2,%xmm5
2473.Lkey_expansion_192b_warm:
	# Shared update of %xmm0 and %xmm2. Here dword 1 of the aeskeygenassist
	# result is broadcast (pshufd $85), and dword 3 of the new %xmm0 is folded
	# into %xmm2 (pshufd $255).
2474	shufps	$16,%xmm0,%xmm4
2475	movdqa	%xmm2,%xmm3
2476	xorps	%xmm4,%xmm0
2477	shufps	$140,%xmm0,%xmm4
2478	pslldq	$4,%xmm3
2479	xorps	%xmm4,%xmm0
2480	pshufd	$85,%xmm1,%xmm1
2481	pxor	%xmm3,%xmm2
2482	pxor	%xmm1,%xmm0
2483	pshufd	$255,%xmm0,%xmm3
2484	pxor	%xmm3,%xmm2
2485	.byte	0xf3,0xc3
2486
2487.align	16
2488.Lkey_expansion_192b:
	# Emit two round keys (32 bytes) assembled from %xmm5, %xmm0 and %xmm2,
	# then continue with the shared update code.
2489	movaps	%xmm0,%xmm3
2490	shufps	$68,%xmm0,%xmm5
2491	movups	%xmm5,(%rax)
2492	shufps	$78,%xmm2,%xmm3
2493	movups	%xmm3,16(%rax)
2494	leaq	32(%rax),%rax
2495	jmp	.Lkey_expansion_192b_warm
2496
2497.align	16
2498.Lkey_expansion_256a:
	# Store %xmm2 as a round key and advance, then update %xmm0 exactly as in
	# the 128-bit step. The _cold entry skips the store.
2499	movups	%xmm2,(%rax)
2500	leaq	16(%rax),%rax
2501.Lkey_expansion_256a_cold:
2502	shufps	$16,%xmm0,%xmm4
2503	xorps	%xmm4,%xmm0
2504	shufps	$140,%xmm0,%xmm4
2505	xorps	%xmm4,%xmm0
2506	shufps	$255,%xmm1,%xmm1
2507	xorps	%xmm1,%xmm0
2508	.byte	0xf3,0xc3
2509
2510.align	16
2511.Lkey_expansion_256b:
	# Store %xmm0 as a round key and advance, then update %xmm2. This variant
	# broadcasts dword 2 of the aeskeygenassist result (shufps $170).
2512	movups	%xmm0,(%rax)
2513	leaq	16(%rax),%rax
2514
2515	shufps	$16,%xmm2,%xmm4
2516	xorps	%xmm4,%xmm2
2517	shufps	$140,%xmm2,%xmm4
2518	xorps	%xmm4,%xmm2
2519	shufps	$170,%xmm1,%xmm1
2520	xorps	%xmm1,%xmm2
2521	.byte	0xf3,0xc3
2522.size	aesni_set_encrypt_key,.-aesni_set_encrypt_key
2523.size	__aesni_set_encrypt_key,.-__aesni_set_encrypt_key
# Constant tables, 64-byte aligned, followed by an identification string.
2524.align	64
# Byte indices 15 down to 0. Not referenced in this part of the file;
# presumably a byte-reversal shuffle mask -- confirm at its use site.
2525.Lbswap_mask:
2526.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
# Four dwords 6,6,6,0. Not referenced in this part of the file; presumably a
# per-lane counter increment -- confirm at its use site.
2527.Lincrement32:
2528.long	6,6,6,0
# Four dwords 1,0,0,0. Not referenced in this part of the file; presumably a
# 64-bit counter increment of one -- confirm at its use site.
2529.Lincrement64:
2530.long	1,0,0,0
# Mask used by the XTS tweak-doubling code (loaded into %xmm8): dword 0 = 0x87
# is XORed into the low end when bit 127 was set before the shift, and
# dword 2 = 1 carries bit 63 into bit 64.
2531.Lxts_magic:
2532.long	0x87,0,1,0
2533
# ASCII, NUL-terminated: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
2534.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2535.align	64
2536