1#if defined(__i386__)
2.text
// _ChaCha20_ctr32: ChaCha20 keystream XOR for 32-bit x86 (AT&T syntax).
//
// Arguments are read from the stack. Offsets below are relative to %esp
// after the four register pushes in the prologue:
//   20(%esp) arg1  destination pointer (64 bytes are written per block)
//   24(%esp) arg2  source pointer (XORed with the generated key stream)
//   28(%esp) arg3  byte count; the function returns at once when it is zero
//   32(%esp) arg4  pointer to 8 dwords that become state words 4..11
//                  (presumably the 256-bit key; confirm against the caller)
//   36(%esp) arg5  pointer to 4 dwords that become state words 12..15; the
//                  first dword is incremented once per 64-byte block
// %ebp, %ebx, %esi and %edi are saved and restored. The caller argument
// slots for arg1..arg3 are overwritten with the advanced pointers and the
// remaining length while the function runs.
//
// Frame layout of the scalar path after "subl $132,%esp":
//     0..60(%esp)   16-dword working state (word i at 4*i)
//    80..108(%esp)  copy of the 8 dwords from arg4
//   112..124(%esp)  copy of the 4 dwords from arg5 (112 is the block counter)
//   128(%esp)       remaining iterations of the round loop
//   152/156/160(%esp) the caller slots of arg1/arg2/arg3
3.globl	_ChaCha20_ctr32
4.private_extern	_ChaCha20_ctr32
5.align	4
6_ChaCha20_ctr32:
7L_ChaCha20_ctr32_begin:
8	pushl	%ebp
9	pushl	%ebx
10	pushl	%esi
11	pushl	%edi
// Return immediately when the byte count (arg3) is zero.
12	xorl	%eax,%eax
13	cmpl	28(%esp),%eax
14	je	L000no_data
// call/pop leaves the run-time address of Lpic_point in %eax; it is used as
// the base for position-independent references here and in the SSSE3 path.
15	call	Lpic_point
16Lpic_point:
17	popl	%eax
18	movl	L_OPENSSL_ia32cap_P$non_lazy_ptr-Lpic_point(%eax),%ebp
// Take the SSSE3 path only when bit 24 of capability dword 0 and bit 9 of
// capability dword 1 are both set (presumably the FXSR and SSSE3 feature
// bits; confirm against the OPENSSL_ia32cap_P definition).
19	testl	$16777216,(%ebp)
20	jz	L001x86
21	testl	$512,4(%ebp)
22	jz	L001x86
23	jmp	Lssse3_shortcut
24L001x86:
// Scalar path: %esi = arg4, %edi = arg5 (read before the frame is allocated).
25	movl	32(%esp),%esi
26	movl	36(%esp),%edi
27	subl	$132,%esp
// Copy the 8 dwords at arg4 to 80..108(%esp).
28	movl	(%esi),%eax
29	movl	4(%esi),%ebx
30	movl	8(%esi),%ecx
31	movl	12(%esi),%edx
32	movl	%eax,80(%esp)
33	movl	%ebx,84(%esp)
34	movl	%ecx,88(%esp)
35	movl	%edx,92(%esp)
36	movl	16(%esi),%eax
37	movl	20(%esi),%ebx
38	movl	24(%esi),%ecx
39	movl	28(%esi),%edx
40	movl	%eax,96(%esp)
41	movl	%ebx,100(%esp)
42	movl	%ecx,104(%esp)
43	movl	%edx,108(%esp)
// Copy the 4 dwords at arg5 to 112..124(%esp). The counter dword is stored
// minus one because L002entry adds one before every block.
44	movl	(%edi),%eax
45	movl	4(%edi),%ebx
46	movl	8(%edi),%ecx
47	movl	12(%edi),%edx
48	subl	$1,%eax
49	movl	%eax,112(%esp)
50	movl	%ebx,116(%esp)
51	movl	%ecx,120(%esp)
52	movl	%edx,124(%esp)
53	jmp	L002entry
54.align	4,0x90
55L003outer_loop:
// Reached after a full 64-byte block: write the advanced source pointer
// (%ebx), destination pointer (%eax) and remaining length (%ecx) back into
// the caller argument slots.
56	movl	%ebx,156(%esp)
57	movl	%eax,152(%esp)
58	movl	%ecx,160(%esp)
59L002entry:
// Build the working state for one block. The four immediates are
// 0x61707865, 0x3320646e, 0x79622d32 and 0x6b206574 (state words 0..3).
// Words 0, 4, 8, 9 and 12 start out in %eax, %ebp, %ecx, %esi and %edx;
// the remaining words are stored to their stack slots.
60	movl	$1634760805,%eax
61	movl	$857760878,4(%esp)
62	movl	$2036477234,8(%esp)
63	movl	$1797285236,12(%esp)
64	movl	84(%esp),%ebx
65	movl	88(%esp),%ebp
66	movl	104(%esp),%ecx
67	movl	108(%esp),%esi
68	movl	116(%esp),%edx
69	movl	120(%esp),%edi
70	movl	%ebx,20(%esp)
71	movl	%ebp,24(%esp)
72	movl	%ecx,40(%esp)
73	movl	%esi,44(%esp)
74	movl	%edx,52(%esp)
75	movl	%edi,56(%esp)
76	movl	92(%esp),%ebx
77	movl	124(%esp),%edi
78	movl	112(%esp),%edx
79	movl	80(%esp),%ebp
80	movl	96(%esp),%ecx
81	movl	100(%esp),%esi
// Advance the block counter and keep the new value at 112(%esp).
82	addl	$1,%edx
83	movl	%ebx,28(%esp)
84	movl	%edi,60(%esp)
85	movl	%edx,112(%esp)
// %ebx = 10 loop iterations; the counter is parked at 128(%esp) while the
// loop body uses %ebx as a state register.
86	movl	$10,%ebx
87	jmp	L004loop
88.align	4,0x90
89L004loop:
// One iteration applies eight add/xor/rotate groups (rotations by 16, 12, 8
// and 7 bits) to the working state. State words are swapped between the six
// data registers and their 4*i(%esp) slots as each group needs them, so the
// instruction order below must not be changed.
90	addl	%ebp,%eax
91	movl	%ebx,128(%esp)
92	movl	%ebp,%ebx
93	xorl	%eax,%edx
94	roll	$16,%edx
95	addl	%edx,%ecx
96	xorl	%ecx,%ebx
97	movl	52(%esp),%edi
98	roll	$12,%ebx
99	movl	20(%esp),%ebp
100	addl	%ebx,%eax
101	xorl	%eax,%edx
102	movl	%eax,(%esp)
103	roll	$8,%edx
104	movl	4(%esp),%eax
105	addl	%edx,%ecx
106	movl	%edx,48(%esp)
107	xorl	%ecx,%ebx
108	addl	%ebp,%eax
109	roll	$7,%ebx
110	xorl	%eax,%edi
111	movl	%ecx,32(%esp)
112	roll	$16,%edi
113	movl	%ebx,16(%esp)
114	addl	%edi,%esi
115	movl	40(%esp),%ecx
116	xorl	%esi,%ebp
117	movl	56(%esp),%edx
118	roll	$12,%ebp
119	movl	24(%esp),%ebx
120	addl	%ebp,%eax
121	xorl	%eax,%edi
122	movl	%eax,4(%esp)
123	roll	$8,%edi
124	movl	8(%esp),%eax
125	addl	%edi,%esi
126	movl	%edi,52(%esp)
127	xorl	%esi,%ebp
128	addl	%ebx,%eax
129	roll	$7,%ebp
130	xorl	%eax,%edx
131	movl	%esi,36(%esp)
132	roll	$16,%edx
133	movl	%ebp,20(%esp)
134	addl	%edx,%ecx
135	movl	44(%esp),%esi
136	xorl	%ecx,%ebx
137	movl	60(%esp),%edi
138	roll	$12,%ebx
139	movl	28(%esp),%ebp
140	addl	%ebx,%eax
141	xorl	%eax,%edx
142	movl	%eax,8(%esp)
143	roll	$8,%edx
144	movl	12(%esp),%eax
145	addl	%edx,%ecx
146	movl	%edx,56(%esp)
147	xorl	%ecx,%ebx
148	addl	%ebp,%eax
149	roll	$7,%ebx
150	xorl	%eax,%edi
151	roll	$16,%edi
152	movl	%ebx,24(%esp)
153	addl	%edi,%esi
154	xorl	%esi,%ebp
155	roll	$12,%ebp
156	movl	20(%esp),%ebx
157	addl	%ebp,%eax
158	xorl	%eax,%edi
159	movl	%eax,12(%esp)
160	roll	$8,%edi
161	movl	(%esp),%eax
162	addl	%edi,%esi
163	movl	%edi,%edx
164	xorl	%esi,%ebp
165	addl	%ebx,%eax
166	roll	$7,%ebp
167	xorl	%eax,%edx
168	roll	$16,%edx
169	movl	%ebp,28(%esp)
170	addl	%edx,%ecx
171	xorl	%ecx,%ebx
172	movl	48(%esp),%edi
173	roll	$12,%ebx
174	movl	24(%esp),%ebp
175	addl	%ebx,%eax
176	xorl	%eax,%edx
177	movl	%eax,(%esp)
178	roll	$8,%edx
179	movl	4(%esp),%eax
180	addl	%edx,%ecx
181	movl	%edx,60(%esp)
182	xorl	%ecx,%ebx
183	addl	%ebp,%eax
184	roll	$7,%ebx
185	xorl	%eax,%edi
186	movl	%ecx,40(%esp)
187	roll	$16,%edi
188	movl	%ebx,20(%esp)
189	addl	%edi,%esi
190	movl	32(%esp),%ecx
191	xorl	%esi,%ebp
192	movl	52(%esp),%edx
193	roll	$12,%ebp
194	movl	28(%esp),%ebx
195	addl	%ebp,%eax
196	xorl	%eax,%edi
197	movl	%eax,4(%esp)
198	roll	$8,%edi
199	movl	8(%esp),%eax
200	addl	%edi,%esi
201	movl	%edi,48(%esp)
202	xorl	%esi,%ebp
203	addl	%ebx,%eax
204	roll	$7,%ebp
205	xorl	%eax,%edx
206	movl	%esi,44(%esp)
207	roll	$16,%edx
208	movl	%ebp,24(%esp)
209	addl	%edx,%ecx
210	movl	36(%esp),%esi
211	xorl	%ecx,%ebx
212	movl	56(%esp),%edi
213	roll	$12,%ebx
214	movl	16(%esp),%ebp
215	addl	%ebx,%eax
216	xorl	%eax,%edx
217	movl	%eax,8(%esp)
218	roll	$8,%edx
219	movl	12(%esp),%eax
220	addl	%edx,%ecx
221	movl	%edx,52(%esp)
222	xorl	%ecx,%ebx
223	addl	%ebp,%eax
224	roll	$7,%ebx
225	xorl	%eax,%edi
226	roll	$16,%edi
227	movl	%ebx,28(%esp)
228	addl	%edi,%esi
229	xorl	%esi,%ebp
230	movl	48(%esp),%edx
231	roll	$12,%ebp
// Reload the iteration counter parked at 128(%esp).
232	movl	128(%esp),%ebx
233	addl	%ebp,%eax
234	xorl	%eax,%edi
235	movl	%eax,12(%esp)
236	roll	$8,%edi
237	movl	(%esp),%eax
238	addl	%edi,%esi
239	movl	%edi,56(%esp)
240	xorl	%esi,%ebp
241	roll	$7,%ebp
242	decl	%ebx
243	jnz	L004loop
// Rounds done. Registers hold words 0 (%eax), 4 (%ebp), 8 (%ecx), 9 (%esi),
// 12 (%edx) and 14 (%edi). Add the input state back to the register-held
// words, then branch to the byte-wise tail when fewer than 64 bytes remain.
244	movl	160(%esp),%ebx
245	addl	$1634760805,%eax
246	addl	80(%esp),%ebp
247	addl	96(%esp),%ecx
248	addl	100(%esp),%esi
249	cmpl	$64,%ebx
250	jb	L005tail
// Full block: %ebx = source pointer, %eax (from L257 on) = destination
// pointer. Each key-stream word is XORed with the source dword at the same
// offset and stored to the destination.
251	movl	156(%esp),%ebx
252	addl	112(%esp),%edx
253	addl	120(%esp),%edi
254	xorl	(%ebx),%eax
255	xorl	16(%ebx),%ebp
// Output word 0 is parked at (%esp) because %eax is needed for the
// destination pointer; it is written out last.
256	movl	%eax,(%esp)
257	movl	152(%esp),%eax
258	xorl	32(%ebx),%ecx
259	xorl	36(%ebx),%esi
260	xorl	48(%ebx),%edx
261	xorl	56(%ebx),%edi
262	movl	%ebp,16(%eax)
263	movl	%ecx,32(%eax)
264	movl	%esi,36(%eax)
265	movl	%edx,48(%eax)
266	movl	%edi,56(%eax)
// Words 1, 2, 3, 5 and 6.
267	movl	4(%esp),%ebp
268	movl	8(%esp),%ecx
269	movl	12(%esp),%esi
270	movl	20(%esp),%edx
271	movl	24(%esp),%edi
272	addl	$857760878,%ebp
273	addl	$2036477234,%ecx
274	addl	$1797285236,%esi
275	addl	84(%esp),%edx
276	addl	88(%esp),%edi
277	xorl	4(%ebx),%ebp
278	xorl	8(%ebx),%ecx
279	xorl	12(%ebx),%esi
280	xorl	20(%ebx),%edx
281	xorl	24(%ebx),%edi
282	movl	%ebp,4(%eax)
283	movl	%ecx,8(%eax)
284	movl	%esi,12(%eax)
285	movl	%edx,20(%eax)
286	movl	%edi,24(%eax)
// Words 7, 10, 11, 13 and 15.
287	movl	28(%esp),%ebp
288	movl	40(%esp),%ecx
289	movl	44(%esp),%esi
290	movl	52(%esp),%edx
291	movl	60(%esp),%edi
292	addl	92(%esp),%ebp
293	addl	104(%esp),%ecx
294	addl	108(%esp),%esi
295	addl	116(%esp),%edx
296	addl	124(%esp),%edi
297	xorl	28(%ebx),%ebp
298	xorl	40(%ebx),%ecx
299	xorl	44(%ebx),%esi
300	xorl	52(%ebx),%edx
301	xorl	60(%ebx),%edi
// Advance both pointers by 64 bytes and reduce the remaining length by 64;
// loop while bytes remain.
302	leal	64(%ebx),%ebx
303	movl	%ebp,28(%eax)
304	movl	(%esp),%ebp
305	movl	%ecx,40(%eax)
306	movl	160(%esp),%ecx
307	movl	%esi,44(%eax)
308	movl	%edx,52(%eax)
309	movl	%edi,60(%eax)
310	movl	%ebp,(%eax)
311	leal	64(%eax),%eax
312	subl	$64,%ecx
313	jnz	L003outer_loop
314	jmp	L006done
315L005tail:
// Fewer than 64 bytes remain (%ebx holds the count). Finish adding the
// input state and store the whole 64-byte key-stream block at 0..63(%esp).
316	addl	112(%esp),%edx
317	addl	120(%esp),%edi
318	movl	%eax,(%esp)
319	movl	%ebp,16(%esp)
320	movl	%ecx,32(%esp)
321	movl	%esi,36(%esp)
322	movl	%edx,48(%esp)
323	movl	%edi,56(%esp)
324	movl	4(%esp),%ebp
325	movl	8(%esp),%ecx
326	movl	12(%esp),%esi
327	movl	20(%esp),%edx
328	movl	24(%esp),%edi
329	addl	$857760878,%ebp
330	addl	$2036477234,%ecx
331	addl	$1797285236,%esi
332	addl	84(%esp),%edx
333	addl	88(%esp),%edi
334	movl	%ebp,4(%esp)
335	movl	%ecx,8(%esp)
336	movl	%esi,12(%esp)
337	movl	%edx,20(%esp)
338	movl	%edi,24(%esp)
339	movl	28(%esp),%ebp
340	movl	40(%esp),%ecx
341	movl	44(%esp),%esi
342	movl	52(%esp),%edx
343	movl	60(%esp),%edi
344	addl	92(%esp),%ebp
345	addl	104(%esp),%ecx
346	addl	108(%esp),%esi
347	addl	116(%esp),%edx
348	addl	124(%esp),%edi
349	movl	%ebp,28(%esp)
// %ebp = source pointer, %ecx = destination pointer, %esi = byte index.
350	movl	156(%esp),%ebp
351	movl	%ecx,40(%esp)
352	movl	152(%esp),%ecx
353	movl	%esi,44(%esp)
354	xorl	%esi,%esi
355	movl	%edx,52(%esp)
356	movl	%edi,60(%esp)
357	xorl	%eax,%eax
358	xorl	%edx,%edx
359L007tail_loop:
// dst[i] = src[i] XOR keystream[i], one byte per iteration, %ebx times.
// The store uses -1(%ecx,%esi) because %esi has already been incremented.
360	movb	(%esi,%ebp,1),%al
361	movb	(%esp,%esi,1),%dl
362	leal	1(%esi),%esi
363	xorb	%dl,%al
364	movb	%al,-1(%ecx,%esi,1)
365	decl	%ebx
366	jnz	L007tail_loop
367L006done:
// Release the 132-byte frame of the scalar path.
368	addl	$132,%esp
369L000no_data:
370	popl	%edi
371	popl	%esi
372	popl	%ebx
373	popl	%ebp
374	ret
// _ChaCha20_ssse3: SSSE3 implementation taking the same five stack arguments
// as _ChaCha20_ctr32 (destination, source, byte count, pointer to 32 bytes of
// key material, pointer to the 16-byte counter block).
//
// Inputs of 256 bytes or more are processed four 64-byte blocks at a time;
// what is left (or an input shorter than 256 bytes) goes through the
// one-block path at L0081x, which ends with a byte-wise tail.
//
// Frame (after alignment of %esp to 64 bytes):
//     0..63(%esp)     one-block path: input state, later the tail key stream
//     0..255(%esp)    four-block path: working state, addressed via
//                     %ebx = %esp+128
//   256..511(%esp)    four-block path: broadcast input state, addressed via
//                     %ebp = %esp+384
//   512(%esp)         %esp value to restore on exit
//   516(%esp)         saved key pointer, 520(%esp) saved counter pointer
// %ebp, %ebx, %esi and %edi are saved and restored.
375.globl	_ChaCha20_ssse3
376.private_extern	_ChaCha20_ssse3
377.align	4
378_ChaCha20_ssse3:
379L_ChaCha20_ssse3_begin:
380	pushl	%ebp
381	pushl	%ebx
382	pushl	%esi
383	pushl	%edi
384Lssse3_shortcut:
// _ChaCha20_ctr32 jumps here after its own identical four pushes.
// %edi = destination, %esi = source, %ecx = byte count, %edx = key pointer,
// %ebx = counter-block pointer.
385	movl	20(%esp),%edi
386	movl	24(%esp),%esi
387	movl	28(%esp),%ecx
388	movl	32(%esp),%edx
389	movl	36(%esp),%ebx
// Allocate the frame, align it to 64 bytes and remember the old %esp.
390	movl	%esp,%ebp
391	subl	$524,%esp
392	andl	$-64,%esp
393	movl	%ebp,512(%esp)
// %eax = address of Lssse3_data, computed relative to Lpic_point.
// NOTE(review): nothing between the _ChaCha20_ssse3 entry label and this
// instruction writes %eax, so this address is only valid when control
// arrives through Lssse3_shortcut with the address of Lpic_point still in
// %eax. Confirm that no caller enters through the exported symbol directly.
394	leal	Lssse3_data-Lpic_point(%eax),%eax
// %xmm3 = 16-byte counter block.
395	movdqu	(%ebx),%xmm3
396	cmpl	$256,%ecx
397	jb	L0081x
// Four-block setup. The key and counter pointers are saved for the later
// hand-over to the one-block path, and 256 is subtracted from the length up
// front so the loop can test the borrow.
398	movl	%edx,516(%esp)
399	movl	%ebx,520(%esp)
400	subl	$256,%ecx
401	leal	384(%esp),%ebp
402	movdqu	(%edx),%xmm7
// Broadcast each dword of the counter block across a full register.
403	pshufd	$0,%xmm3,%xmm0
404	pshufd	$85,%xmm3,%xmm1
405	pshufd	$170,%xmm3,%xmm2
406	pshufd	$255,%xmm3,%xmm3
// Counter lanes become ctr+{0,1,2,3}; {4,4,4,4} is subtracted here because
// the outer loop adds it back before every group of four blocks.
407	paddd	48(%eax),%xmm0
408	pshufd	$0,%xmm7,%xmm4
409	pshufd	$85,%xmm7,%xmm5
410	psubd	64(%eax),%xmm0
411	pshufd	$170,%xmm7,%xmm6
412	pshufd	$255,%xmm7,%xmm7
// Rows 12..15 (counter block) go to 64..112(%ebp), rows 4..7 (first 16 key
// bytes) to -64..-16(%ebp).
413	movdqa	%xmm0,64(%ebp)
414	movdqa	%xmm1,80(%ebp)
415	movdqa	%xmm2,96(%ebp)
416	movdqa	%xmm3,112(%ebp)
417	movdqu	16(%edx),%xmm3
418	movdqa	%xmm4,-64(%ebp)
419	movdqa	%xmm5,-48(%ebp)
420	movdqa	%xmm6,-32(%ebp)
421	movdqa	%xmm7,-16(%ebp)
422	movdqa	32(%eax),%xmm7
423	leal	128(%esp),%ebx
// Rows 8..11 (second 16 key bytes) go to 0..48(%ebp), rows 0..3 (the four
// constants at Lssse3_data+32) to -128..-80(%ebp).
424	pshufd	$0,%xmm3,%xmm0
425	pshufd	$85,%xmm3,%xmm1
426	pshufd	$170,%xmm3,%xmm2
427	pshufd	$255,%xmm3,%xmm3
428	pshufd	$0,%xmm7,%xmm4
429	pshufd	$85,%xmm7,%xmm5
430	pshufd	$170,%xmm7,%xmm6
431	pshufd	$255,%xmm7,%xmm7
432	movdqa	%xmm0,(%ebp)
433	movdqa	%xmm1,16(%ebp)
434	movdqa	%xmm2,32(%ebp)
435	movdqa	%xmm3,48(%ebp)
436	movdqa	%xmm4,-128(%ebp)
437	movdqa	%xmm5,-112(%ebp)
438	movdqa	%xmm6,-96(%ebp)
439	movdqa	%xmm7,-80(%ebp)
// Bias both data pointers by 128 so the four blocks are reachable at
// displacements -128, -64, 0 and 64.
440	leal	128(%esi),%esi
441	leal	128(%edi),%edi
442	jmp	L009outer_loop
443.align	4,0x90
444L009outer_loop:
// Copy the broadcast input state (%ebp area) into the working state (%ebx
// area). Rows 0, 4, 8, 9 and 12 are not stored; they start the round loop
// in %xmm0, %xmm3, %xmm4, %xmm5 and %xmm6.
445	movdqa	-112(%ebp),%xmm1
446	movdqa	-96(%ebp),%xmm2
447	movdqa	-80(%ebp),%xmm3
448	movdqa	-48(%ebp),%xmm5
449	movdqa	-32(%ebp),%xmm6
450	movdqa	-16(%ebp),%xmm7
451	movdqa	%xmm1,-112(%ebx)
452	movdqa	%xmm2,-96(%ebx)
453	movdqa	%xmm3,-80(%ebx)
454	movdqa	%xmm5,-48(%ebx)
455	movdqa	%xmm6,-32(%ebx)
456	movdqa	%xmm7,-16(%ebx)
457	movdqa	32(%ebp),%xmm2
458	movdqa	48(%ebp),%xmm3
459	movdqa	64(%ebp),%xmm4
460	movdqa	80(%ebp),%xmm5
461	movdqa	96(%ebp),%xmm6
462	movdqa	112(%ebp),%xmm7
// Advance the four per-lane counters by 4 and keep the result in the input
// state at 64(%ebp).
463	paddd	64(%eax),%xmm4
464	movdqa	%xmm2,32(%ebx)
465	movdqa	%xmm3,48(%ebx)
466	movdqa	%xmm4,64(%ebx)
467	movdqa	%xmm5,80(%ebx)
468	movdqa	%xmm6,96(%ebx)
469	movdqa	%xmm7,112(%ebx)
470	movdqa	%xmm4,64(%ebp)
471	movdqa	-128(%ebp),%xmm0
472	movdqa	%xmm4,%xmm6
473	movdqa	-64(%ebp),%xmm3
474	movdqa	(%ebp),%xmm4
475	movdqa	16(%ebp),%xmm5
// %edx = 10 loop iterations.
476	movl	$10,%edx
477	nop
478.align	4,0x90
479L010loop:
// One iteration applies eight add/xor/rotate groups to four blocks at once
// (one block per 32-bit lane). Rotations by 16 and 8 use pshufb with the
// masks at (%eax) and 16(%eax); rotations by 12 and 7 use a shift pair plus
// por. Rows are swapped between registers and the %ebx area as needed, so
// the instruction order below must not be changed.
480	paddd	%xmm3,%xmm0
481	movdqa	%xmm3,%xmm2
482	pxor	%xmm0,%xmm6
483	pshufb	(%eax),%xmm6
484	paddd	%xmm6,%xmm4
485	pxor	%xmm4,%xmm2
486	movdqa	-48(%ebx),%xmm3
487	movdqa	%xmm2,%xmm1
488	pslld	$12,%xmm2
489	psrld	$20,%xmm1
490	por	%xmm1,%xmm2
491	movdqa	-112(%ebx),%xmm1
492	paddd	%xmm2,%xmm0
493	movdqa	80(%ebx),%xmm7
494	pxor	%xmm0,%xmm6
495	movdqa	%xmm0,-128(%ebx)
496	pshufb	16(%eax),%xmm6
497	paddd	%xmm6,%xmm4
498	movdqa	%xmm6,64(%ebx)
499	pxor	%xmm4,%xmm2
500	paddd	%xmm3,%xmm1
501	movdqa	%xmm2,%xmm0
502	pslld	$7,%xmm2
503	psrld	$25,%xmm0
504	pxor	%xmm1,%xmm7
505	por	%xmm0,%xmm2
506	movdqa	%xmm4,(%ebx)
507	pshufb	(%eax),%xmm7
508	movdqa	%xmm2,-64(%ebx)
509	paddd	%xmm7,%xmm5
510	movdqa	32(%ebx),%xmm4
511	pxor	%xmm5,%xmm3
512	movdqa	-32(%ebx),%xmm2
513	movdqa	%xmm3,%xmm0
514	pslld	$12,%xmm3
515	psrld	$20,%xmm0
516	por	%xmm0,%xmm3
517	movdqa	-96(%ebx),%xmm0
518	paddd	%xmm3,%xmm1
519	movdqa	96(%ebx),%xmm6
520	pxor	%xmm1,%xmm7
521	movdqa	%xmm1,-112(%ebx)
522	pshufb	16(%eax),%xmm7
523	paddd	%xmm7,%xmm5
524	movdqa	%xmm7,80(%ebx)
525	pxor	%xmm5,%xmm3
526	paddd	%xmm2,%xmm0
527	movdqa	%xmm3,%xmm1
528	pslld	$7,%xmm3
529	psrld	$25,%xmm1
530	pxor	%xmm0,%xmm6
531	por	%xmm1,%xmm3
532	movdqa	%xmm5,16(%ebx)
533	pshufb	(%eax),%xmm6
534	movdqa	%xmm3,-48(%ebx)
535	paddd	%xmm6,%xmm4
536	movdqa	48(%ebx),%xmm5
537	pxor	%xmm4,%xmm2
538	movdqa	-16(%ebx),%xmm3
539	movdqa	%xmm2,%xmm1
540	pslld	$12,%xmm2
541	psrld	$20,%xmm1
542	por	%xmm1,%xmm2
543	movdqa	-80(%ebx),%xmm1
544	paddd	%xmm2,%xmm0
545	movdqa	112(%ebx),%xmm7
546	pxor	%xmm0,%xmm6
547	movdqa	%xmm0,-96(%ebx)
548	pshufb	16(%eax),%xmm6
549	paddd	%xmm6,%xmm4
550	movdqa	%xmm6,96(%ebx)
551	pxor	%xmm4,%xmm2
552	paddd	%xmm3,%xmm1
553	movdqa	%xmm2,%xmm0
554	pslld	$7,%xmm2
555	psrld	$25,%xmm0
556	pxor	%xmm1,%xmm7
557	por	%xmm0,%xmm2
558	pshufb	(%eax),%xmm7
559	movdqa	%xmm2,-32(%ebx)
560	paddd	%xmm7,%xmm5
561	pxor	%xmm5,%xmm3
562	movdqa	-48(%ebx),%xmm2
563	movdqa	%xmm3,%xmm0
564	pslld	$12,%xmm3
565	psrld	$20,%xmm0
566	por	%xmm0,%xmm3
567	movdqa	-128(%ebx),%xmm0
568	paddd	%xmm3,%xmm1
569	pxor	%xmm1,%xmm7
570	movdqa	%xmm1,-80(%ebx)
571	pshufb	16(%eax),%xmm7
572	paddd	%xmm7,%xmm5
573	movdqa	%xmm7,%xmm6
574	pxor	%xmm5,%xmm3
575	paddd	%xmm2,%xmm0
576	movdqa	%xmm3,%xmm1
577	pslld	$7,%xmm3
578	psrld	$25,%xmm1
579	pxor	%xmm0,%xmm6
580	por	%xmm1,%xmm3
581	pshufb	(%eax),%xmm6
582	movdqa	%xmm3,-16(%ebx)
583	paddd	%xmm6,%xmm4
584	pxor	%xmm4,%xmm2
585	movdqa	-32(%ebx),%xmm3
586	movdqa	%xmm2,%xmm1
587	pslld	$12,%xmm2
588	psrld	$20,%xmm1
589	por	%xmm1,%xmm2
590	movdqa	-112(%ebx),%xmm1
591	paddd	%xmm2,%xmm0
592	movdqa	64(%ebx),%xmm7
593	pxor	%xmm0,%xmm6
594	movdqa	%xmm0,-128(%ebx)
595	pshufb	16(%eax),%xmm6
596	paddd	%xmm6,%xmm4
597	movdqa	%xmm6,112(%ebx)
598	pxor	%xmm4,%xmm2
599	paddd	%xmm3,%xmm1
600	movdqa	%xmm2,%xmm0
601	pslld	$7,%xmm2
602	psrld	$25,%xmm0
603	pxor	%xmm1,%xmm7
604	por	%xmm0,%xmm2
605	movdqa	%xmm4,32(%ebx)
606	pshufb	(%eax),%xmm7
607	movdqa	%xmm2,-48(%ebx)
608	paddd	%xmm7,%xmm5
609	movdqa	(%ebx),%xmm4
610	pxor	%xmm5,%xmm3
611	movdqa	-16(%ebx),%xmm2
612	movdqa	%xmm3,%xmm0
613	pslld	$12,%xmm3
614	psrld	$20,%xmm0
615	por	%xmm0,%xmm3
616	movdqa	-96(%ebx),%xmm0
617	paddd	%xmm3,%xmm1
618	movdqa	80(%ebx),%xmm6
619	pxor	%xmm1,%xmm7
620	movdqa	%xmm1,-112(%ebx)
621	pshufb	16(%eax),%xmm7
622	paddd	%xmm7,%xmm5
623	movdqa	%xmm7,64(%ebx)
624	pxor	%xmm5,%xmm3
625	paddd	%xmm2,%xmm0
626	movdqa	%xmm3,%xmm1
627	pslld	$7,%xmm3
628	psrld	$25,%xmm1
629	pxor	%xmm0,%xmm6
630	por	%xmm1,%xmm3
631	movdqa	%xmm5,48(%ebx)
632	pshufb	(%eax),%xmm6
633	movdqa	%xmm3,-32(%ebx)
634	paddd	%xmm6,%xmm4
635	movdqa	16(%ebx),%xmm5
636	pxor	%xmm4,%xmm2
637	movdqa	-64(%ebx),%xmm3
638	movdqa	%xmm2,%xmm1
639	pslld	$12,%xmm2
640	psrld	$20,%xmm1
641	por	%xmm1,%xmm2
642	movdqa	-80(%ebx),%xmm1
643	paddd	%xmm2,%xmm0
644	movdqa	96(%ebx),%xmm7
645	pxor	%xmm0,%xmm6
646	movdqa	%xmm0,-96(%ebx)
647	pshufb	16(%eax),%xmm6
648	paddd	%xmm6,%xmm4
649	movdqa	%xmm6,80(%ebx)
650	pxor	%xmm4,%xmm2
651	paddd	%xmm3,%xmm1
652	movdqa	%xmm2,%xmm0
653	pslld	$7,%xmm2
654	psrld	$25,%xmm0
655	pxor	%xmm1,%xmm7
656	por	%xmm0,%xmm2
657	pshufb	(%eax),%xmm7
658	movdqa	%xmm2,-16(%ebx)
659	paddd	%xmm7,%xmm5
660	pxor	%xmm5,%xmm3
661	movdqa	%xmm3,%xmm0
662	pslld	$12,%xmm3
663	psrld	$20,%xmm0
664	por	%xmm0,%xmm3
665	movdqa	-128(%ebx),%xmm0
666	paddd	%xmm3,%xmm1
667	movdqa	64(%ebx),%xmm6
668	pxor	%xmm1,%xmm7
669	movdqa	%xmm1,-80(%ebx)
670	pshufb	16(%eax),%xmm7
671	paddd	%xmm7,%xmm5
672	movdqa	%xmm7,96(%ebx)
673	pxor	%xmm5,%xmm3
674	movdqa	%xmm3,%xmm1
675	pslld	$7,%xmm3
676	psrld	$25,%xmm1
677	por	%xmm1,%xmm3
678	decl	%edx
679	jnz	L010loop
// Flush the rows still held in registers back to the working state.
680	movdqa	%xmm3,-64(%ebx)
681	movdqa	%xmm4,(%ebx)
682	movdqa	%xmm5,16(%ebx)
683	movdqa	%xmm6,64(%ebx)
684	movdqa	%xmm7,96(%ebx)
// Output, four rows at a time: add the input state, transpose the 4x4 dword
// matrix with the punpck* sequence so each register holds 16 consecutive
// bytes of one block, XOR with the source at displacements -128/-64/0/64
// (blocks 0..3) and store to the destination. Rows 0..3 first.
685	movdqa	-112(%ebx),%xmm1
686	movdqa	-96(%ebx),%xmm2
687	movdqa	-80(%ebx),%xmm3
688	paddd	-128(%ebp),%xmm0
689	paddd	-112(%ebp),%xmm1
690	paddd	-96(%ebp),%xmm2
691	paddd	-80(%ebp),%xmm3
692	movdqa	%xmm0,%xmm6
693	punpckldq	%xmm1,%xmm0
694	movdqa	%xmm2,%xmm7
695	punpckldq	%xmm3,%xmm2
696	punpckhdq	%xmm1,%xmm6
697	punpckhdq	%xmm3,%xmm7
698	movdqa	%xmm0,%xmm1
699	punpcklqdq	%xmm2,%xmm0
700	movdqa	%xmm6,%xmm3
701	punpcklqdq	%xmm7,%xmm6
702	punpckhqdq	%xmm2,%xmm1
703	punpckhqdq	%xmm7,%xmm3
704	movdqu	-128(%esi),%xmm4
705	movdqu	-64(%esi),%xmm5
706	movdqu	(%esi),%xmm2
707	movdqu	64(%esi),%xmm7
708	leal	16(%esi),%esi
709	pxor	%xmm0,%xmm4
710	movdqa	-64(%ebx),%xmm0
711	pxor	%xmm1,%xmm5
712	movdqa	-48(%ebx),%xmm1
713	pxor	%xmm2,%xmm6
714	movdqa	-32(%ebx),%xmm2
715	pxor	%xmm3,%xmm7
716	movdqa	-16(%ebx),%xmm3
717	movdqu	%xmm4,-128(%edi)
718	movdqu	%xmm5,-64(%edi)
719	movdqu	%xmm6,(%edi)
720	movdqu	%xmm7,64(%edi)
721	leal	16(%edi),%edi
// Rows 4..7.
722	paddd	-64(%ebp),%xmm0
723	paddd	-48(%ebp),%xmm1
724	paddd	-32(%ebp),%xmm2
725	paddd	-16(%ebp),%xmm3
726	movdqa	%xmm0,%xmm6
727	punpckldq	%xmm1,%xmm0
728	movdqa	%xmm2,%xmm7
729	punpckldq	%xmm3,%xmm2
730	punpckhdq	%xmm1,%xmm6
731	punpckhdq	%xmm3,%xmm7
732	movdqa	%xmm0,%xmm1
733	punpcklqdq	%xmm2,%xmm0
734	movdqa	%xmm6,%xmm3
735	punpcklqdq	%xmm7,%xmm6
736	punpckhqdq	%xmm2,%xmm1
737	punpckhqdq	%xmm7,%xmm3
738	movdqu	-128(%esi),%xmm4
739	movdqu	-64(%esi),%xmm5
740	movdqu	(%esi),%xmm2
741	movdqu	64(%esi),%xmm7
742	leal	16(%esi),%esi
743	pxor	%xmm0,%xmm4
744	movdqa	(%ebx),%xmm0
745	pxor	%xmm1,%xmm5
746	movdqa	16(%ebx),%xmm1
747	pxor	%xmm2,%xmm6
748	movdqa	32(%ebx),%xmm2
749	pxor	%xmm3,%xmm7
750	movdqa	48(%ebx),%xmm3
751	movdqu	%xmm4,-128(%edi)
752	movdqu	%xmm5,-64(%edi)
753	movdqu	%xmm6,(%edi)
754	movdqu	%xmm7,64(%edi)
755	leal	16(%edi),%edi
// Rows 8..11.
756	paddd	(%ebp),%xmm0
757	paddd	16(%ebp),%xmm1
758	paddd	32(%ebp),%xmm2
759	paddd	48(%ebp),%xmm3
760	movdqa	%xmm0,%xmm6
761	punpckldq	%xmm1,%xmm0
762	movdqa	%xmm2,%xmm7
763	punpckldq	%xmm3,%xmm2
764	punpckhdq	%xmm1,%xmm6
765	punpckhdq	%xmm3,%xmm7
766	movdqa	%xmm0,%xmm1
767	punpcklqdq	%xmm2,%xmm0
768	movdqa	%xmm6,%xmm3
769	punpcklqdq	%xmm7,%xmm6
770	punpckhqdq	%xmm2,%xmm1
771	punpckhqdq	%xmm7,%xmm3
772	movdqu	-128(%esi),%xmm4
773	movdqu	-64(%esi),%xmm5
774	movdqu	(%esi),%xmm2
775	movdqu	64(%esi),%xmm7
776	leal	16(%esi),%esi
777	pxor	%xmm0,%xmm4
778	movdqa	64(%ebx),%xmm0
779	pxor	%xmm1,%xmm5
780	movdqa	80(%ebx),%xmm1
781	pxor	%xmm2,%xmm6
782	movdqa	96(%ebx),%xmm2
783	pxor	%xmm3,%xmm7
784	movdqa	112(%ebx),%xmm3
785	movdqu	%xmm4,-128(%edi)
786	movdqu	%xmm5,-64(%edi)
787	movdqu	%xmm6,(%edi)
788	movdqu	%xmm7,64(%edi)
789	leal	16(%edi),%edi
// Rows 12..15.
790	paddd	64(%ebp),%xmm0
791	paddd	80(%ebp),%xmm1
792	paddd	96(%ebp),%xmm2
793	paddd	112(%ebp),%xmm3
794	movdqa	%xmm0,%xmm6
795	punpckldq	%xmm1,%xmm0
796	movdqa	%xmm2,%xmm7
797	punpckldq	%xmm3,%xmm2
798	punpckhdq	%xmm1,%xmm6
799	punpckhdq	%xmm3,%xmm7
800	movdqa	%xmm0,%xmm1
801	punpcklqdq	%xmm2,%xmm0
802	movdqa	%xmm6,%xmm3
803	punpcklqdq	%xmm7,%xmm6
804	punpckhqdq	%xmm2,%xmm1
805	punpckhqdq	%xmm7,%xmm3
806	movdqu	-128(%esi),%xmm4
807	movdqu	-64(%esi),%xmm5
808	movdqu	(%esi),%xmm2
809	movdqu	64(%esi),%xmm7
// 3*16 + 208 = 256: the pointers end up one four-block group further on.
810	leal	208(%esi),%esi
811	pxor	%xmm0,%xmm4
812	pxor	%xmm1,%xmm5
813	pxor	%xmm2,%xmm6
814	pxor	%xmm3,%xmm7
815	movdqu	%xmm4,-128(%edi)
816	movdqu	%xmm5,-64(%edi)
817	movdqu	%xmm6,(%edi)
818	movdqu	%xmm7,64(%edi)
819	leal	208(%edi),%edi
// Loop while at least another 256 bytes remain (no borrow).
820	subl	$256,%ecx
821	jnc	L009outer_loop
// Undo the last subtraction; finish if nothing is left.
822	addl	$256,%ecx
823	jz	L011done
// Hand the remaining 1..255 bytes to the one-block path: restore the key
// and counter pointers, remove the 128-byte bias from the data pointers and
// rebuild the counter block in %xmm3. Its first dword is the lane-0 counter
// plus 4; the other three dwords come from the caller block (the mask at
// 112(%eax) clears only the first dword).
824	movl	520(%esp),%ebx
825	leal	-128(%esi),%esi
826	movl	516(%esp),%edx
827	leal	-128(%edi),%edi
828	movd	64(%ebp),%xmm2
829	movdqu	(%ebx),%xmm3
830	paddd	96(%eax),%xmm2
831	pand	112(%eax),%xmm3
832	por	%xmm2,%xmm3
833L0081x:
// One-block path. On entry %xmm3 holds the counter block, %edx the key
// pointer, %ecx the byte count. %xmm0 = constants, %xmm1/%xmm2 = key halves,
// %xmm6/%xmm7 = pshufb masks for the rotations by 16 and 8.
834	movdqa	32(%eax),%xmm0
835	movdqu	(%edx),%xmm1
836	movdqu	16(%edx),%xmm2
837	movdqa	(%eax),%xmm6
838	movdqa	16(%eax),%xmm7
// NOTE(review): this dword store is overwritten by the 16-byte store of
// %xmm3 to 48(%esp) four instructions below.
839	movl	%ebp,48(%esp)
// Keep the input state at 0..63(%esp) for the final addition.
840	movdqa	%xmm0,(%esp)
841	movdqa	%xmm1,16(%esp)
842	movdqa	%xmm2,32(%esp)
843	movdqa	%xmm3,48(%esp)
844	movl	$10,%edx
845	jmp	L012loop1x
846.align	4,0x90
847L013outer1x:
// Next block: reload the input state and add {1,0,0,0} to the counter row.
848	movdqa	80(%eax),%xmm3
849	movdqa	(%esp),%xmm0
850	movdqa	16(%esp),%xmm1
851	movdqa	32(%esp),%xmm2
852	paddd	48(%esp),%xmm3
853	movl	$10,%edx
854	movdqa	%xmm3,48(%esp)
855	jmp	L012loop1x
856.align	4,0x90
857L012loop1x:
// One iteration is two rounds on a single block held as four rows in
// %xmm0..%xmm3. The pshufd triples rotate the lanes of rows 1..3 between
// the two halves and back again at the end.
858	paddd	%xmm1,%xmm0
859	pxor	%xmm0,%xmm3
// Encoded "pshufb %xmm6,%xmm3" (rotate each dword left by 16).
860.byte	102,15,56,0,222
861	paddd	%xmm3,%xmm2
862	pxor	%xmm2,%xmm1
863	movdqa	%xmm1,%xmm4
864	psrld	$20,%xmm1
865	pslld	$12,%xmm4
866	por	%xmm4,%xmm1
867	paddd	%xmm1,%xmm0
868	pxor	%xmm0,%xmm3
// Encoded "pshufb %xmm7,%xmm3" (rotate each dword left by 8).
869.byte	102,15,56,0,223
870	paddd	%xmm3,%xmm2
871	pxor	%xmm2,%xmm1
872	movdqa	%xmm1,%xmm4
873	psrld	$25,%xmm1
874	pslld	$7,%xmm4
875	por	%xmm4,%xmm1
876	pshufd	$78,%xmm2,%xmm2
877	pshufd	$57,%xmm1,%xmm1
878	pshufd	$147,%xmm3,%xmm3
879	nop
880	paddd	%xmm1,%xmm0
881	pxor	%xmm0,%xmm3
// Encoded "pshufb %xmm6,%xmm3".
882.byte	102,15,56,0,222
883	paddd	%xmm3,%xmm2
884	pxor	%xmm2,%xmm1
885	movdqa	%xmm1,%xmm4
886	psrld	$20,%xmm1
887	pslld	$12,%xmm4
888	por	%xmm4,%xmm1
889	paddd	%xmm1,%xmm0
890	pxor	%xmm0,%xmm3
// Encoded "pshufb %xmm7,%xmm3".
891.byte	102,15,56,0,223
892	paddd	%xmm3,%xmm2
893	pxor	%xmm2,%xmm1
894	movdqa	%xmm1,%xmm4
895	psrld	$25,%xmm1
896	pslld	$7,%xmm4
897	por	%xmm4,%xmm1
898	pshufd	$78,%xmm2,%xmm2
899	pshufd	$147,%xmm1,%xmm1
900	pshufd	$57,%xmm3,%xmm3
901	decl	%edx
902	jnz	L012loop1x
// Add the input state to obtain the 64-byte key-stream block.
903	paddd	(%esp),%xmm0
904	paddd	16(%esp),%xmm1
905	paddd	32(%esp),%xmm2
906	paddd	48(%esp),%xmm3
907	cmpl	$64,%ecx
908	jb	L014tail
// Full block: XOR 64 source bytes with the key stream and store them.
909	movdqu	(%esi),%xmm4
910	movdqu	16(%esi),%xmm5
911	pxor	%xmm4,%xmm0
912	movdqu	32(%esi),%xmm4
913	pxor	%xmm5,%xmm1
914	movdqu	48(%esi),%xmm5
915	pxor	%xmm4,%xmm2
916	pxor	%xmm5,%xmm3
917	leal	64(%esi),%esi
918	movdqu	%xmm0,(%edi)
919	movdqu	%xmm1,16(%edi)
920	movdqu	%xmm2,32(%edi)
921	movdqu	%xmm3,48(%edi)
922	leal	64(%edi),%edi
923	subl	$64,%ecx
924	jnz	L013outer1x
925	jmp	L011done
926L014tail:
// Fewer than 64 bytes remain: spill the key stream to 0..63(%esp) (the
// input state is no longer needed) and XOR byte by byte, %ecx times.
927	movdqa	%xmm0,(%esp)
928	movdqa	%xmm1,16(%esp)
929	movdqa	%xmm2,32(%esp)
930	movdqa	%xmm3,48(%esp)
931	xorl	%eax,%eax
932	xorl	%edx,%edx
933	xorl	%ebp,%ebp
934L015tail_loop:
// %ebp is the byte index; the store uses -1(%edi,%ebp) because %ebp has
// already been incremented.
935	movb	(%esp,%ebp,1),%al
936	movb	(%esi,%ebp,1),%dl
937	leal	1(%ebp),%ebp
938	xorb	%dl,%al
939	movb	%al,-1(%edi,%ebp,1)
940	decl	%ecx
941	jnz	L015tail_loop
942L011done:
// Restore the %esp saved at function entry, then the callee-saved registers.
943	movl	512(%esp),%esp
944	popl	%edi
945	popl	%esi
946	popl	%ebx
947	popl	%ebp
948	ret
// Constant table for the SSSE3 code, addressed as offset(%eax). It is read
// with aligned 16-byte accesses (movdqa/paddd/pand memory operands), so the
// label must stay at least 16-byte aligned.
949.align	6,0x90
950Lssse3_data:
// +0: pshufb mask rotating every dword left by 16 bits.
951.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
// +16: pshufb mask rotating every dword left by 8 bits.
952.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
// +32: state words 0..3 (0x61707865, 0x3320646e, 0x79622d32, 0x6b206574).
953.long	1634760805,857760878,2036477234,1797285236
// +48: per-lane counter offsets used when setting up four parallel blocks.
954.long	0,1,2,3
// +64: counter step for one group of four blocks.
955.long	4,4,4,4
// +80: counter step for the one-block path.
956.long	1,0,0,0
// +96: added to the lane-0 counter when handing over to the one-block path.
957.long	4,0,0,0
// +112: mask that clears the first dword and keeps the other three.
958.long	0,-1,-1,-1
959.align	6,0x90
// NUL-terminated ASCII ident string:
// "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>".
960.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
961.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
962.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
963.byte	114,103,62,0
// Non-lazy symbol pointer slot for _OPENSSL_ia32cap_P. _ChaCha20_ctr32 loads
// this slot PC-relatively to obtain the address of the capability vector.
// The .long 0 placeholder is presumably filled in by the dynamic linker
// (standard Mach-O non_lazy_symbol_pointers behaviour; not visible here).
964.section __IMPORT,__pointers,non_lazy_symbol_pointers
965L_OPENSSL_ia32cap_P$non_lazy_ptr:
966.indirect_symbol	_OPENSSL_ia32cap_P
967.long	0
968#endif
969