# salsa20_pm.s version 20051229
# D. J. Bernstein
# Public domain.
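# Salsa20 for the ECRYPT (eSTREAM) stream-cipher API. The "pm" suffix
# presumably marks this as the Pentium M tuned x86 version. The "# a = b"
# comments record the higher-level operation behind each instruction.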

# enter ECRYPT_encrypt_bytes
.text
.p2align 5
.globl ECRYPT_encrypt_bytes
ECRYPT_encrypt_bytes:
	mov	%esp,%eax
	and	$31,%eax
	add	$256,%eax
	sub	%eax,%esp
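	# The four instructions above build the stack frame: eax = (old esp
	# mod 32) + 256, so esp -= eax leaves esp 32-byte aligned with at
	# least 256 bytes of scratch space; adding eax back at the end
	# releases the frame.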
	# eax_stack = eax
	movl	%eax,80(%esp)
	# ebx_stack = ebx
	movl	%ebx,84(%esp)
	# esi_stack = esi
	movl	%esi,88(%esp)
	# edi_stack = edi
	movl	%edi,92(%esp)
	# ebp_stack = ebp
	movl	%ebp,96(%esp)
	# x = arg1
	movl	4(%esp,%eax),%edx
	# m = arg2
	movl	8(%esp,%eax),%esi
	# out = arg3
	movl	12(%esp,%eax),%edi
	# bytes = arg4
	movl	16(%esp,%eax),%ebx
	# bytes -= 0
	sub	$0,%ebx
	# goto done if unsigned<=
	jbe	._done
._start:
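	# Load the 16-word input state x[0..15] and park it in stack slots
	# j0..j15 (164..224(%esp)); each block below works on a fresh copy
	# x0..x15 so that j0..j15 keep the per-block input for the final
	# feedforward addition.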
	# in0 = *(uint32 *) (x + 0)
	movl	0(%edx),%eax
	# in1 = *(uint32 *) (x + 4)
	movl	4(%edx),%ecx
	# in2 = *(uint32 *) (x + 8)
	movl	8(%edx),%ebp
	# j0 = in0
	movl	%eax,164(%esp)
	# in3 = *(uint32 *) (x + 12)
	movl	12(%edx),%eax
	# j1 = in1
	movl	%ecx,168(%esp)
	# in4 = *(uint32 *) (x + 16)
	movl	16(%edx),%ecx
	# j2 = in2
	movl	%ebp,172(%esp)
	# in5 = *(uint32 *) (x + 20)
	movl	20(%edx),%ebp
	# j3 = in3
	movl	%eax,176(%esp)
	# in6 = *(uint32 *) (x + 24)
	movl	24(%edx),%eax
	# j4 = in4
	movl	%ecx,180(%esp)
	# in7 = *(uint32 *) (x + 28)
	movl	28(%edx),%ecx
	# j5 = in5
	movl	%ebp,184(%esp)
	# in8 = *(uint32 *) (x + 32)
	movl	32(%edx),%ebp
	# j6 = in6
	movl	%eax,188(%esp)
	# in9 = *(uint32 *) (x + 36)
	movl	36(%edx),%eax
	# j7 = in7
	movl	%ecx,192(%esp)
	# in10 = *(uint32 *) (x + 40)
	movl	40(%edx),%ecx
	# j8 = in8
	movl	%ebp,196(%esp)
	# in11 = *(uint32 *) (x + 44)
	movl	44(%edx),%ebp
	# j9 = in9
	movl	%eax,200(%esp)
	# in12 = *(uint32 *) (x + 48)
	movl	48(%edx),%eax
	# j10 = in10
	movl	%ecx,204(%esp)
	# in13 = *(uint32 *) (x + 52)
	movl	52(%edx),%ecx
	# j11 = in11
	movl	%ebp,208(%esp)
	# in14 = *(uint32 *) (x + 56)
	movl	56(%edx),%ebp
	# j12 = in12
	movl	%eax,212(%esp)
	# in15 = *(uint32 *) (x + 60)
	movl	60(%edx),%eax
	# j13 = in13
	movl	%ecx,216(%esp)
	# j14 = in14
	movl	%ebp,220(%esp)
	# j15 = in15
	movl	%eax,224(%esp)
	# x_backup = x
	movl	%edx,64(%esp)
._bytesatleast1:
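	# Blocks shorter than 64 bytes: stage the remaining input in the
	# 64-byte tmp buffer at 0(%esp), point both m and out at tmp, and
	# remember the real destination in ctarget for the copy-back at the
	# end of the block loop.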
	#   bytes - 64
	cmp	$64,%ebx
	#   goto nocopy if unsigned>=
	jae	._nocopy
	#     ctarget = out
	movl	%edi,228(%esp)
	#     out = &tmp
	leal	0(%esp),%edi
	#     i = bytes
	mov	%ebx,%ecx
	#     while (i) { *out++ = *m++; --i }
	rep	movsb
	#     out = &tmp
	leal	0(%esp),%edi
	#     m = &tmp
	leal	0(%esp),%esi
._nocopy:
	#   out_backup = out
	movl	%edi,72(%esp)
	#   m_backup = m
	movl	%esi,68(%esp)
	#   bytes_backup = bytes
	movl	%ebx,76(%esp)
	#   in0 = j0
	movl	164(%esp),%eax
	#   in1 = j1
	movl	168(%esp),%ecx
	#   in2 = j2
	movl	172(%esp),%edx
	#   in3 = j3
	movl	176(%esp),%ebx
	#   x0 = in0
	movl	%eax,100(%esp)
	#   x1 = in1
	movl	%ecx,104(%esp)
	#   x2 = in2
	movl	%edx,108(%esp)
	#   x3 = in3
	movl	%ebx,112(%esp)
	#   in4 = j4
	movl	180(%esp),%eax
	#   in5 = j5
	movl	184(%esp),%ecx
	#   in6 = j6
	movl	188(%esp),%edx
	#   in7 = j7
	movl	192(%esp),%ebx
	#   x4 = in4
	movl	%eax,116(%esp)
	#   x5 = in5
	movl	%ecx,120(%esp)
	#   x6 = in6
	movl	%edx,124(%esp)
	#   x7 = in7
	movl	%ebx,128(%esp)
	#   in8 = j8
	movl	196(%esp),%eax
	#   in9 = j9
	movl	200(%esp),%ecx
	#   in10 = j10
	movl	204(%esp),%edx
	#   in11 = j11
	movl	208(%esp),%ebx
	#   x8 = in8
	movl	%eax,132(%esp)
	#   x9 = in9
	movl	%ecx,136(%esp)
	#   x10 = in10
	movl	%edx,140(%esp)
	#   x11 = in11
	movl	%ebx,144(%esp)
	#   in12 = j12
	movl	212(%esp),%eax
	#   in13 = j13
	movl	216(%esp),%ecx
	#   in14 = j14
	movl	220(%esp),%edx
	#   in15 = j15
	movl	224(%esp),%ebx
	#   x12 = in12
	movl	%eax,148(%esp)
	#   x13 = in13
	movl	%ecx,152(%esp)
	#   x14 = in14
	movl	%edx,156(%esp)
	#   x15 = in15
	movl	%ebx,160(%esp)
	#   i = 20
	mov	$20,%ebp
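	#   i counts rounds. Each pass through ._mainloop performs two
	#   Salsa20 double rounds (a column round then a row round, twice),
	#   so i drops by 4 per pass; five passes give the full 20 rounds.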
	# p = x0
	movl	100(%esp),%eax
	# s = x5
	movl	120(%esp),%ecx
	# t = x10
	movl	140(%esp),%edx
	# w = x15
	movl	160(%esp),%ebx
._mainloop:
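	# Four independent quarter-rounds run interleaved: p, r/s, t, and
	# v/w hold the live words of one quarter-round each, and the comment
	# indentation below marks which of the four streams a line belongs
	# to.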
	# x0 = p
	movl	%eax,100(%esp)
	# 				x10 = t
	movl	%edx,140(%esp)
	# p += x12
	addl	148(%esp),%eax
	# 		x5 = s
	movl	%ecx,120(%esp)
	# 				t += x6
	addl	124(%esp),%edx
	# 						x15 = w
	movl	%ebx,160(%esp)
	# 		r = x1
	movl	104(%esp),%esi
	# 		r += s
	add	%ecx,%esi
	# 						v = x11
	movl	144(%esp),%edi
	# 						v += w
	add	%ebx,%edi
	# p <<<= 7
	rol	$7,%eax
	# p ^= x4
	xorl	116(%esp),%eax
	# 				t <<<= 7
	rol	$7,%edx
	# 				t ^= x14
	xorl	156(%esp),%edx
	# 		r <<<= 7
	rol	$7,%esi
	# 		r ^= x9
	xorl	136(%esp),%esi
	# 						v <<<= 7
	rol	$7,%edi
	# 						v ^= x3
	xorl	112(%esp),%edi
	# x4 = p
	movl	%eax,116(%esp)
	# 				x14 = t
	movl	%edx,156(%esp)
	# p += x0
	addl	100(%esp),%eax
	# 		x9 = r
	movl	%esi,136(%esp)
	# 				t += x10
	addl	140(%esp),%edx
	# 						x3 = v
	movl	%edi,112(%esp)
	# p <<<= 9
	rol	$9,%eax
	# p ^= x8
	xorl	132(%esp),%eax
	# 				t <<<= 9
	rol	$9,%edx
	# 				t ^= x2
	xorl	108(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 9
	rol	$9,%ecx
	# 		s ^= x13
	xorl	152(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 9
	rol	$9,%ebx
	# 						w ^= x7
	xorl	128(%esp),%ebx
	# x8 = p
	movl	%eax,132(%esp)
	# 				x2 = t
	movl	%edx,108(%esp)
	# p += x4
	addl	116(%esp),%eax
	# 		x13 = s
	movl	%ecx,152(%esp)
	# 				t += x14
	addl	156(%esp),%edx
	# 						x7 = w
	movl	%ebx,128(%esp)
	# p <<<= 13
	rol	$13,%eax
	# p ^= x12
	xorl	148(%esp),%eax
	# 				t <<<= 13
	rol	$13,%edx
	# 				t ^= x6
	xorl	124(%esp),%edx
	# 		r += s
	add	%ecx,%esi
	# 		r <<<= 13
	rol	$13,%esi
	# 		r ^= x1
	xorl	104(%esp),%esi
	# 						v += w
	add	%ebx,%edi
	# 						v <<<= 13
	rol	$13,%edi
	# 						v ^= x11
	xorl	144(%esp),%edi
	# x12 = p
	movl	%eax,148(%esp)
	# 				x6 = t
	movl	%edx,124(%esp)
	# p += x8
	addl	132(%esp),%eax
	# 		x1 = r
	movl	%esi,104(%esp)
	# 				t += x2
	addl	108(%esp),%edx
	# 						x11 = v
	movl	%edi,144(%esp)
	# p <<<= 18
	rol	$18,%eax
	# p ^= x0
	xorl	100(%esp),%eax
	# 				t <<<= 18
	rol	$18,%edx
	# 				t ^= x10
	xorl	140(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 18
	rol	$18,%ecx
	# 		s ^= x5
	xorl	120(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 18
	rol	$18,%ebx
	# 						w ^= x15
	xorl	160(%esp),%ebx
	# x0 = p
	movl	%eax,100(%esp)
	# 				x10 = t
	movl	%edx,140(%esp)
	# p += x3
	addl	112(%esp),%eax
	# p <<<= 7
	rol	$7,%eax
	# 		x5 = s
	movl	%ecx,120(%esp)
	# 				t += x9
	addl	136(%esp),%edx
	# 						x15 = w
	movl	%ebx,160(%esp)
	# 		r = x4
	movl	116(%esp),%esi
	# 		r += s
	add	%ecx,%esi
	# 						v = x14
	movl	156(%esp),%edi
	# 						v += w
	add	%ebx,%edi
	# p ^= x1
	xorl	104(%esp),%eax
	# 				t <<<= 7
	rol	$7,%edx
	# 				t ^= x11
	xorl	144(%esp),%edx
	# 		r <<<= 7
	rol	$7,%esi
	# 		r ^= x6
	xorl	124(%esp),%esi
	# 						v <<<= 7
	rol	$7,%edi
	# 						v ^= x12
	xorl	148(%esp),%edi
	# x1 = p
	movl	%eax,104(%esp)
	# 				x11 = t
	movl	%edx,144(%esp)
	# p += x0
	addl	100(%esp),%eax
	# 		x6 = r
	movl	%esi,124(%esp)
	# 				t += x10
	addl	140(%esp),%edx
	# 						x12 = v
	movl	%edi,148(%esp)
	# p <<<= 9
	rol	$9,%eax
	# p ^= x2
	xorl	108(%esp),%eax
	# 				t <<<= 9
	rol	$9,%edx
	# 				t ^= x8
	xorl	132(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 9
	rol	$9,%ecx
	# 		s ^= x7
	xorl	128(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 9
	rol	$9,%ebx
	# 						w ^= x13
	xorl	152(%esp),%ebx
	# x2 = p
	movl	%eax,108(%esp)
	# 				x8 = t
	movl	%edx,132(%esp)
	# p += x1
	addl	104(%esp),%eax
	# 		x7 = s
	movl	%ecx,128(%esp)
	# 				t += x11
	addl	144(%esp),%edx
	# 						x13 = w
	movl	%ebx,152(%esp)
	# p <<<= 13
	rol	$13,%eax
	# p ^= x3
	xorl	112(%esp),%eax
	# 				t <<<= 13
	rol	$13,%edx
	# 				t ^= x9
	xorl	136(%esp),%edx
	# 		r += s
	add	%ecx,%esi
	# 		r <<<= 13
	rol	$13,%esi
	# 		r ^= x4
	xorl	116(%esp),%esi
	# 						v += w
	add	%ebx,%edi
	# 						v <<<= 13
	rol	$13,%edi
	# 						v ^= x14
	xorl	156(%esp),%edi
	# x3 = p
	movl	%eax,112(%esp)
	# 				x9 = t
	movl	%edx,136(%esp)
	# p += x2
	addl	108(%esp),%eax
	# 		x4 = r
	movl	%esi,116(%esp)
	# 				t += x8
	addl	132(%esp),%edx
	# 						x14 = v
	movl	%edi,156(%esp)
	# p <<<= 18
	rol	$18,%eax
	# p ^= x0
	xorl	100(%esp),%eax
	# 				t <<<= 18
	rol	$18,%edx
	# 				t ^= x10
	xorl	140(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 18
	rol	$18,%ecx
	# 		s ^= x5
	xorl	120(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 18
	rol	$18,%ebx
	# 						w ^= x15
	xorl	160(%esp),%ebx
	# x0 = p
	movl	%eax,100(%esp)
	# 				x10 = t
	movl	%edx,140(%esp)
	# p += x12
	addl	148(%esp),%eax
	# 		x5 = s
	movl	%ecx,120(%esp)
	# 				t += x6
	addl	124(%esp),%edx
	# 						x15 = w
	movl	%ebx,160(%esp)
	# 		r = x1
	movl	104(%esp),%esi
	# 		r += s
	add	%ecx,%esi
	# 						v = x11
	movl	144(%esp),%edi
	# 						v += w
	add	%ebx,%edi
	# p <<<= 7
	rol	$7,%eax
	# p ^= x4
	xorl	116(%esp),%eax
	# 				t <<<= 7
	rol	$7,%edx
	# 				t ^= x14
	xorl	156(%esp),%edx
	# 		r <<<= 7
	rol	$7,%esi
	# 		r ^= x9
	xorl	136(%esp),%esi
	# 						v <<<= 7
	rol	$7,%edi
	# 						v ^= x3
	xorl	112(%esp),%edi
	# x4 = p
	movl	%eax,116(%esp)
	# 				x14 = t
	movl	%edx,156(%esp)
	# p += x0
	addl	100(%esp),%eax
	# 		x9 = r
	movl	%esi,136(%esp)
	# 				t += x10
	addl	140(%esp),%edx
	# 						x3 = v
	movl	%edi,112(%esp)
	# p <<<= 9
	rol	$9,%eax
	# p ^= x8
	xorl	132(%esp),%eax
	# 				t <<<= 9
	rol	$9,%edx
	# 				t ^= x2
	xorl	108(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 9
	rol	$9,%ecx
	# 		s ^= x13
	xorl	152(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 9
	rol	$9,%ebx
	# 						w ^= x7
	xorl	128(%esp),%ebx
	# x8 = p
	movl	%eax,132(%esp)
	# 				x2 = t
	movl	%edx,108(%esp)
	# p += x4
	addl	116(%esp),%eax
	# 		x13 = s
	movl	%ecx,152(%esp)
	# 				t += x14
	addl	156(%esp),%edx
	# 						x7 = w
	movl	%ebx,128(%esp)
	# p <<<= 13
	rol	$13,%eax
	# p ^= x12
	xorl	148(%esp),%eax
	# 				t <<<= 13
	rol	$13,%edx
	# 				t ^= x6
	xorl	124(%esp),%edx
	# 		r += s
	add	%ecx,%esi
	# 		r <<<= 13
	rol	$13,%esi
	# 		r ^= x1
	xorl	104(%esp),%esi
	# 						v += w
	add	%ebx,%edi
	# 						v <<<= 13
	rol	$13,%edi
	# 						v ^= x11
	xorl	144(%esp),%edi
	# x12 = p
	movl	%eax,148(%esp)
	# 				x6 = t
	movl	%edx,124(%esp)
	# p += x8
	addl	132(%esp),%eax
	# 		x1 = r
	movl	%esi,104(%esp)
	# 				t += x2
	addl	108(%esp),%edx
	# 						x11 = v
	movl	%edi,144(%esp)
	# p <<<= 18
	rol	$18,%eax
	# p ^= x0
	xorl	100(%esp),%eax
	# 				t <<<= 18
	rol	$18,%edx
	# 				t ^= x10
	xorl	140(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 18
	rol	$18,%ecx
	# 		s ^= x5
	xorl	120(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 18
	rol	$18,%ebx
	# 						w ^= x15
	xorl	160(%esp),%ebx
	# x0 = p
	movl	%eax,100(%esp)
	# 				x10 = t
	movl	%edx,140(%esp)
	# p += x3
	addl	112(%esp),%eax
	# p <<<= 7
	rol	$7,%eax
	# 		x5 = s
	movl	%ecx,120(%esp)
	# 				t += x9
	addl	136(%esp),%edx
	# 						x15 = w
	movl	%ebx,160(%esp)
	# 		r = x4
	movl	116(%esp),%esi
	# 		r += s
	add	%ecx,%esi
	# 						v = x14
	movl	156(%esp),%edi
	# 						v += w
	add	%ebx,%edi
	# p ^= x1
	xorl	104(%esp),%eax
	# 				t <<<= 7
	rol	$7,%edx
	# 				t ^= x11
	xorl	144(%esp),%edx
	# 		r <<<= 7
	rol	$7,%esi
	# 		r ^= x6
	xorl	124(%esp),%esi
	# 						v <<<= 7
	rol	$7,%edi
	# 						v ^= x12
	xorl	148(%esp),%edi
	# x1 = p
	movl	%eax,104(%esp)
	# 				x11 = t
	movl	%edx,144(%esp)
	# p += x0
	addl	100(%esp),%eax
	# 		x6 = r
	movl	%esi,124(%esp)
	# 				t += x10
	addl	140(%esp),%edx
	# 						x12 = v
	movl	%edi,148(%esp)
	# p <<<= 9
	rol	$9,%eax
	# p ^= x2
	xorl	108(%esp),%eax
	# 				t <<<= 9
	rol	$9,%edx
	# 				t ^= x8
	xorl	132(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 9
	rol	$9,%ecx
	# 		s ^= x7
	xorl	128(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 9
	rol	$9,%ebx
	# 						w ^= x13
	xorl	152(%esp),%ebx
	# x2 = p
	movl	%eax,108(%esp)
	# 				x8 = t
	movl	%edx,132(%esp)
	# p += x1
	addl	104(%esp),%eax
	# 		x7 = s
	movl	%ecx,128(%esp)
	# 				t += x11
	addl	144(%esp),%edx
	# 						x13 = w
	movl	%ebx,152(%esp)
	# p <<<= 13
	rol	$13,%eax
	# p ^= x3
	xorl	112(%esp),%eax
	# 				t <<<= 13
	rol	$13,%edx
	# 				t ^= x9
	xorl	136(%esp),%edx
	# 		r += s
	add	%ecx,%esi
	# 		r <<<= 13
	rol	$13,%esi
	# 		r ^= x4
	xorl	116(%esp),%esi
	# 						v += w
	add	%ebx,%edi
	# 						v <<<= 13
	rol	$13,%edi
	# 						v ^= x14
	xorl	156(%esp),%edi
	# x3 = p
	movl	%eax,112(%esp)
	# 				x9 = t
	movl	%edx,136(%esp)
	# p += x2
	addl	108(%esp),%eax
	# 		x4 = r
	movl	%esi,116(%esp)
	# 				t += x8
	addl	132(%esp),%edx
	# 						x14 = v
	movl	%edi,156(%esp)
	# p <<<= 18
	rol	$18,%eax
	# p ^= x0
	xorl	100(%esp),%eax
	# 				t <<<= 18
	rol	$18,%edx
	# 				t ^= x10
	xorl	140(%esp),%edx
	# 		s += r
	add	%esi,%ecx
	# 		s <<<= 18
	rol	$18,%ecx
	# 		s ^= x5
	xorl	120(%esp),%ecx
	# 						w += v
	add	%edi,%ebx
	# 						w <<<= 18
	rol	$18,%ebx
	# 						w ^= x15
	xorl	160(%esp),%ebx
	# i -= 4
	sub	$4,%ebp
	# goto mainloop if unsigned >
	ja	._mainloop
	# x0 = p
	movl	%eax,100(%esp)
	# x5 = s
	movl	%ecx,120(%esp)
	# x10 = t
	movl	%edx,140(%esp)
	# x15 = w
	movl	%ebx,160(%esp)
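	#   Feedforward and output: add the saved input j0..j15 back into
	#   x0..x15, XOR the 64-byte keystream block with the message m,
	#   and store the result to out.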
	#   out = out_backup
	movl	72(%esp),%edi
	#   m = m_backup
	movl	68(%esp),%esi
	#   in0 = x0
	movl	100(%esp),%eax
	#   in1 = x1
	movl	104(%esp),%ecx
	#   in0 += j0
	addl	164(%esp),%eax
	#   in1 += j1
	addl	168(%esp),%ecx
	#   in0 ^= *(uint32 *) (m + 0)
	xorl	0(%esi),%eax
	#   in1 ^= *(uint32 *) (m + 4)
	xorl	4(%esi),%ecx
	#   *(uint32 *) (out + 0) = in0
	movl	%eax,0(%edi)
	#   *(uint32 *) (out + 4) = in1
	movl	%ecx,4(%edi)
	#   in2 = x2
	movl	108(%esp),%eax
	#   in3 = x3
	movl	112(%esp),%ecx
	#   in2 += j2
	addl	172(%esp),%eax
	#   in3 += j3
	addl	176(%esp),%ecx
	#   in2 ^= *(uint32 *) (m + 8)
	xorl	8(%esi),%eax
	#   in3 ^= *(uint32 *) (m + 12)
	xorl	12(%esi),%ecx
	#   *(uint32 *) (out + 8) = in2
	movl	%eax,8(%edi)
	#   *(uint32 *) (out + 12) = in3
	movl	%ecx,12(%edi)
	#   in4 = x4
	movl	116(%esp),%eax
	#   in5 = x5
	movl	120(%esp),%ecx
	#   in4 += j4
	addl	180(%esp),%eax
	#   in5 += j5
	addl	184(%esp),%ecx
	#   in4 ^= *(uint32 *) (m + 16)
	xorl	16(%esi),%eax
	#   in5 ^= *(uint32 *) (m + 20)
	xorl	20(%esi),%ecx
	#   *(uint32 *) (out + 16) = in4
	movl	%eax,16(%edi)
	#   *(uint32 *) (out + 20) = in5
	movl	%ecx,20(%edi)
	#   in6 = x6
	movl	124(%esp),%eax
	#   in7 = x7
	movl	128(%esp),%ecx
	#   in6 += j6
	addl	188(%esp),%eax
	#   in7 += j7
	addl	192(%esp),%ecx
	#   in6 ^= *(uint32 *) (m + 24)
	xorl	24(%esi),%eax
	#   in7 ^= *(uint32 *) (m + 28)
	xorl	28(%esi),%ecx
	#   *(uint32 *) (out + 24) = in6
	movl	%eax,24(%edi)
	#   *(uint32 *) (out + 28) = in7
	movl	%ecx,28(%edi)
	#   in8 = x8
	movl	132(%esp),%eax
	#   in9 = x9
	movl	136(%esp),%ecx
	#   in8 += j8
	addl	196(%esp),%eax
	#   in9 += j9
	addl	200(%esp),%ecx
	#   in8 ^= *(uint32 *) (m + 32)
	xorl	32(%esi),%eax
	#   in9 ^= *(uint32 *) (m + 36)
	xorl	36(%esi),%ecx
	#   *(uint32 *) (out + 32) = in8
	movl	%eax,32(%edi)
	#   *(uint32 *) (out + 36) = in9
	movl	%ecx,36(%edi)
	#   in10 = x10
	movl	140(%esp),%eax
	#   in11 = x11
	movl	144(%esp),%ecx
	#   in10 += j10
	addl	204(%esp),%eax
	#   in11 += j11
	addl	208(%esp),%ecx
	#   in10 ^= *(uint32 *) (m + 40)
	xorl	40(%esi),%eax
	#   in11 ^= *(uint32 *) (m + 44)
	xorl	44(%esi),%ecx
	#   *(uint32 *) (out + 40) = in10
	movl	%eax,40(%edi)
	#   *(uint32 *) (out + 44) = in11
	movl	%ecx,44(%edi)
	#   in12 = x12
	movl	148(%esp),%eax
	#   in13 = x13
	movl	152(%esp),%ecx
	#   in12 += j12
	addl	212(%esp),%eax
	#   in13 += j13
	addl	216(%esp),%ecx
	#   in12 ^= *(uint32 *) (m + 48)
	xorl	48(%esi),%eax
	#   in13 ^= *(uint32 *) (m + 52)
	xorl	52(%esi),%ecx
	#   *(uint32 *) (out + 48) = in12
	movl	%eax,48(%edi)
	#   *(uint32 *) (out + 52) = in13
	movl	%ecx,52(%edi)
	#   in14 = x14
	movl	156(%esp),%eax
	#   in15 = x15
	movl	160(%esp),%ecx
	#   in14 += j14
	addl	220(%esp),%eax
	#   in15 += j15
	addl	224(%esp),%ecx
	#   in14 ^= *(uint32 *) (m + 56)
	xorl	56(%esi),%eax
	#   in15 ^= *(uint32 *) (m + 60)
	xorl	60(%esi),%ecx
	#   *(uint32 *) (out + 56) = in14
	movl	%eax,56(%edi)
	#   *(uint32 *) (out + 60) = in15
	movl	%ecx,60(%edi)
	#   bytes = bytes_backup
	movl	76(%esp),%ebx
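	#   State words 8 and 9 hold the 64-bit block counter; add/adc
	#   increments j9:j8 as a single 64-bit value for the next block.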
	#   in8 = j8
	movl	196(%esp),%eax
	#   in9 = j9
	movl	200(%esp),%ecx
	#   in8 += 1
	add	$1,%eax
	#   in9 += 0 + carry
	adc	$0,%ecx
	#   j8 = in8
	movl	%eax,196(%esp)
	#   j9 = in9
	movl	%ecx,200(%esp)
	#   bytes - 64
	cmp	$64,%ebx
	#   goto bytesatleast65 if unsigned>
	ja	._bytesatleast65
	#     goto bytesatleast64 if unsigned>=
	jae	._bytesatleast64
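	#       bytes < 64: out still points at the tmp buffer, so copy only
	#       the remaining bytes back to the real destination saved in
	#       ctarget.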
	#       m = out
	mov	%edi,%esi
	#       out = ctarget
	movl	228(%esp),%edi
	#       i = bytes
	mov	%ebx,%ecx
	#       while (i) { *out++ = *m++; --i }
	rep	movsb
._bytesatleast64:
	#     x = x_backup
	movl	64(%esp),%eax
	#     in8 = j8
	movl	196(%esp),%ecx
	#     in9 = j9
	movl	200(%esp),%edx
	#     *(uint32 *) (x + 32) = in8
	movl	%ecx,32(%eax)
	#     *(uint32 *) (x + 36) = in9
	movl	%edx,36(%eax)
._done:
	#     eax = eax_stack
	movl	80(%esp),%eax
	#     ebx = ebx_stack
	movl	84(%esp),%ebx
	#     esi = esi_stack
	movl	88(%esp),%esi
	#     edi = edi_stack
	movl	92(%esp),%edi
	#     ebp = ebp_stack
	movl	96(%esp),%ebp
	#     leave
	add	%eax,%esp
	ret
._bytesatleast65:
	#   bytes -= 64
	sub	$64,%ebx
	#   out += 64
	add	$64,%edi
	#   m += 64
	add	$64,%esi
	# goto bytesatleast1
	jmp	._bytesatleast1
# enter ECRYPT_keysetup
.text
.p2align 5
.globl ECRYPT_keysetup
ECRYPT_keysetup:
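	#   Key setup: the key goes into state words 1..4 and 11..14, with
	#   the diagonal constants in words 0, 5, 10, 15. A 256-bit key
	#   supplies words 11..14 from its second 16 bytes; a 128-bit key
	#   reuses its only 16 bytes. (arg4, the IV size in the ECRYPT API,
	#   is unused here.)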
	mov	%esp,%eax
	and	$31,%eax
	add	$256,%eax
	sub	%eax,%esp
	#   eax_stack = eax
	movl	%eax,64(%esp)
	#   ebx_stack = ebx
	movl	%ebx,68(%esp)
	#   esi_stack = esi
	movl	%esi,72(%esp)
	#   edi_stack = edi
	movl	%edi,76(%esp)
	#   ebp_stack = ebp
	movl	%ebp,80(%esp)
	#   k = arg2
	movl	8(%esp,%eax),%ecx
	#   kbits = arg3
	movl	12(%esp,%eax),%edx
	#   x = arg1
	movl	4(%esp,%eax),%eax
	#   in1 = *(uint32 *) (k + 0)
	movl	0(%ecx),%ebx
	#   in2 = *(uint32 *) (k + 4)
	movl	4(%ecx),%esi
	#   in3 = *(uint32 *) (k + 8)
	movl	8(%ecx),%edi
	#   in4 = *(uint32 *) (k + 12)
	movl	12(%ecx),%ebp
	#   *(uint32 *) (x + 4) = in1
	movl	%ebx,4(%eax)
	#   *(uint32 *) (x + 8) = in2
	movl	%esi,8(%eax)
	#   *(uint32 *) (x + 12) = in3
	movl	%edi,12(%eax)
	#   *(uint32 *) (x + 16) = in4
	movl	%ebp,16(%eax)
	#   kbits - 256
	cmp	$256,%edx
	#   goto kbits128 if unsigned<
	jb	._kbits128
._kbits256:
	#     in11 = *(uint32 *) (k + 16)
	movl	16(%ecx),%edx
	#     in12 = *(uint32 *) (k + 20)
	movl	20(%ecx),%ebx
	#     in13 = *(uint32 *) (k + 24)
	movl	24(%ecx),%esi
	#     in14 = *(uint32 *) (k + 28)
	movl	28(%ecx),%ecx
	#     *(uint32 *) (x + 44) = in11
	movl	%edx,44(%eax)
	#     *(uint32 *) (x + 48) = in12
	movl	%ebx,48(%eax)
	#     *(uint32 *) (x + 52) = in13
	movl	%esi,52(%eax)
	#     *(uint32 *) (x + 56) = in14
	movl	%ecx,56(%eax)
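	#     The four constants below are the little-endian words of
	#     "expand 32-byte k" (sigma): 0x61707865, 0x3320646e,
	#     0x79622d32, 0x6b206574.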
	#     in0 = 1634760805
	mov	$1634760805,%ecx
	#     in5 = 857760878
	mov	$857760878,%edx
	#     in10 = 2036477234
	mov	$2036477234,%ebx
	#     in15 = 1797285236
	mov	$1797285236,%esi
	#     *(uint32 *) (x + 0) = in0
	movl	%ecx,0(%eax)
	#     *(uint32 *) (x + 20) = in5
	movl	%edx,20(%eax)
	#     *(uint32 *) (x + 40) = in10
	movl	%ebx,40(%eax)
	#     *(uint32 *) (x + 60) = in15
	movl	%esi,60(%eax)
	#   goto keysetupdone
	jmp	._keysetupdone
._kbits128:
	#     in11 = *(uint32 *) (k + 0)
	movl	0(%ecx),%edx
	#     in12 = *(uint32 *) (k + 4)
	movl	4(%ecx),%ebx
	#     in13 = *(uint32 *) (k + 8)
	movl	8(%ecx),%esi
	#     in14 = *(uint32 *) (k + 12)
	movl	12(%ecx),%ecx
	#     *(uint32 *) (x + 44) = in11
	movl	%edx,44(%eax)
	#     *(uint32 *) (x + 48) = in12
	movl	%ebx,48(%eax)
	#     *(uint32 *) (x + 52) = in13
	movl	%esi,52(%eax)
	#     *(uint32 *) (x + 56) = in14
	movl	%ecx,56(%eax)
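	#     The constants below are the little-endian words of
	#     "expand 16-byte k" (tau): 0x61707865, 0x3120646e,
	#     0x79622d36, 0x6b206574.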
	#     in0 = 1634760805
	mov	$1634760805,%ecx
	#     in5 = 824206446
	mov	$824206446,%edx
	#     in10 = 2036477238
	mov	$2036477238,%ebx
	#     in15 = 1797285236
	mov	$1797285236,%esi
	#     *(uint32 *) (x + 0) = in0
	movl	%ecx,0(%eax)
	#     *(uint32 *) (x + 20) = in5
	movl	%edx,20(%eax)
	#     *(uint32 *) (x + 40) = in10
	movl	%ebx,40(%eax)
	#     *(uint32 *) (x + 60) = in15
	movl	%esi,60(%eax)
._keysetupdone:
	#   eax = eax_stack
	movl	64(%esp),%eax
	#   ebx = ebx_stack
	movl	68(%esp),%ebx
	#   esi = esi_stack
	movl	72(%esp),%esi
	#   edi = edi_stack
	movl	76(%esp),%edi
	#   ebp = ebp_stack
	movl	80(%esp),%ebp
	# leave
	add	%eax,%esp
	ret
# enter ECRYPT_ivsetup
.text
.p2align 5
.globl ECRYPT_ivsetup
ECRYPT_ivsetup:
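	#   IV setup: the 8-byte IV goes into state words 6 and 7, and the
	#   64-bit block counter (words 8 and 9) is reset to zero.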
	mov	%esp,%eax
	and	$31,%eax
	add	$256,%eax
	sub	%eax,%esp
	#   eax_stack = eax
	movl	%eax,64(%esp)
	#   ebx_stack = ebx
	movl	%ebx,68(%esp)
	#   esi_stack = esi
	movl	%esi,72(%esp)
	#   edi_stack = edi
	movl	%edi,76(%esp)
	#   ebp_stack = ebp
	movl	%ebp,80(%esp)
	#   iv = arg2
	movl	8(%esp,%eax),%ecx
	#   x = arg1
	movl	4(%esp,%eax),%eax
	#   in6 = *(uint32 *) (iv + 0)
	movl	0(%ecx),%edx
	#   in7 = *(uint32 *) (iv + 4)
	movl	4(%ecx),%ecx
	#   in8 = 0
	mov	$0,%ebx
	#   in9 = 0
	mov	$0,%esi
	#   *(uint32 *) (x + 24) = in6
	movl	%edx,24(%eax)
	#   *(uint32 *) (x + 28) = in7
	movl	%ecx,28(%eax)
	#   *(uint32 *) (x + 32) = in8
	movl	%ebx,32(%eax)
	#   *(uint32 *) (x + 36) = in9
	movl	%esi,36(%eax)
	#   eax = eax_stack
	movl	64(%esp),%eax
	#   ebx = ebx_stack
	movl	68(%esp),%ebx
	#   esi = esi_stack
	movl	72(%esp),%esi
	#   edi = edi_stack
	movl	76(%esp),%edi
	#   ebp = ebp_stack
	movl	80(%esp),%ebp
	# leave
	add	%eax,%esp
	ret