sha256-mips.S revision ff41a4bc41ae1e1391f9b05117623ff70b985983
1#ifdef OPENSSL_FIPSCANISTER
2# include <openssl/fipssyms.h>
3#endif
4
5.text	# everything that follows is emitted into the code section
6.set	noat	# the routine below uses $1 ($at) as an ordinary working register, so the assembler must not claim it as its own temporary
7#if !defined(__vxworks) || defined(__pic__)
8.option	pic2	# ask for position-independent code; skipped only for VxWorks builds without __pic__
9#endif
10
11.align	5	# MIPS gas takes a power of two here: align the following entry point to 2^5 = 32 bytes
12.globl	sha256_block_data_order
13.ent	sha256_block_data_order
14sha256_block_data_order:
15	.frame	$29,128,$31
16	.mask	3237937152,-4
17	.set	noreorder
18	.cpload	$25
19	sub $29,128
20	sw	$31,128-1*4($29)
21	sw	$30,128-2*4($29)
22	sw	$23,128-3*4($29)
23	sw	$22,128-4*4($29)
24	sw	$21,128-5*4($29)
25	sw	$20,128-6*4($29)
26	sw	$19,128-7*4($29)
27	sw	$18,128-8*4($29)
28	sw	$17,128-9*4($29)
29	sw	$16,128-10*4($29)
30	sll $23,$6,6
31	.set	reorder
32	la	$6,K256		# PIC-ified 'load address'
33
34	lw	$1,0*4($4)		# load context
35	lw	$2,1*4($4)
36	lw	$3,2*4($4)
37	lw	$7,3*4($4)
38	lw	$24,4*4($4)
39	lw	$25,5*4($4)
40	lw	$30,6*4($4)
41	lw	$31,7*4($4)
42
43	add $23,$5		# pointer to the end of input
44	sw	$23,16*4($29)
45	b	.Loop
46
47.align	5
48.Loop:
49	lwl	$8,3($5)
50	lwr	$8,0($5)
51	lwl	$9,7($5)
52	lwr	$9,4($5)
53	srl	$13,$8,24		# byte swap(0)
54	srl	$14,$8,8
55	andi	$15,$8,0xFF00
56	sll	$8,$8,24
57	andi	$14,0xFF00
58	sll	$15,$15,8
59	or	$8,$13
60	or	$14,$15
61	or	$8,$14
62	addu	$12,$8,$31			# 0
63	srl	$31,$24,6
64	xor	$15,$25,$30
65	sll	$14,$24,7
66	and	$15,$24
67	srl	$13,$24,11
68	xor	$31,$14
69	sll	$14,$24,21
70	xor	$31,$13
71	srl	$13,$24,25
72	xor	$31,$14
73	sll	$14,$24,26
74	xor	$31,$13
75	xor	$15,$30			# Ch(e,f,g)
76	xor	$13,$14,$31			# Sigma1(e)
77
78	srl	$31,$1,2
79	addu	$12,$15
80	lw	$15,0($6)		# K[0]
81	sll	$14,$1,10
82	addu	$12,$13
83	srl	$13,$1,13
84	xor	$31,$14
85	sll	$14,$1,19
86	xor	$31,$13
87	srl	$13,$1,22
88	xor	$31,$14
89	sll	$14,$1,30
90	xor	$31,$13
91	sw	$8,0($29)	# offload to ring buffer
92	xor	$31,$14			# Sigma0(a)
93
94	or	$13,$1,$2
95	and	$14,$1,$2
96	and	$13,$3
97	or	$14,$13			# Maj(a,b,c)
98	addu	$12,$15			# +=K[0]
99	addu	$31,$14
100
101	addu	$7,$12
102	addu	$31,$12
103	lwl	$10,11($5)
104	lwr	$10,8($5)
105	srl	$14,$9,24		# byte swap(1)
106	srl	$15,$9,8
107	andi	$16,$9,0xFF00
108	sll	$9,$9,24
109	andi	$15,0xFF00
110	sll	$16,$16,8
111	or	$9,$14
112	or	$15,$16
113	or	$9,$15
114	addu	$13,$9,$30			# 1
115	srl	$30,$7,6
116	xor	$16,$24,$25
117	sll	$15,$7,7
118	and	$16,$7
119	srl	$14,$7,11
120	xor	$30,$15
121	sll	$15,$7,21
122	xor	$30,$14
123	srl	$14,$7,25
124	xor	$30,$15
125	sll	$15,$7,26
126	xor	$30,$14
127	xor	$16,$25			# Ch(e,f,g)
128	xor	$14,$15,$30			# Sigma1(e)
129
130	srl	$30,$31,2
131	addu	$13,$16
132	lw	$16,4($6)		# K[1]
133	sll	$15,$31,10
134	addu	$13,$14
135	srl	$14,$31,13
136	xor	$30,$15
137	sll	$15,$31,19
138	xor	$30,$14
139	srl	$14,$31,22
140	xor	$30,$15
141	sll	$15,$31,30
142	xor	$30,$14
143	sw	$9,4($29)	# offload to ring buffer
144	xor	$30,$15			# Sigma0(a)
145
146	or	$14,$31,$1
147	and	$15,$31,$1
148	and	$14,$2
149	or	$15,$14			# Maj(a,b,c)
150	addu	$13,$16			# +=K[1]
151	addu	$30,$15
152
153	addu	$3,$13
154	addu	$30,$13
155	lwl	$11,15($5)
156	lwr	$11,12($5)
157	srl	$15,$10,24		# byte swap(2)
158	srl	$16,$10,8
159	andi	$17,$10,0xFF00
160	sll	$10,$10,24
161	andi	$16,0xFF00
162	sll	$17,$17,8
163	or	$10,$15
164	or	$16,$17
165	or	$10,$16
166	addu	$14,$10,$25			# 2
167	srl	$25,$3,6
168	xor	$17,$7,$24
169	sll	$16,$3,7
170	and	$17,$3
171	srl	$15,$3,11
172	xor	$25,$16
173	sll	$16,$3,21
174	xor	$25,$15
175	srl	$15,$3,25
176	xor	$25,$16
177	sll	$16,$3,26
178	xor	$25,$15
179	xor	$17,$24			# Ch(e,f,g)
180	xor	$15,$16,$25			# Sigma1(e)
181
182	srl	$25,$30,2
183	addu	$14,$17
184	lw	$17,8($6)		# K[2]
185	sll	$16,$30,10
186	addu	$14,$15
187	srl	$15,$30,13
188	xor	$25,$16
189	sll	$16,$30,19
190	xor	$25,$15
191	srl	$15,$30,22
192	xor	$25,$16
193	sll	$16,$30,30
194	xor	$25,$15
195	sw	$10,8($29)	# offload to ring buffer
196	xor	$25,$16			# Sigma0(a)
197
198	or	$15,$30,$31
199	and	$16,$30,$31
200	and	$15,$1
201	or	$16,$15			# Maj(a,b,c)
202	addu	$14,$17			# +=K[2]
203	addu	$25,$16
204
205	addu	$2,$14
206	addu	$25,$14
207	lwl	$12,19($5)
208	lwr	$12,16($5)
209	srl	$16,$11,24		# byte swap(3)
210	srl	$17,$11,8
211	andi	$18,$11,0xFF00
212	sll	$11,$11,24
213	andi	$17,0xFF00
214	sll	$18,$18,8
215	or	$11,$16
216	or	$17,$18
217	or	$11,$17
218	addu	$15,$11,$24			# 3
219	srl	$24,$2,6
220	xor	$18,$3,$7
221	sll	$17,$2,7
222	and	$18,$2
223	srl	$16,$2,11
224	xor	$24,$17
225	sll	$17,$2,21
226	xor	$24,$16
227	srl	$16,$2,25
228	xor	$24,$17
229	sll	$17,$2,26
230	xor	$24,$16
231	xor	$18,$7			# Ch(e,f,g)
232	xor	$16,$17,$24			# Sigma1(e)
233
234	srl	$24,$25,2
235	addu	$15,$18
236	lw	$18,12($6)		# K[3]
237	sll	$17,$25,10
238	addu	$15,$16
239	srl	$16,$25,13
240	xor	$24,$17
241	sll	$17,$25,19
242	xor	$24,$16
243	srl	$16,$25,22
244	xor	$24,$17
245	sll	$17,$25,30
246	xor	$24,$16
247	sw	$11,12($29)	# offload to ring buffer
248	xor	$24,$17			# Sigma0(a)
249
250	or	$16,$25,$30
251	and	$17,$25,$30
252	and	$16,$31
253	or	$17,$16			# Maj(a,b,c)
254	addu	$15,$18			# +=K[3]
255	addu	$24,$17
256
257	addu	$1,$15
258	addu	$24,$15
259	lwl	$13,23($5)
260	lwr	$13,20($5)
261	srl	$17,$12,24		# byte swap(4)
262	srl	$18,$12,8
263	andi	$19,$12,0xFF00
264	sll	$12,$12,24
265	andi	$18,0xFF00
266	sll	$19,$19,8
267	or	$12,$17
268	or	$18,$19
269	or	$12,$18
270	addu	$16,$12,$7			# 4
271	srl	$7,$1,6
272	xor	$19,$2,$3
273	sll	$18,$1,7
274	and	$19,$1
275	srl	$17,$1,11
276	xor	$7,$18
277	sll	$18,$1,21
278	xor	$7,$17
279	srl	$17,$1,25
280	xor	$7,$18
281	sll	$18,$1,26
282	xor	$7,$17
283	xor	$19,$3			# Ch(e,f,g)
284	xor	$17,$18,$7			# Sigma1(e)
285
286	srl	$7,$24,2
287	addu	$16,$19
288	lw	$19,16($6)		# K[4]
289	sll	$18,$24,10
290	addu	$16,$17
291	srl	$17,$24,13
292	xor	$7,$18
293	sll	$18,$24,19
294	xor	$7,$17
295	srl	$17,$24,22
296	xor	$7,$18
297	sll	$18,$24,30
298	xor	$7,$17
299	sw	$12,16($29)	# offload to ring buffer
300	xor	$7,$18			# Sigma0(a)
301
302	or	$17,$24,$25
303	and	$18,$24,$25
304	and	$17,$30
305	or	$18,$17			# Maj(a,b,c)
306	addu	$16,$19			# +=K[4]
307	addu	$7,$18
308
309	addu	$31,$16
310	addu	$7,$16
311	lwl	$14,27($5)
312	lwr	$14,24($5)
313	srl	$18,$13,24		# byte swap(5)
314	srl	$19,$13,8
315	andi	$20,$13,0xFF00
316	sll	$13,$13,24
317	andi	$19,0xFF00
318	sll	$20,$20,8
319	or	$13,$18
320	or	$19,$20
321	or	$13,$19
322	addu	$17,$13,$3			# 5
323	srl	$3,$31,6
324	xor	$20,$1,$2
325	sll	$19,$31,7
326	and	$20,$31
327	srl	$18,$31,11
328	xor	$3,$19
329	sll	$19,$31,21
330	xor	$3,$18
331	srl	$18,$31,25
332	xor	$3,$19
333	sll	$19,$31,26
334	xor	$3,$18
335	xor	$20,$2			# Ch(e,f,g)
336	xor	$18,$19,$3			# Sigma1(e)
337
338	srl	$3,$7,2
339	addu	$17,$20
340	lw	$20,20($6)		# K[5]
341	sll	$19,$7,10
342	addu	$17,$18
343	srl	$18,$7,13
344	xor	$3,$19
345	sll	$19,$7,19
346	xor	$3,$18
347	srl	$18,$7,22
348	xor	$3,$19
349	sll	$19,$7,30
350	xor	$3,$18
351	sw	$13,20($29)	# offload to ring buffer
352	xor	$3,$19			# Sigma0(a)
353
354	or	$18,$7,$24
355	and	$19,$7,$24
356	and	$18,$25
357	or	$19,$18			# Maj(a,b,c)
358	addu	$17,$20			# +=K[5]
359	addu	$3,$19
360
361	addu	$30,$17
362	addu	$3,$17
363	lwl	$15,31($5)
364	lwr	$15,28($5)
365	srl	$19,$14,24		# byte swap(6)
366	srl	$20,$14,8
367	andi	$21,$14,0xFF00
368	sll	$14,$14,24
369	andi	$20,0xFF00
370	sll	$21,$21,8
371	or	$14,$19
372	or	$20,$21
373	or	$14,$20
374	addu	$18,$14,$2			# 6
375	srl	$2,$30,6
376	xor	$21,$31,$1
377	sll	$20,$30,7
378	and	$21,$30
379	srl	$19,$30,11
380	xor	$2,$20
381	sll	$20,$30,21
382	xor	$2,$19
383	srl	$19,$30,25
384	xor	$2,$20
385	sll	$20,$30,26
386	xor	$2,$19
387	xor	$21,$1			# Ch(e,f,g)
388	xor	$19,$20,$2			# Sigma1(e)
389
390	srl	$2,$3,2
391	addu	$18,$21
392	lw	$21,24($6)		# K[6]
393	sll	$20,$3,10
394	addu	$18,$19
395	srl	$19,$3,13
396	xor	$2,$20
397	sll	$20,$3,19
398	xor	$2,$19
399	srl	$19,$3,22
400	xor	$2,$20
401	sll	$20,$3,30
402	xor	$2,$19
403	sw	$14,24($29)	# offload to ring buffer
404	xor	$2,$20			# Sigma0(a)
405
406	or	$19,$3,$7
407	and	$20,$3,$7
408	and	$19,$24
409	or	$20,$19			# Maj(a,b,c)
410	addu	$18,$21			# +=K[6]
411	addu	$2,$20
412
413	addu	$25,$18
414	addu	$2,$18
415	lwl	$16,35($5)
416	lwr	$16,32($5)
417	srl	$20,$15,24		# byte swap(7)
418	srl	$21,$15,8
419	andi	$22,$15,0xFF00
420	sll	$15,$15,24
421	andi	$21,0xFF00
422	sll	$22,$22,8
423	or	$15,$20
424	or	$21,$22
425	or	$15,$21
426	addu	$19,$15,$1			# 7
427	srl	$1,$25,6
428	xor	$22,$30,$31
429	sll	$21,$25,7
430	and	$22,$25
431	srl	$20,$25,11
432	xor	$1,$21
433	sll	$21,$25,21
434	xor	$1,$20
435	srl	$20,$25,25
436	xor	$1,$21
437	sll	$21,$25,26
438	xor	$1,$20
439	xor	$22,$31			# Ch(e,f,g)
440	xor	$20,$21,$1			# Sigma1(e)
441
442	srl	$1,$2,2
443	addu	$19,$22
444	lw	$22,28($6)		# K[7]
445	sll	$21,$2,10
446	addu	$19,$20
447	srl	$20,$2,13
448	xor	$1,$21
449	sll	$21,$2,19
450	xor	$1,$20
451	srl	$20,$2,22
452	xor	$1,$21
453	sll	$21,$2,30
454	xor	$1,$20
455	sw	$15,28($29)	# offload to ring buffer
456	xor	$1,$21			# Sigma0(a)
457
458	or	$20,$2,$3
459	and	$21,$2,$3
460	and	$20,$7
461	or	$21,$20			# Maj(a,b,c)
462	addu	$19,$22			# +=K[7]
463	addu	$1,$21
464
465	addu	$24,$19
466	addu	$1,$19
467	lwl	$17,39($5)
468	lwr	$17,36($5)
469	srl	$21,$16,24		# byte swap(8)
470	srl	$22,$16,8
471	andi	$23,$16,0xFF00
472	sll	$16,$16,24
473	andi	$22,0xFF00
474	sll	$23,$23,8
475	or	$16,$21
476	or	$22,$23
477	or	$16,$22
478	addu	$20,$16,$31			# 8
479	srl	$31,$24,6
480	xor	$23,$25,$30
481	sll	$22,$24,7
482	and	$23,$24
483	srl	$21,$24,11
484	xor	$31,$22
485	sll	$22,$24,21
486	xor	$31,$21
487	srl	$21,$24,25
488	xor	$31,$22
489	sll	$22,$24,26
490	xor	$31,$21
491	xor	$23,$30			# Ch(e,f,g)
492	xor	$21,$22,$31			# Sigma1(e)
493
494	srl	$31,$1,2
495	addu	$20,$23
496	lw	$23,32($6)		# K[8]
497	sll	$22,$1,10
498	addu	$20,$21
499	srl	$21,$1,13
500	xor	$31,$22
501	sll	$22,$1,19
502	xor	$31,$21
503	srl	$21,$1,22
504	xor	$31,$22
505	sll	$22,$1,30
506	xor	$31,$21
507	sw	$16,32($29)	# offload to ring buffer
508	xor	$31,$22			# Sigma0(a)
509
510	or	$21,$1,$2
511	and	$22,$1,$2
512	and	$21,$3
513	or	$22,$21			# Maj(a,b,c)
514	addu	$20,$23			# +=K[8]
515	addu	$31,$22
516
517	addu	$7,$20
518	addu	$31,$20
519	lwl	$18,43($5)
520	lwr	$18,40($5)
521	srl	$22,$17,24		# byte swap(9)
522	srl	$23,$17,8
523	andi	$8,$17,0xFF00
524	sll	$17,$17,24
525	andi	$23,0xFF00
526	sll	$8,$8,8
527	or	$17,$22
528	or	$23,$8
529	or	$17,$23
530	addu	$21,$17,$30			# 9
531	srl	$30,$7,6
532	xor	$8,$24,$25
533	sll	$23,$7,7
534	and	$8,$7
535	srl	$22,$7,11
536	xor	$30,$23
537	sll	$23,$7,21
538	xor	$30,$22
539	srl	$22,$7,25
540	xor	$30,$23
541	sll	$23,$7,26
542	xor	$30,$22
543	xor	$8,$25			# Ch(e,f,g)
544	xor	$22,$23,$30			# Sigma1(e)
545
546	srl	$30,$31,2
547	addu	$21,$8
548	lw	$8,36($6)		# K[9]
549	sll	$23,$31,10
550	addu	$21,$22
551	srl	$22,$31,13
552	xor	$30,$23
553	sll	$23,$31,19
554	xor	$30,$22
555	srl	$22,$31,22
556	xor	$30,$23
557	sll	$23,$31,30
558	xor	$30,$22
559	sw	$17,36($29)	# offload to ring buffer
560	xor	$30,$23			# Sigma0(a)
561
562	or	$22,$31,$1
563	and	$23,$31,$1
564	and	$22,$2
565	or	$23,$22			# Maj(a,b,c)
566	addu	$21,$8			# +=K[9]
567	addu	$30,$23
568
569	addu	$3,$21
570	addu	$30,$21
571	lwl	$19,47($5)
572	lwr	$19,44($5)
573	srl	$23,$18,24		# byte swap(10)
574	srl	$8,$18,8
575	andi	$9,$18,0xFF00
576	sll	$18,$18,24
577	andi	$8,0xFF00
578	sll	$9,$9,8
579	or	$18,$23
580	or	$8,$9
581	or	$18,$8
582	addu	$22,$18,$25			# 10
583	srl	$25,$3,6
584	xor	$9,$7,$24
585	sll	$8,$3,7
586	and	$9,$3
587	srl	$23,$3,11
588	xor	$25,$8
589	sll	$8,$3,21
590	xor	$25,$23
591	srl	$23,$3,25
592	xor	$25,$8
593	sll	$8,$3,26
594	xor	$25,$23
595	xor	$9,$24			# Ch(e,f,g)
596	xor	$23,$8,$25			# Sigma1(e)
597
598	srl	$25,$30,2
599	addu	$22,$9
600	lw	$9,40($6)		# K[10]
601	sll	$8,$30,10
602	addu	$22,$23
603	srl	$23,$30,13
604	xor	$25,$8
605	sll	$8,$30,19
606	xor	$25,$23
607	srl	$23,$30,22
608	xor	$25,$8
609	sll	$8,$30,30
610	xor	$25,$23
611	sw	$18,40($29)	# offload to ring buffer
612	xor	$25,$8			# Sigma0(a)
613
614	or	$23,$30,$31
615	and	$8,$30,$31
616	and	$23,$1
617	or	$8,$23			# Maj(a,b,c)
618	addu	$22,$9			# +=K[10]
619	addu	$25,$8
620
621	addu	$2,$22
622	addu	$25,$22
623	lwl	$20,51($5)
624	lwr	$20,48($5)
625	srl	$8,$19,24		# byte swap(11)
626	srl	$9,$19,8
627	andi	$10,$19,0xFF00
628	sll	$19,$19,24
629	andi	$9,0xFF00
630	sll	$10,$10,8
631	or	$19,$8
632	or	$9,$10
633	or	$19,$9
634	addu	$23,$19,$24			# 11
635	srl	$24,$2,6
636	xor	$10,$3,$7
637	sll	$9,$2,7
638	and	$10,$2
639	srl	$8,$2,11
640	xor	$24,$9
641	sll	$9,$2,21
642	xor	$24,$8
643	srl	$8,$2,25
644	xor	$24,$9
645	sll	$9,$2,26
646	xor	$24,$8
647	xor	$10,$7			# Ch(e,f,g)
648	xor	$8,$9,$24			# Sigma1(e)
649
650	srl	$24,$25,2
651	addu	$23,$10
652	lw	$10,44($6)		# K[11]
653	sll	$9,$25,10
654	addu	$23,$8
655	srl	$8,$25,13
656	xor	$24,$9
657	sll	$9,$25,19
658	xor	$24,$8
659	srl	$8,$25,22
660	xor	$24,$9
661	sll	$9,$25,30
662	xor	$24,$8
663	sw	$19,44($29)	# offload to ring buffer
664	xor	$24,$9			# Sigma0(a)
665
666	or	$8,$25,$30
667	and	$9,$25,$30
668	and	$8,$31
669	or	$9,$8			# Maj(a,b,c)
670	addu	$23,$10			# +=K[11]
671	addu	$24,$9
672
673	addu	$1,$23
674	addu	$24,$23
675	lwl	$21,55($5)
676	lwr	$21,52($5)
677	srl	$9,$20,24		# byte swap(12)
678	srl	$10,$20,8
679	andi	$11,$20,0xFF00
680	sll	$20,$20,24
681	andi	$10,0xFF00
682	sll	$11,$11,8
683	or	$20,$9
684	or	$10,$11
685	or	$20,$10
686	addu	$8,$20,$7			# 12
687	srl	$7,$1,6
688	xor	$11,$2,$3
689	sll	$10,$1,7
690	and	$11,$1
691	srl	$9,$1,11
692	xor	$7,$10
693	sll	$10,$1,21
694	xor	$7,$9
695	srl	$9,$1,25
696	xor	$7,$10
697	sll	$10,$1,26
698	xor	$7,$9
699	xor	$11,$3			# Ch(e,f,g)
700	xor	$9,$10,$7			# Sigma1(e)
701
702	srl	$7,$24,2
703	addu	$8,$11
704	lw	$11,48($6)		# K[12]
705	sll	$10,$24,10
706	addu	$8,$9
707	srl	$9,$24,13
708	xor	$7,$10
709	sll	$10,$24,19
710	xor	$7,$9
711	srl	$9,$24,22
712	xor	$7,$10
713	sll	$10,$24,30
714	xor	$7,$9
715	sw	$20,48($29)	# offload to ring buffer
716	xor	$7,$10			# Sigma0(a)
717
718	or	$9,$24,$25
719	and	$10,$24,$25
720	and	$9,$30
721	or	$10,$9			# Maj(a,b,c)
722	addu	$8,$11			# +=K[12]
723	addu	$7,$10
724
725	addu	$31,$8
726	addu	$7,$8
727	lwl	$22,59($5)
728	lwr	$22,56($5)
729	srl	$10,$21,24		# byte swap(13)
730	srl	$11,$21,8
731	andi	$12,$21,0xFF00
732	sll	$21,$21,24
733	andi	$11,0xFF00
734	sll	$12,$12,8
735	or	$21,$10
736	or	$11,$12
737	or	$21,$11
738	addu	$9,$21,$3			# 13
739	srl	$3,$31,6
740	xor	$12,$1,$2
741	sll	$11,$31,7
742	and	$12,$31
743	srl	$10,$31,11
744	xor	$3,$11
745	sll	$11,$31,21
746	xor	$3,$10
747	srl	$10,$31,25
748	xor	$3,$11
749	sll	$11,$31,26
750	xor	$3,$10
751	xor	$12,$2			# Ch(e,f,g)
752	xor	$10,$11,$3			# Sigma1(e)
753
754	srl	$3,$7,2
755	addu	$9,$12
756	lw	$12,52($6)		# K[13]
757	sll	$11,$7,10
758	addu	$9,$10
759	srl	$10,$7,13
760	xor	$3,$11
761	sll	$11,$7,19
762	xor	$3,$10
763	srl	$10,$7,22
764	xor	$3,$11
765	sll	$11,$7,30
766	xor	$3,$10
767	sw	$21,52($29)	# offload to ring buffer
768	xor	$3,$11			# Sigma0(a)
769
770	or	$10,$7,$24
771	and	$11,$7,$24
772	and	$10,$25
773	or	$11,$10			# Maj(a,b,c)
774	addu	$9,$12			# +=K[13]
775	addu	$3,$11
776
777	addu	$30,$9
778	addu	$3,$9
779	lw	$8,0($29)	# prefetch from ring buffer
780	lwl	$23,63($5)
781	lwr	$23,60($5)
782	srl	$11,$22,24		# byte swap(14)
783	srl	$12,$22,8
784	andi	$13,$22,0xFF00
785	sll	$22,$22,24
786	andi	$12,0xFF00
787	sll	$13,$13,8
788	or	$22,$11
789	or	$12,$13
790	or	$22,$12
791	addu	$10,$22,$2			# 14
792	srl	$2,$30,6
793	xor	$13,$31,$1
794	sll	$12,$30,7
795	and	$13,$30
796	srl	$11,$30,11
797	xor	$2,$12
798	sll	$12,$30,21
799	xor	$2,$11
800	srl	$11,$30,25
801	xor	$2,$12
802	sll	$12,$30,26
803	xor	$2,$11
804	xor	$13,$1			# Ch(e,f,g)
805	xor	$11,$12,$2			# Sigma1(e)
806
807	srl	$2,$3,2
808	addu	$10,$13
809	lw	$13,56($6)		# K[14]
810	sll	$12,$3,10
811	addu	$10,$11
812	srl	$11,$3,13
813	xor	$2,$12
814	sll	$12,$3,19
815	xor	$2,$11
816	srl	$11,$3,22
817	xor	$2,$12
818	sll	$12,$3,30
819	xor	$2,$11
820	sw	$22,56($29)	# offload to ring buffer
821	xor	$2,$12			# Sigma0(a)
822
823	or	$11,$3,$7
824	and	$12,$3,$7
825	and	$11,$24
826	or	$12,$11			# Maj(a,b,c)
827	addu	$10,$13			# +=K[14]
828	addu	$2,$12
829
830	addu	$25,$10
831	addu	$2,$10
832	lw	$9,4($29)	# prefetch from ring buffer
833	srl	$12,$23,24		# byte swap(15)
834	srl	$13,$23,8
835	andi	$14,$23,0xFF00
836	sll	$23,$23,24
837	andi	$13,0xFF00
838	sll	$14,$14,8
839	or	$23,$12
840	or	$13,$14
841	or	$23,$13
842	addu	$11,$23,$1			# 15
843	srl	$1,$25,6
844	xor	$14,$30,$31
845	sll	$13,$25,7
846	and	$14,$25
847	srl	$12,$25,11
848	xor	$1,$13
849	sll	$13,$25,21
850	xor	$1,$12
851	srl	$12,$25,25
852	xor	$1,$13
853	sll	$13,$25,26
854	xor	$1,$12
855	xor	$14,$31			# Ch(e,f,g)
856	xor	$12,$13,$1			# Sigma1(e)
857
858	srl	$1,$2,2
859	addu	$11,$14
860	lw	$14,60($6)		# K[15]
861	sll	$13,$2,10
862	addu	$11,$12
863	srl	$12,$2,13
864	xor	$1,$13
865	sll	$13,$2,19
866	xor	$1,$12
867	srl	$12,$2,22
868	xor	$1,$13
869	sll	$13,$2,30
870	xor	$1,$12
871	sw	$23,60($29)	# offload to ring buffer
872	xor	$1,$13			# Sigma0(a)
873
874	or	$12,$2,$3
875	and	$13,$2,$3
876	and	$12,$7
877	or	$13,$12			# Maj(a,b,c)
878	addu	$11,$14			# +=K[15]
879	addu	$1,$13
880
881	addu	$24,$11
882	addu	$1,$11
883	lw	$10,8($29)	# prefetch from ring buffer
884	b	.L16_xx
885.align	4
886.L16_xx:
887	srl	$14,$9,3		# Xupdate(16)
888	addu	$8,$17			# +=X[i+9]
889	sll	$13,$9,14
890	srl	$12,$9,7
891	xor	$14,$13
892	sll	$13,11
893	xor	$14,$12
894	srl	$12,$9,18
895	xor	$14,$13
896
897	srl	$15,$22,10
898	xor	$14,$12			# sigma0(X[i+1])
899	sll	$13,$22,13
900	addu	$8,$14
901	srl	$12,$22,17
902	xor	$15,$13
903	sll	$13,2
904	xor	$15,$12
905	srl	$12,$22,19
906	xor	$15,$13
907
908	xor	$15,$12			# sigma1(X[i+14])
909	addu	$8,$15
910	addu	$12,$8,$31			# 16
911	srl	$31,$24,6
912	xor	$15,$25,$30
913	sll	$14,$24,7
914	and	$15,$24
915	srl	$13,$24,11
916	xor	$31,$14
917	sll	$14,$24,21
918	xor	$31,$13
919	srl	$13,$24,25
920	xor	$31,$14
921	sll	$14,$24,26
922	xor	$31,$13
923	xor	$15,$30			# Ch(e,f,g)
924	xor	$13,$14,$31			# Sigma1(e)
925
926	srl	$31,$1,2
927	addu	$12,$15
928	lw	$15,64($6)		# K[16]
929	sll	$14,$1,10
930	addu	$12,$13
931	srl	$13,$1,13
932	xor	$31,$14
933	sll	$14,$1,19
934	xor	$31,$13
935	srl	$13,$1,22
936	xor	$31,$14
937	sll	$14,$1,30
938	xor	$31,$13
939	sw	$8,0($29)	# offload to ring buffer
940	xor	$31,$14			# Sigma0(a)
941
942	or	$13,$1,$2
943	and	$14,$1,$2
944	and	$13,$3
945	or	$14,$13			# Maj(a,b,c)
946	addu	$12,$15			# +=K[16]
947	addu	$31,$14
948
949	addu	$7,$12
950	addu	$31,$12
951	lw	$11,12($29)	# prefetch from ring buffer
952	srl	$15,$10,3		# Xupdate(17)
953	addu	$9,$18			# +=X[i+9]
954	sll	$14,$10,14
955	srl	$13,$10,7
956	xor	$15,$14
957	sll	$14,11
958	xor	$15,$13
959	srl	$13,$10,18
960	xor	$15,$14
961
962	srl	$16,$23,10
963	xor	$15,$13			# sigma0(X[i+1])
964	sll	$14,$23,13
965	addu	$9,$15
966	srl	$13,$23,17
967	xor	$16,$14
968	sll	$14,2
969	xor	$16,$13
970	srl	$13,$23,19
971	xor	$16,$14
972
973	xor	$16,$13			# sigma1(X[i+14])
974	addu	$9,$16
975	addu	$13,$9,$30			# 17
976	srl	$30,$7,6
977	xor	$16,$24,$25
978	sll	$15,$7,7
979	and	$16,$7
980	srl	$14,$7,11
981	xor	$30,$15
982	sll	$15,$7,21
983	xor	$30,$14
984	srl	$14,$7,25
985	xor	$30,$15
986	sll	$15,$7,26
987	xor	$30,$14
988	xor	$16,$25			# Ch(e,f,g)
989	xor	$14,$15,$30			# Sigma1(e)
990
991	srl	$30,$31,2
992	addu	$13,$16
993	lw	$16,68($6)		# K[17]
994	sll	$15,$31,10
995	addu	$13,$14
996	srl	$14,$31,13
997	xor	$30,$15
998	sll	$15,$31,19
999	xor	$30,$14
1000	srl	$14,$31,22
1001	xor	$30,$15
1002	sll	$15,$31,30
1003	xor	$30,$14
1004	sw	$9,4($29)	# offload to ring buffer
1005	xor	$30,$15			# Sigma0(a)
1006
1007	or	$14,$31,$1
1008	and	$15,$31,$1
1009	and	$14,$2
1010	or	$15,$14			# Maj(a,b,c)
1011	addu	$13,$16			# +=K[17]
1012	addu	$30,$15
1013
1014	addu	$3,$13
1015	addu	$30,$13
1016	lw	$12,16($29)	# prefetch from ring buffer
1017	srl	$16,$11,3		# Xupdate(18)
1018	addu	$10,$19			# +=X[i+9]
1019	sll	$15,$11,14
1020	srl	$14,$11,7
1021	xor	$16,$15
1022	sll	$15,11
1023	xor	$16,$14
1024	srl	$14,$11,18
1025	xor	$16,$15
1026
1027	srl	$17,$8,10
1028	xor	$16,$14			# sigma0(X[i+1])
1029	sll	$15,$8,13
1030	addu	$10,$16
1031	srl	$14,$8,17
1032	xor	$17,$15
1033	sll	$15,2
1034	xor	$17,$14
1035	srl	$14,$8,19
1036	xor	$17,$15
1037
1038	xor	$17,$14			# sigma1(X[i+14])
1039	addu	$10,$17
1040	addu	$14,$10,$25			# 18
1041	srl	$25,$3,6
1042	xor	$17,$7,$24
1043	sll	$16,$3,7
1044	and	$17,$3
1045	srl	$15,$3,11
1046	xor	$25,$16
1047	sll	$16,$3,21
1048	xor	$25,$15
1049	srl	$15,$3,25
1050	xor	$25,$16
1051	sll	$16,$3,26
1052	xor	$25,$15
1053	xor	$17,$24			# Ch(e,f,g)
1054	xor	$15,$16,$25			# Sigma1(e)
1055
1056	srl	$25,$30,2
1057	addu	$14,$17
1058	lw	$17,72($6)		# K[18]
1059	sll	$16,$30,10
1060	addu	$14,$15
1061	srl	$15,$30,13
1062	xor	$25,$16
1063	sll	$16,$30,19
1064	xor	$25,$15
1065	srl	$15,$30,22
1066	xor	$25,$16
1067	sll	$16,$30,30
1068	xor	$25,$15
1069	sw	$10,8($29)	# offload to ring buffer
1070	xor	$25,$16			# Sigma0(a)
1071
1072	or	$15,$30,$31
1073	and	$16,$30,$31
1074	and	$15,$1
1075	or	$16,$15			# Maj(a,b,c)
1076	addu	$14,$17			# +=K[18]
1077	addu	$25,$16
1078
1079	addu	$2,$14
1080	addu	$25,$14
1081	lw	$13,20($29)	# prefetch from ring buffer
1082	srl	$17,$12,3		# Xupdate(19)
1083	addu	$11,$20			# +=X[i+9]
1084	sll	$16,$12,14
1085	srl	$15,$12,7
1086	xor	$17,$16
1087	sll	$16,11
1088	xor	$17,$15
1089	srl	$15,$12,18
1090	xor	$17,$16
1091
1092	srl	$18,$9,10
1093	xor	$17,$15			# sigma0(X[i+1])
1094	sll	$16,$9,13
1095	addu	$11,$17
1096	srl	$15,$9,17
1097	xor	$18,$16
1098	sll	$16,2
1099	xor	$18,$15
1100	srl	$15,$9,19
1101	xor	$18,$16
1102
1103	xor	$18,$15			# sigma1(X[i+14])
1104	addu	$11,$18
1105	addu	$15,$11,$24			# 19
1106	srl	$24,$2,6
1107	xor	$18,$3,$7
1108	sll	$17,$2,7
1109	and	$18,$2
1110	srl	$16,$2,11
1111	xor	$24,$17
1112	sll	$17,$2,21
1113	xor	$24,$16
1114	srl	$16,$2,25
1115	xor	$24,$17
1116	sll	$17,$2,26
1117	xor	$24,$16
1118	xor	$18,$7			# Ch(e,f,g)
1119	xor	$16,$17,$24			# Sigma1(e)
1120
1121	srl	$24,$25,2
1122	addu	$15,$18
1123	lw	$18,76($6)		# K[19]
1124	sll	$17,$25,10
1125	addu	$15,$16
1126	srl	$16,$25,13
1127	xor	$24,$17
1128	sll	$17,$25,19
1129	xor	$24,$16
1130	srl	$16,$25,22
1131	xor	$24,$17
1132	sll	$17,$25,30
1133	xor	$24,$16
1134	sw	$11,12($29)	# offload to ring buffer
1135	xor	$24,$17			# Sigma0(a)
1136
1137	or	$16,$25,$30
1138	and	$17,$25,$30
1139	and	$16,$31
1140	or	$17,$16			# Maj(a,b,c)
1141	addu	$15,$18			# +=K[19]
1142	addu	$24,$17
1143
1144	addu	$1,$15
1145	addu	$24,$15
1146	lw	$14,24($29)	# prefetch from ring buffer
1147	srl	$18,$13,3		# Xupdate(20)
1148	addu	$12,$21			# +=X[i+9]
1149	sll	$17,$13,14
1150	srl	$16,$13,7
1151	xor	$18,$17
1152	sll	$17,11
1153	xor	$18,$16
1154	srl	$16,$13,18
1155	xor	$18,$17
1156
1157	srl	$19,$10,10
1158	xor	$18,$16			# sigma0(X[i+1])
1159	sll	$17,$10,13
1160	addu	$12,$18
1161	srl	$16,$10,17
1162	xor	$19,$17
1163	sll	$17,2
1164	xor	$19,$16
1165	srl	$16,$10,19
1166	xor	$19,$17
1167
1168	xor	$19,$16			# sigma1(X[i+14])
1169	addu	$12,$19
1170	addu	$16,$12,$7			# 20
1171	srl	$7,$1,6
1172	xor	$19,$2,$3
1173	sll	$18,$1,7
1174	and	$19,$1
1175	srl	$17,$1,11
1176	xor	$7,$18
1177	sll	$18,$1,21
1178	xor	$7,$17
1179	srl	$17,$1,25
1180	xor	$7,$18
1181	sll	$18,$1,26
1182	xor	$7,$17
1183	xor	$19,$3			# Ch(e,f,g)
1184	xor	$17,$18,$7			# Sigma1(e)
1185
1186	srl	$7,$24,2
1187	addu	$16,$19
1188	lw	$19,80($6)		# K[20]
1189	sll	$18,$24,10
1190	addu	$16,$17
1191	srl	$17,$24,13
1192	xor	$7,$18
1193	sll	$18,$24,19
1194	xor	$7,$17
1195	srl	$17,$24,22
1196	xor	$7,$18
1197	sll	$18,$24,30
1198	xor	$7,$17
1199	sw	$12,16($29)	# offload to ring buffer
1200	xor	$7,$18			# Sigma0(a)
1201
1202	or	$17,$24,$25
1203	and	$18,$24,$25
1204	and	$17,$30
1205	or	$18,$17			# Maj(a,b,c)
1206	addu	$16,$19			# +=K[20]
1207	addu	$7,$18
1208
1209	addu	$31,$16
1210	addu	$7,$16
1211	lw	$15,28($29)	# prefetch from ring buffer
1212	srl	$19,$14,3		# Xupdate(21)
1213	addu	$13,$22			# +=X[i+9]
1214	sll	$18,$14,14
1215	srl	$17,$14,7
1216	xor	$19,$18
1217	sll	$18,11
1218	xor	$19,$17
1219	srl	$17,$14,18
1220	xor	$19,$18
1221
1222	srl	$20,$11,10
1223	xor	$19,$17			# sigma0(X[i+1])
1224	sll	$18,$11,13
1225	addu	$13,$19
1226	srl	$17,$11,17
1227	xor	$20,$18
1228	sll	$18,2
1229	xor	$20,$17
1230	srl	$17,$11,19
1231	xor	$20,$18
1232
1233	xor	$20,$17			# sigma1(X[i+14])
1234	addu	$13,$20
1235	addu	$17,$13,$3			# 21
1236	srl	$3,$31,6
1237	xor	$20,$1,$2
1238	sll	$19,$31,7
1239	and	$20,$31
1240	srl	$18,$31,11
1241	xor	$3,$19
1242	sll	$19,$31,21
1243	xor	$3,$18
1244	srl	$18,$31,25
1245	xor	$3,$19
1246	sll	$19,$31,26
1247	xor	$3,$18
1248	xor	$20,$2			# Ch(e,f,g)
1249	xor	$18,$19,$3			# Sigma1(e)
1250
1251	srl	$3,$7,2
1252	addu	$17,$20
1253	lw	$20,84($6)		# K[21]
1254	sll	$19,$7,10
1255	addu	$17,$18
1256	srl	$18,$7,13
1257	xor	$3,$19
1258	sll	$19,$7,19
1259	xor	$3,$18
1260	srl	$18,$7,22
1261	xor	$3,$19
1262	sll	$19,$7,30
1263	xor	$3,$18
1264	sw	$13,20($29)	# offload to ring buffer
1265	xor	$3,$19			# Sigma0(a)
1266
1267	or	$18,$7,$24
1268	and	$19,$7,$24
1269	and	$18,$25
1270	or	$19,$18			# Maj(a,b,c)
1271	addu	$17,$20			# +=K[21]
1272	addu	$3,$19
1273
1274	addu	$30,$17
1275	addu	$3,$17
1276	lw	$16,32($29)	# prefetch from ring buffer
1277	srl	$20,$15,3		# Xupdate(22)
1278	addu	$14,$23			# +=X[i+9]
1279	sll	$19,$15,14
1280	srl	$18,$15,7
1281	xor	$20,$19
1282	sll	$19,11
1283	xor	$20,$18
1284	srl	$18,$15,18
1285	xor	$20,$19
1286
1287	srl	$21,$12,10
1288	xor	$20,$18			# sigma0(X[i+1])
1289	sll	$19,$12,13
1290	addu	$14,$20
1291	srl	$18,$12,17
1292	xor	$21,$19
1293	sll	$19,2
1294	xor	$21,$18
1295	srl	$18,$12,19
1296	xor	$21,$19
1297
1298	xor	$21,$18			# sigma1(X[i+14])
1299	addu	$14,$21
1300	addu	$18,$14,$2			# 22
1301	srl	$2,$30,6
1302	xor	$21,$31,$1
1303	sll	$20,$30,7
1304	and	$21,$30
1305	srl	$19,$30,11
1306	xor	$2,$20
1307	sll	$20,$30,21
1308	xor	$2,$19
1309	srl	$19,$30,25
1310	xor	$2,$20
1311	sll	$20,$30,26
1312	xor	$2,$19
1313	xor	$21,$1			# Ch(e,f,g)
1314	xor	$19,$20,$2			# Sigma1(e)
1315
1316	srl	$2,$3,2
1317	addu	$18,$21
1318	lw	$21,88($6)		# K[22]
1319	sll	$20,$3,10
1320	addu	$18,$19
1321	srl	$19,$3,13
1322	xor	$2,$20
1323	sll	$20,$3,19
1324	xor	$2,$19
1325	srl	$19,$3,22
1326	xor	$2,$20
1327	sll	$20,$3,30
1328	xor	$2,$19
1329	sw	$14,24($29)	# offload to ring buffer
1330	xor	$2,$20			# Sigma0(a)
1331
1332	or	$19,$3,$7
1333	and	$20,$3,$7
1334	and	$19,$24
1335	or	$20,$19			# Maj(a,b,c)
1336	addu	$18,$21			# +=K[22]
1337	addu	$2,$20
1338
1339	addu	$25,$18
1340	addu	$2,$18
1341	lw	$17,36($29)	# prefetch from ring buffer
1342	srl	$21,$16,3		# Xupdate(23)
1343	addu	$15,$8			# +=X[i+9]
1344	sll	$20,$16,14
1345	srl	$19,$16,7
1346	xor	$21,$20
1347	sll	$20,11
1348	xor	$21,$19
1349	srl	$19,$16,18
1350	xor	$21,$20
1351
1352	srl	$22,$13,10
1353	xor	$21,$19			# sigma0(X[i+1])
1354	sll	$20,$13,13
1355	addu	$15,$21
1356	srl	$19,$13,17
1357	xor	$22,$20
1358	sll	$20,2
1359	xor	$22,$19
1360	srl	$19,$13,19
1361	xor	$22,$20
1362
1363	xor	$22,$19			# sigma1(X[i+14])
1364	addu	$15,$22
1365	addu	$19,$15,$1			# 23
1366	srl	$1,$25,6
1367	xor	$22,$30,$31
1368	sll	$21,$25,7
1369	and	$22,$25
1370	srl	$20,$25,11
1371	xor	$1,$21
1372	sll	$21,$25,21
1373	xor	$1,$20
1374	srl	$20,$25,25
1375	xor	$1,$21
1376	sll	$21,$25,26
1377	xor	$1,$20
1378	xor	$22,$31			# Ch(e,f,g)
1379	xor	$20,$21,$1			# Sigma1(e)
1380
1381	srl	$1,$2,2
1382	addu	$19,$22
1383	lw	$22,92($6)		# K[23]
1384	sll	$21,$2,10
1385	addu	$19,$20
1386	srl	$20,$2,13
1387	xor	$1,$21
1388	sll	$21,$2,19
1389	xor	$1,$20
1390	srl	$20,$2,22
1391	xor	$1,$21
1392	sll	$21,$2,30
1393	xor	$1,$20
1394	sw	$15,28($29)	# offload to ring buffer
1395	xor	$1,$21			# Sigma0(a)
1396
1397	or	$20,$2,$3
1398	and	$21,$2,$3
1399	and	$20,$7
1400	or	$21,$20			# Maj(a,b,c)
1401	addu	$19,$22			# +=K[23]
1402	addu	$1,$21
1403
1404	addu	$24,$19
1405	addu	$1,$19
1406	lw	$18,40($29)	# prefetch from ring buffer
1407	srl	$22,$17,3		# Xupdate(24)
1408	addu	$16,$9			# +=X[i+9]
1409	sll	$21,$17,14
1410	srl	$20,$17,7
1411	xor	$22,$21
1412	sll	$21,11
1413	xor	$22,$20
1414	srl	$20,$17,18
1415	xor	$22,$21
1416
1417	srl	$23,$14,10
1418	xor	$22,$20			# sigma0(X[i+1])
1419	sll	$21,$14,13
1420	addu	$16,$22
1421	srl	$20,$14,17
1422	xor	$23,$21
1423	sll	$21,2
1424	xor	$23,$20
1425	srl	$20,$14,19
1426	xor	$23,$21
1427
1428	xor	$23,$20			# sigma1(X[i+14])
1429	addu	$16,$23
1430	addu	$20,$16,$31			# 24
1431	srl	$31,$24,6
1432	xor	$23,$25,$30
1433	sll	$22,$24,7
1434	and	$23,$24
1435	srl	$21,$24,11
1436	xor	$31,$22
1437	sll	$22,$24,21
1438	xor	$31,$21
1439	srl	$21,$24,25
1440	xor	$31,$22
1441	sll	$22,$24,26
1442	xor	$31,$21
1443	xor	$23,$30			# Ch(e,f,g)
1444	xor	$21,$22,$31			# Sigma1(e)
1445
1446	srl	$31,$1,2
1447	addu	$20,$23
1448	lw	$23,96($6)		# K[24]
1449	sll	$22,$1,10
1450	addu	$20,$21
1451	srl	$21,$1,13
1452	xor	$31,$22
1453	sll	$22,$1,19
1454	xor	$31,$21
1455	srl	$21,$1,22
1456	xor	$31,$22
1457	sll	$22,$1,30
1458	xor	$31,$21
1459	sw	$16,32($29)	# offload to ring buffer
1460	xor	$31,$22			# Sigma0(a)
1461
1462	or	$21,$1,$2
1463	and	$22,$1,$2
1464	and	$21,$3
1465	or	$22,$21			# Maj(a,b,c)
1466	addu	$20,$23			# +=K[24]
1467	addu	$31,$22
1468
1469	addu	$7,$20
1470	addu	$31,$20
1471	lw	$19,44($29)	# prefetch from ring buffer
1472	srl	$23,$18,3		# Xupdate(25)
1473	addu	$17,$10			# +=X[i+9]
1474	sll	$22,$18,14
1475	srl	$21,$18,7
1476	xor	$23,$22
1477	sll	$22,11
1478	xor	$23,$21
1479	srl	$21,$18,18
1480	xor	$23,$22
1481
1482	srl	$8,$15,10
1483	xor	$23,$21			# sigma0(X[i+1])
1484	sll	$22,$15,13
1485	addu	$17,$23
1486	srl	$21,$15,17
1487	xor	$8,$22
1488	sll	$22,2
1489	xor	$8,$21
1490	srl	$21,$15,19
1491	xor	$8,$22
1492
1493	xor	$8,$21			# sigma1(X[i+14])
1494	addu	$17,$8
1495	addu	$21,$17,$30			# 25
1496	srl	$30,$7,6
1497	xor	$8,$24,$25
1498	sll	$23,$7,7
1499	and	$8,$7
1500	srl	$22,$7,11
1501	xor	$30,$23
1502	sll	$23,$7,21
1503	xor	$30,$22
1504	srl	$22,$7,25
1505	xor	$30,$23
1506	sll	$23,$7,26
1507	xor	$30,$22
1508	xor	$8,$25			# Ch(e,f,g)
1509	xor	$22,$23,$30			# Sigma1(e)
1510
1511	srl	$30,$31,2
1512	addu	$21,$8
1513	lw	$8,100($6)		# K[25]
1514	sll	$23,$31,10
1515	addu	$21,$22
1516	srl	$22,$31,13
1517	xor	$30,$23
1518	sll	$23,$31,19
1519	xor	$30,$22
1520	srl	$22,$31,22
1521	xor	$30,$23
1522	sll	$23,$31,30
1523	xor	$30,$22
1524	sw	$17,36($29)	# offload to ring buffer
1525	xor	$30,$23			# Sigma0(a)
1526
1527	or	$22,$31,$1
1528	and	$23,$31,$1
1529	and	$22,$2
1530	or	$23,$22			# Maj(a,b,c)
1531	addu	$21,$8			# +=K[25]
1532	addu	$30,$23
1533
1534	addu	$3,$21
1535	addu	$30,$21
1536	lw	$20,48($29)	# prefetch from ring buffer
1537	srl	$8,$19,3		# Xupdate(26)
1538	addu	$18,$11			# +=X[i+9]
1539	sll	$23,$19,14
1540	srl	$22,$19,7
1541	xor	$8,$23
1542	sll	$23,11
1543	xor	$8,$22
1544	srl	$22,$19,18
1545	xor	$8,$23
1546
1547	srl	$9,$16,10
1548	xor	$8,$22			# sigma0(X[i+1])
1549	sll	$23,$16,13
1550	addu	$18,$8
1551	srl	$22,$16,17
1552	xor	$9,$23
1553	sll	$23,2
1554	xor	$9,$22
1555	srl	$22,$16,19
1556	xor	$9,$23
1557
1558	xor	$9,$22			# sigma1(X[i+14])
1559	addu	$18,$9
1560	addu	$22,$18,$25			# 26
1561	srl	$25,$3,6
1562	xor	$9,$7,$24
1563	sll	$8,$3,7
1564	and	$9,$3
1565	srl	$23,$3,11
1566	xor	$25,$8
1567	sll	$8,$3,21
1568	xor	$25,$23
1569	srl	$23,$3,25
1570	xor	$25,$8
1571	sll	$8,$3,26
1572	xor	$25,$23
1573	xor	$9,$24			# Ch(e,f,g)
1574	xor	$23,$8,$25			# Sigma1(e)
1575
1576	srl	$25,$30,2
1577	addu	$22,$9
1578	lw	$9,104($6)		# K[26]
1579	sll	$8,$30,10
1580	addu	$22,$23
1581	srl	$23,$30,13
1582	xor	$25,$8
1583	sll	$8,$30,19
1584	xor	$25,$23
1585	srl	$23,$30,22
1586	xor	$25,$8
1587	sll	$8,$30,30
1588	xor	$25,$23
1589	sw	$18,40($29)	# offload to ring buffer
1590	xor	$25,$8			# Sigma0(a)
1591
1592	or	$23,$30,$31
1593	and	$8,$30,$31
1594	and	$23,$1
1595	or	$8,$23			# Maj(a,b,c)
1596	addu	$22,$9			# +=K[26]
1597	addu	$25,$8
1598
1599	addu	$2,$22
1600	addu	$25,$22
1601	lw	$21,52($29)	# prefetch from ring buffer
1602	srl	$9,$20,3		# Xupdate(27)
1603	addu	$19,$12			# +=X[i+9]
1604	sll	$8,$20,14
1605	srl	$23,$20,7
1606	xor	$9,$8
1607	sll	$8,11
1608	xor	$9,$23
1609	srl	$23,$20,18
1610	xor	$9,$8
1611
1612	srl	$10,$17,10
1613	xor	$9,$23			# sigma0(X[i+1])
1614	sll	$8,$17,13
1615	addu	$19,$9
1616	srl	$23,$17,17
1617	xor	$10,$8
1618	sll	$8,2
1619	xor	$10,$23
1620	srl	$23,$17,19
1621	xor	$10,$8
1622
1623	xor	$10,$23			# sigma1(X[i+14])
1624	addu	$19,$10
1625	addu	$23,$19,$24			# 27
1626	srl	$24,$2,6
1627	xor	$10,$3,$7
1628	sll	$9,$2,7
1629	and	$10,$2
1630	srl	$8,$2,11
1631	xor	$24,$9
1632	sll	$9,$2,21
1633	xor	$24,$8
1634	srl	$8,$2,25
1635	xor	$24,$9
1636	sll	$9,$2,26
1637	xor	$24,$8
1638	xor	$10,$7			# Ch(e,f,g)
1639	xor	$8,$9,$24			# Sigma1(e)
1640
1641	srl	$24,$25,2
1642	addu	$23,$10
1643	lw	$10,108($6)		# K[27]
1644	sll	$9,$25,10
1645	addu	$23,$8
1646	srl	$8,$25,13
1647	xor	$24,$9
1648	sll	$9,$25,19
1649	xor	$24,$8
1650	srl	$8,$25,22
1651	xor	$24,$9
1652	sll	$9,$25,30
1653	xor	$24,$8
1654	sw	$19,44($29)	# offload to ring buffer
1655	xor	$24,$9			# Sigma0(a)
1656
1657	or	$8,$25,$30
1658	and	$9,$25,$30
1659	and	$8,$31
1660	or	$9,$8			# Maj(a,b,c)
1661	addu	$23,$10			# +=K[27]
1662	addu	$24,$9
1663
1664	addu	$1,$23
1665	addu	$24,$23
1666	lw	$22,56($29)	# prefetch from ring buffer
1667	srl	$10,$21,3		# Xupdate(28)
1668	addu	$20,$13			# +=X[i+9]
1669	sll	$9,$21,14
1670	srl	$8,$21,7
1671	xor	$10,$9
1672	sll	$9,11
1673	xor	$10,$8
1674	srl	$8,$21,18
1675	xor	$10,$9
1676
1677	srl	$11,$18,10
1678	xor	$10,$8			# sigma0(X[i+1])
1679	sll	$9,$18,13
1680	addu	$20,$10
1681	srl	$8,$18,17
1682	xor	$11,$9
1683	sll	$9,2
1684	xor	$11,$8
1685	srl	$8,$18,19
1686	xor	$11,$9
1687
1688	xor	$11,$8			# sigma1(X[i+14])
1689	addu	$20,$11
1690	addu	$8,$20,$7			# 28
1691	srl	$7,$1,6
1692	xor	$11,$2,$3
1693	sll	$10,$1,7
1694	and	$11,$1
1695	srl	$9,$1,11
1696	xor	$7,$10
1697	sll	$10,$1,21
1698	xor	$7,$9
1699	srl	$9,$1,25
1700	xor	$7,$10
1701	sll	$10,$1,26
1702	xor	$7,$9
1703	xor	$11,$3			# Ch(e,f,g)
1704	xor	$9,$10,$7			# Sigma1(e)
1705
1706	srl	$7,$24,2
1707	addu	$8,$11
1708	lw	$11,112($6)		# K[28]
1709	sll	$10,$24,10
1710	addu	$8,$9
1711	srl	$9,$24,13
1712	xor	$7,$10
1713	sll	$10,$24,19
1714	xor	$7,$9
1715	srl	$9,$24,22
1716	xor	$7,$10
1717	sll	$10,$24,30
1718	xor	$7,$9
1719	sw	$20,48($29)	# offload to ring buffer
1720	xor	$7,$10			# Sigma0(a)
1721
1722	or	$9,$24,$25
1723	and	$10,$24,$25
1724	and	$9,$30
1725	or	$10,$9			# Maj(a,b,c)
1726	addu	$8,$11			# +=K[28]
1727	addu	$7,$10
1728
1729	addu	$31,$8
1730	addu	$7,$8
1731	lw	$23,60($29)	# prefetch from ring buffer
1732	srl	$11,$22,3		# Xupdate(29)
1733	addu	$21,$14			# +=X[i+9]
1734	sll	$10,$22,14
1735	srl	$9,$22,7
1736	xor	$11,$10
1737	sll	$10,11
1738	xor	$11,$9
1739	srl	$9,$22,18
1740	xor	$11,$10
1741
1742	srl	$12,$19,10
1743	xor	$11,$9			# sigma0(X[i+1])
1744	sll	$10,$19,13
1745	addu	$21,$11
1746	srl	$9,$19,17
1747	xor	$12,$10
1748	sll	$10,2
1749	xor	$12,$9
1750	srl	$9,$19,19
1751	xor	$12,$10
1752
1753	xor	$12,$9			# sigma1(X[i+14])
1754	addu	$21,$12
1755	addu	$9,$21,$3			# 29
1756	srl	$3,$31,6
1757	xor	$12,$1,$2
1758	sll	$11,$31,7
1759	and	$12,$31
1760	srl	$10,$31,11
1761	xor	$3,$11
1762	sll	$11,$31,21
1763	xor	$3,$10
1764	srl	$10,$31,25
1765	xor	$3,$11
1766	sll	$11,$31,26
1767	xor	$3,$10
1768	xor	$12,$2			# Ch(e,f,g)
1769	xor	$10,$11,$3			# Sigma1(e)
1770
1771	srl	$3,$7,2
1772	addu	$9,$12
1773	lw	$12,116($6)		# K[29]
1774	sll	$11,$7,10
1775	addu	$9,$10
1776	srl	$10,$7,13
1777	xor	$3,$11
1778	sll	$11,$7,19
1779	xor	$3,$10
1780	srl	$10,$7,22
1781	xor	$3,$11
1782	sll	$11,$7,30
1783	xor	$3,$10
1784	sw	$21,52($29)	# offload to ring buffer
1785	xor	$3,$11			# Sigma0(a)
1786
1787	or	$10,$7,$24
1788	and	$11,$7,$24
1789	and	$10,$25
1790	or	$11,$10			# Maj(a,b,c)
1791	addu	$9,$12			# +=K[29]
1792	addu	$3,$11
1793
1794	addu	$30,$9
1795	addu	$3,$9
1796	lw	$8,0($29)	# prefetch from ring buffer
1797	srl	$12,$23,3		# Xupdate(30)
1798	addu	$22,$15			# +=X[i+9]
1799	sll	$11,$23,14
1800	srl	$10,$23,7
1801	xor	$12,$11
1802	sll	$11,11
1803	xor	$12,$10
1804	srl	$10,$23,18
1805	xor	$12,$11
1806
1807	srl	$13,$20,10
1808	xor	$12,$10			# sigma0(X[i+1])
1809	sll	$11,$20,13
1810	addu	$22,$12
1811	srl	$10,$20,17
1812	xor	$13,$11
1813	sll	$11,2
1814	xor	$13,$10
1815	srl	$10,$20,19
1816	xor	$13,$11
1817
1818	xor	$13,$10			# sigma1(X[i+14])
1819	addu	$22,$13
1820	addu	$10,$22,$2			# 30
1821	srl	$2,$30,6
1822	xor	$13,$31,$1
1823	sll	$12,$30,7
1824	and	$13,$30
1825	srl	$11,$30,11
1826	xor	$2,$12
1827	sll	$12,$30,21
1828	xor	$2,$11
1829	srl	$11,$30,25
1830	xor	$2,$12
1831	sll	$12,$30,26
1832	xor	$2,$11
1833	xor	$13,$1			# Ch(e,f,g)
1834	xor	$11,$12,$2			# Sigma1(e)
1835
1836	srl	$2,$3,2
1837	addu	$10,$13
1838	lw	$13,120($6)		# K[30]
1839	sll	$12,$3,10
1840	addu	$10,$11
1841	srl	$11,$3,13
1842	xor	$2,$12
1843	sll	$12,$3,19
1844	xor	$2,$11
1845	srl	$11,$3,22
1846	xor	$2,$12
1847	sll	$12,$3,30
1848	xor	$2,$11
1849	sw	$22,56($29)	# offload to ring buffer
1850	xor	$2,$12			# Sigma0(a)
1851
1852	or	$11,$3,$7
1853	and	$12,$3,$7
1854	and	$11,$24
1855	or	$12,$11			# Maj(a,b,c)
1856	addu	$10,$13			# +=K[30]
1857	addu	$2,$12
1858
1859	addu	$25,$10
1860	addu	$2,$10
1861	lw	$9,4($29)	# prefetch from ring buffer
1862	srl	$13,$8,3		# Xupdate(31)
1863	addu	$23,$16			# +=X[i+9]
1864	sll	$12,$8,14
1865	srl	$11,$8,7
1866	xor	$13,$12
1867	sll	$12,11
1868	xor	$13,$11
1869	srl	$11,$8,18
1870	xor	$13,$12
1871
1872	srl	$14,$21,10
1873	xor	$13,$11			# sigma0(X[i+1])
1874	sll	$12,$21,13
1875	addu	$23,$13
1876	srl	$11,$21,17
1877	xor	$14,$12
1878	sll	$12,2
1879	xor	$14,$11
1880	srl	$11,$21,19
1881	xor	$14,$12
1882
1883	xor	$14,$11			# sigma1(X[i+14])
1884	addu	$23,$14
1885	addu	$11,$23,$1			# 31
1886	srl	$1,$25,6
1887	xor	$14,$30,$31
1888	sll	$13,$25,7
1889	and	$14,$25
1890	srl	$12,$25,11
1891	xor	$1,$13
1892	sll	$13,$25,21
1893	xor	$1,$12
1894	srl	$12,$25,25
1895	xor	$1,$13
1896	sll	$13,$25,26
1897	xor	$1,$12
1898	xor	$14,$31			# Ch(e,f,g)
1899	xor	$12,$13,$1			# Sigma1(e)
1900
1901	srl	$1,$2,2
1902	addu	$11,$14
1903	lw	$14,124($6)		# K[31]
1904	sll	$13,$2,10
1905	addu	$11,$12
1906	srl	$12,$2,13
1907	xor	$1,$13
1908	sll	$13,$2,19
1909	xor	$1,$12
1910	srl	$12,$2,22
1911	xor	$1,$13
1912	sll	$13,$2,30
1913	xor	$1,$12
1914	sw	$23,60($29)	# offload to ring buffer
1915	xor	$1,$13			# Sigma0(a)
1916
1917	or	$12,$2,$3
1918	and	$13,$2,$3
1919	and	$12,$7
1920	or	$13,$12			# Maj(a,b,c)
1921	addu	$11,$14			# +=K[31]
1922	addu	$1,$13
1923
1924	addu	$24,$11
1925	addu	$1,$11
1926	lw	$10,8($29)	# prefetch from ring buffer
1927	and	$14,0xfff
1928	li	$15,2290
1929	.set	noreorder
1930	bne	$14,$15,.L16_xx
1931	add $6,16*4		# Ktbl+=16
1932
1933	lw	$23,16*4($29)	# restore pointer to the end of input
1934	lw	$8,0*4($4)
1935	lw	$9,1*4($4)
1936	lw	$10,2*4($4)
1937	add $5,16*4
1938	lw	$11,3*4($4)
1939	addu	$1,$8
1940	lw	$12,4*4($4)
1941	addu	$2,$9
1942	lw	$13,5*4($4)
1943	addu	$3,$10
1944	lw	$14,6*4($4)
1945	addu	$7,$11
1946	lw	$15,7*4($4)
1947	addu	$24,$12
1948	sw	$1,0*4($4)
1949	addu	$25,$13
1950	sw	$2,1*4($4)
1951	addu	$30,$14
1952	sw	$3,2*4($4)
1953	addu	$31,$15
1954	sw	$7,3*4($4)
1955	sw	$24,4*4($4)
1956	sw	$25,5*4($4)
1957	sw	$30,6*4($4)
1958	sw	$31,7*4($4)
1959
1960	bne	$5,$23,.Loop
1961	sub $6,192	# rewind $6
1962
1963	lw	$31,128-1*4($29)
1964	lw	$30,128-2*4($29)
1965	lw	$23,128-3*4($29)
1966	lw	$22,128-4*4($29)
1967	lw	$21,128-5*4($29)
1968	lw	$20,128-6*4($29)
1969	lw	$19,128-7*4($29)
1970	lw	$18,128-8*4($29)
1971	lw	$17,128-9*4($29)
1972	lw	$16,128-10*4($29)
1973	jr	$31
1974	add $29,128
1975.end	sha256_block_data_order
1976
1977.rdata			# switch to the read-only data section for the constant table
1978.align	5		# 2^5 = 32-byte alignment for the start of the table
1979K256:			# SHA-256 round constants K[0..63], 64 words (256 bytes); the function loads this address into $6 and reads K[i] at word offsets from it
1980	.word	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5	# K[0..3]
1981	.word	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5	# K[4..7]
1982	.word	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3	# K[8..11]
1983	.word	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174	# K[12..15]
1984	.word	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc	# K[16..19]
1985	.word	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da	# K[20..23]
1986	.word	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7	# K[24..27]
1987	.word	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967	# K[28..31]
1988	.word	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13	# K[32..35]
1989	.word	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85	# K[36..39]
1990	.word	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3	# K[40..43]
1991	.word	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070	# K[44..47]
1992	.word	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5	# K[48..51]
1993	.word	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3	# K[52..55]
1994	.word	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208	# K[56..59]
1995	.word	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2	# K[60..63]; low 12 bits of K[63] (0x8f2 = 2290) are what the round loop compares against to stop, so this word must stay last and unchanged
1996.asciiz	"SHA256 for MIPS, CRYPTOGAMS by <appro@openssl.org>"	# NUL-terminated identification string; not referenced by the code visible here
1997.align	5		# pad past the string to the next 32-byte boundary
1998
1999