1#ifdef OPENSSL_FIPSCANISTER
2# include <openssl/fipssyms.h>
3#endif
4
5.text				# everything below is assembled into the code section
6.set	noat			# keep the assembler from using $1 itself; the routine below uses $1 as an ordinary register
7#if !defined(__vxworks) || defined(__pic__)
8.option	pic2		# position-independent code, skipped only for VxWorks builds without __pic__
9#endif
10
11.align	5
12.globl	sha256_block_data_order
13.ent	sha256_block_data_order
14sha256_block_data_order:
15	.frame	$29,128,$31
16	.mask	3237937152,-4
17	.set	noreorder
18	sub $29,128
19	sw	$31,128-1*4($29)
20	sw	$30,128-2*4($29)
21	sw	$23,128-3*4($29)
22	sw	$22,128-4*4($29)
23	sw	$21,128-5*4($29)
24	sw	$20,128-6*4($29)
25	sw	$19,128-7*4($29)
26	sw	$18,128-8*4($29)
27	sw	$17,128-9*4($29)
28	sw	$16,128-10*4($29)
29	sll $23,$6,6
30	.cplocal	$6
31	.cpsetup	$25,$0,sha256_block_data_order
32	.set	reorder
33	la	$6,K256		# PIC-ified 'load address'
34
35	lw	$1,0*4($4)		# load context
36	lw	$2,1*4($4)
37	lw	$3,2*4($4)
38	lw	$7,3*4($4)
39	lw	$24,4*4($4)
40	lw	$25,5*4($4)
41	lw	$30,6*4($4)
42	lw	$31,7*4($4)
43
44	add $23,$5		# pointer to the end of input
45	sw	$23,16*4($29)
46	b	.Loop
47
48.align	5
49.Loop:
50	lwl	$8,3($5)
51	lwr	$8,0($5)
52	lwl	$9,7($5)
53	lwr	$9,4($5)
54	srl	$13,$8,24		# byte swap(0)
55	srl	$14,$8,8
56	andi	$15,$8,0xFF00
57	sll	$8,$8,24
58	andi	$14,0xFF00
59	sll	$15,$15,8
60	or	$8,$13
61	or	$14,$15
62	or	$8,$14
63	addu	$12,$8,$31			# 0
64	srl	$31,$24,6
65	xor	$15,$25,$30
66	sll	$14,$24,7
67	and	$15,$24
68	srl	$13,$24,11
69	xor	$31,$14
70	sll	$14,$24,21
71	xor	$31,$13
72	srl	$13,$24,25
73	xor	$31,$14
74	sll	$14,$24,26
75	xor	$31,$13
76	xor	$15,$30			# Ch(e,f,g)
77	xor	$13,$14,$31			# Sigma1(e)
78
79	srl	$31,$1,2
80	addu	$12,$15
81	lw	$15,0($6)		# K[0]
82	sll	$14,$1,10
83	addu	$12,$13
84	srl	$13,$1,13
85	xor	$31,$14
86	sll	$14,$1,19
87	xor	$31,$13
88	srl	$13,$1,22
89	xor	$31,$14
90	sll	$14,$1,30
91	xor	$31,$13
92	sw	$8,0($29)	# offload to ring buffer
93	xor	$31,$14			# Sigma0(a)
94
95	or	$13,$1,$2
96	and	$14,$1,$2
97	and	$13,$3
98	or	$14,$13			# Maj(a,b,c)
99	addu	$12,$15			# +=K[0]
100	addu	$31,$14
101
102	addu	$7,$12
103	addu	$31,$12
104	lwl	$10,11($5)
105	lwr	$10,8($5)
106	srl	$14,$9,24		# byte swap(1)
107	srl	$15,$9,8
108	andi	$16,$9,0xFF00
109	sll	$9,$9,24
110	andi	$15,0xFF00
111	sll	$16,$16,8
112	or	$9,$14
113	or	$15,$16
114	or	$9,$15
115	addu	$13,$9,$30			# 1
116	srl	$30,$7,6
117	xor	$16,$24,$25
118	sll	$15,$7,7
119	and	$16,$7
120	srl	$14,$7,11
121	xor	$30,$15
122	sll	$15,$7,21
123	xor	$30,$14
124	srl	$14,$7,25
125	xor	$30,$15
126	sll	$15,$7,26
127	xor	$30,$14
128	xor	$16,$25			# Ch(e,f,g)
129	xor	$14,$15,$30			# Sigma1(e)
130
131	srl	$30,$31,2
132	addu	$13,$16
133	lw	$16,4($6)		# K[1]
134	sll	$15,$31,10
135	addu	$13,$14
136	srl	$14,$31,13
137	xor	$30,$15
138	sll	$15,$31,19
139	xor	$30,$14
140	srl	$14,$31,22
141	xor	$30,$15
142	sll	$15,$31,30
143	xor	$30,$14
144	sw	$9,4($29)	# offload to ring buffer
145	xor	$30,$15			# Sigma0(a)
146
147	or	$14,$31,$1
148	and	$15,$31,$1
149	and	$14,$2
150	or	$15,$14			# Maj(a,b,c)
151	addu	$13,$16			# +=K[1]
152	addu	$30,$15
153
154	addu	$3,$13
155	addu	$30,$13
156	lwl	$11,15($5)
157	lwr	$11,12($5)
158	srl	$15,$10,24		# byte swap(2)
159	srl	$16,$10,8
160	andi	$17,$10,0xFF00
161	sll	$10,$10,24
162	andi	$16,0xFF00
163	sll	$17,$17,8
164	or	$10,$15
165	or	$16,$17
166	or	$10,$16
167	addu	$14,$10,$25			# 2
168	srl	$25,$3,6
169	xor	$17,$7,$24
170	sll	$16,$3,7
171	and	$17,$3
172	srl	$15,$3,11
173	xor	$25,$16
174	sll	$16,$3,21
175	xor	$25,$15
176	srl	$15,$3,25
177	xor	$25,$16
178	sll	$16,$3,26
179	xor	$25,$15
180	xor	$17,$24			# Ch(e,f,g)
181	xor	$15,$16,$25			# Sigma1(e)
182
183	srl	$25,$30,2
184	addu	$14,$17
185	lw	$17,8($6)		# K[2]
186	sll	$16,$30,10
187	addu	$14,$15
188	srl	$15,$30,13
189	xor	$25,$16
190	sll	$16,$30,19
191	xor	$25,$15
192	srl	$15,$30,22
193	xor	$25,$16
194	sll	$16,$30,30
195	xor	$25,$15
196	sw	$10,8($29)	# offload to ring buffer
197	xor	$25,$16			# Sigma0(a)
198
199	or	$15,$30,$31
200	and	$16,$30,$31
201	and	$15,$1
202	or	$16,$15			# Maj(a,b,c)
203	addu	$14,$17			# +=K[2]
204	addu	$25,$16
205
206	addu	$2,$14
207	addu	$25,$14
208	lwl	$12,19($5)
209	lwr	$12,16($5)
210	srl	$16,$11,24		# byte swap(3)
211	srl	$17,$11,8
212	andi	$18,$11,0xFF00
213	sll	$11,$11,24
214	andi	$17,0xFF00
215	sll	$18,$18,8
216	or	$11,$16
217	or	$17,$18
218	or	$11,$17
219	addu	$15,$11,$24			# 3
220	srl	$24,$2,6
221	xor	$18,$3,$7
222	sll	$17,$2,7
223	and	$18,$2
224	srl	$16,$2,11
225	xor	$24,$17
226	sll	$17,$2,21
227	xor	$24,$16
228	srl	$16,$2,25
229	xor	$24,$17
230	sll	$17,$2,26
231	xor	$24,$16
232	xor	$18,$7			# Ch(e,f,g)
233	xor	$16,$17,$24			# Sigma1(e)
234
235	srl	$24,$25,2
236	addu	$15,$18
237	lw	$18,12($6)		# K[3]
238	sll	$17,$25,10
239	addu	$15,$16
240	srl	$16,$25,13
241	xor	$24,$17
242	sll	$17,$25,19
243	xor	$24,$16
244	srl	$16,$25,22
245	xor	$24,$17
246	sll	$17,$25,30
247	xor	$24,$16
248	sw	$11,12($29)	# offload to ring buffer
249	xor	$24,$17			# Sigma0(a)
250
251	or	$16,$25,$30
252	and	$17,$25,$30
253	and	$16,$31
254	or	$17,$16			# Maj(a,b,c)
255	addu	$15,$18			# +=K[3]
256	addu	$24,$17
257
258	addu	$1,$15
259	addu	$24,$15
260	lwl	$13,23($5)
261	lwr	$13,20($5)
262	srl	$17,$12,24		# byte swap(4)
263	srl	$18,$12,8
264	andi	$19,$12,0xFF00
265	sll	$12,$12,24
266	andi	$18,0xFF00
267	sll	$19,$19,8
268	or	$12,$17
269	or	$18,$19
270	or	$12,$18
271	addu	$16,$12,$7			# 4
272	srl	$7,$1,6
273	xor	$19,$2,$3
274	sll	$18,$1,7
275	and	$19,$1
276	srl	$17,$1,11
277	xor	$7,$18
278	sll	$18,$1,21
279	xor	$7,$17
280	srl	$17,$1,25
281	xor	$7,$18
282	sll	$18,$1,26
283	xor	$7,$17
284	xor	$19,$3			# Ch(e,f,g)
285	xor	$17,$18,$7			# Sigma1(e)
286
287	srl	$7,$24,2
288	addu	$16,$19
289	lw	$19,16($6)		# K[4]
290	sll	$18,$24,10
291	addu	$16,$17
292	srl	$17,$24,13
293	xor	$7,$18
294	sll	$18,$24,19
295	xor	$7,$17
296	srl	$17,$24,22
297	xor	$7,$18
298	sll	$18,$24,30
299	xor	$7,$17
300	sw	$12,16($29)	# offload to ring buffer
301	xor	$7,$18			# Sigma0(a)
302
303	or	$17,$24,$25
304	and	$18,$24,$25
305	and	$17,$30
306	or	$18,$17			# Maj(a,b,c)
307	addu	$16,$19			# +=K[4]
308	addu	$7,$18
309
310	addu	$31,$16
311	addu	$7,$16
312	lwl	$14,27($5)
313	lwr	$14,24($5)
314	srl	$18,$13,24		# byte swap(5)
315	srl	$19,$13,8
316	andi	$20,$13,0xFF00
317	sll	$13,$13,24
318	andi	$19,0xFF00
319	sll	$20,$20,8
320	or	$13,$18
321	or	$19,$20
322	or	$13,$19
323	addu	$17,$13,$3			# 5
324	srl	$3,$31,6
325	xor	$20,$1,$2
326	sll	$19,$31,7
327	and	$20,$31
328	srl	$18,$31,11
329	xor	$3,$19
330	sll	$19,$31,21
331	xor	$3,$18
332	srl	$18,$31,25
333	xor	$3,$19
334	sll	$19,$31,26
335	xor	$3,$18
336	xor	$20,$2			# Ch(e,f,g)
337	xor	$18,$19,$3			# Sigma1(e)
338
339	srl	$3,$7,2
340	addu	$17,$20
341	lw	$20,20($6)		# K[5]
342	sll	$19,$7,10
343	addu	$17,$18
344	srl	$18,$7,13
345	xor	$3,$19
346	sll	$19,$7,19
347	xor	$3,$18
348	srl	$18,$7,22
349	xor	$3,$19
350	sll	$19,$7,30
351	xor	$3,$18
352	sw	$13,20($29)	# offload to ring buffer
353	xor	$3,$19			# Sigma0(a)
354
355	or	$18,$7,$24
356	and	$19,$7,$24
357	and	$18,$25
358	or	$19,$18			# Maj(a,b,c)
359	addu	$17,$20			# +=K[5]
360	addu	$3,$19
361
362	addu	$30,$17
363	addu	$3,$17
364	lwl	$15,31($5)
365	lwr	$15,28($5)
366	srl	$19,$14,24		# byte swap(6)
367	srl	$20,$14,8
368	andi	$21,$14,0xFF00
369	sll	$14,$14,24
370	andi	$20,0xFF00
371	sll	$21,$21,8
372	or	$14,$19
373	or	$20,$21
374	or	$14,$20
375	addu	$18,$14,$2			# 6
376	srl	$2,$30,6
377	xor	$21,$31,$1
378	sll	$20,$30,7
379	and	$21,$30
380	srl	$19,$30,11
381	xor	$2,$20
382	sll	$20,$30,21
383	xor	$2,$19
384	srl	$19,$30,25
385	xor	$2,$20
386	sll	$20,$30,26
387	xor	$2,$19
388	xor	$21,$1			# Ch(e,f,g)
389	xor	$19,$20,$2			# Sigma1(e)
390
391	srl	$2,$3,2
392	addu	$18,$21
393	lw	$21,24($6)		# K[6]
394	sll	$20,$3,10
395	addu	$18,$19
396	srl	$19,$3,13
397	xor	$2,$20
398	sll	$20,$3,19
399	xor	$2,$19
400	srl	$19,$3,22
401	xor	$2,$20
402	sll	$20,$3,30
403	xor	$2,$19
404	sw	$14,24($29)	# offload to ring buffer
405	xor	$2,$20			# Sigma0(a)
406
407	or	$19,$3,$7
408	and	$20,$3,$7
409	and	$19,$24
410	or	$20,$19			# Maj(a,b,c)
411	addu	$18,$21			# +=K[6]
412	addu	$2,$20
413
414	addu	$25,$18
415	addu	$2,$18
416	lwl	$16,35($5)
417	lwr	$16,32($5)
418	srl	$20,$15,24		# byte swap(7)
419	srl	$21,$15,8
420	andi	$22,$15,0xFF00
421	sll	$15,$15,24
422	andi	$21,0xFF00
423	sll	$22,$22,8
424	or	$15,$20
425	or	$21,$22
426	or	$15,$21
427	addu	$19,$15,$1			# 7
428	srl	$1,$25,6
429	xor	$22,$30,$31
430	sll	$21,$25,7
431	and	$22,$25
432	srl	$20,$25,11
433	xor	$1,$21
434	sll	$21,$25,21
435	xor	$1,$20
436	srl	$20,$25,25
437	xor	$1,$21
438	sll	$21,$25,26
439	xor	$1,$20
440	xor	$22,$31			# Ch(e,f,g)
441	xor	$20,$21,$1			# Sigma1(e)
442
443	srl	$1,$2,2
444	addu	$19,$22
445	lw	$22,28($6)		# K[7]
446	sll	$21,$2,10
447	addu	$19,$20
448	srl	$20,$2,13
449	xor	$1,$21
450	sll	$21,$2,19
451	xor	$1,$20
452	srl	$20,$2,22
453	xor	$1,$21
454	sll	$21,$2,30
455	xor	$1,$20
456	sw	$15,28($29)	# offload to ring buffer
457	xor	$1,$21			# Sigma0(a)
458
459	or	$20,$2,$3
460	and	$21,$2,$3
461	and	$20,$7
462	or	$21,$20			# Maj(a,b,c)
463	addu	$19,$22			# +=K[7]
464	addu	$1,$21
465
466	addu	$24,$19
467	addu	$1,$19
468	lwl	$17,39($5)
469	lwr	$17,36($5)
470	srl	$21,$16,24		# byte swap(8)
471	srl	$22,$16,8
472	andi	$23,$16,0xFF00
473	sll	$16,$16,24
474	andi	$22,0xFF00
475	sll	$23,$23,8
476	or	$16,$21
477	or	$22,$23
478	or	$16,$22
479	addu	$20,$16,$31			# 8
480	srl	$31,$24,6
481	xor	$23,$25,$30
482	sll	$22,$24,7
483	and	$23,$24
484	srl	$21,$24,11
485	xor	$31,$22
486	sll	$22,$24,21
487	xor	$31,$21
488	srl	$21,$24,25
489	xor	$31,$22
490	sll	$22,$24,26
491	xor	$31,$21
492	xor	$23,$30			# Ch(e,f,g)
493	xor	$21,$22,$31			# Sigma1(e)
494
495	srl	$31,$1,2
496	addu	$20,$23
497	lw	$23,32($6)		# K[8]
498	sll	$22,$1,10
499	addu	$20,$21
500	srl	$21,$1,13
501	xor	$31,$22
502	sll	$22,$1,19
503	xor	$31,$21
504	srl	$21,$1,22
505	xor	$31,$22
506	sll	$22,$1,30
507	xor	$31,$21
508	sw	$16,32($29)	# offload to ring buffer
509	xor	$31,$22			# Sigma0(a)
510
511	or	$21,$1,$2
512	and	$22,$1,$2
513	and	$21,$3
514	or	$22,$21			# Maj(a,b,c)
515	addu	$20,$23			# +=K[8]
516	addu	$31,$22
517
518	addu	$7,$20
519	addu	$31,$20
520	lwl	$18,43($5)
521	lwr	$18,40($5)
522	srl	$22,$17,24		# byte swap(9)
523	srl	$23,$17,8
524	andi	$8,$17,0xFF00
525	sll	$17,$17,24
526	andi	$23,0xFF00
527	sll	$8,$8,8
528	or	$17,$22
529	or	$23,$8
530	or	$17,$23
531	addu	$21,$17,$30			# 9
532	srl	$30,$7,6
533	xor	$8,$24,$25
534	sll	$23,$7,7
535	and	$8,$7
536	srl	$22,$7,11
537	xor	$30,$23
538	sll	$23,$7,21
539	xor	$30,$22
540	srl	$22,$7,25
541	xor	$30,$23
542	sll	$23,$7,26
543	xor	$30,$22
544	xor	$8,$25			# Ch(e,f,g)
545	xor	$22,$23,$30			# Sigma1(e)
546
547	srl	$30,$31,2
548	addu	$21,$8
549	lw	$8,36($6)		# K[9]
550	sll	$23,$31,10
551	addu	$21,$22
552	srl	$22,$31,13
553	xor	$30,$23
554	sll	$23,$31,19
555	xor	$30,$22
556	srl	$22,$31,22
557	xor	$30,$23
558	sll	$23,$31,30
559	xor	$30,$22
560	sw	$17,36($29)	# offload to ring buffer
561	xor	$30,$23			# Sigma0(a)
562
563	or	$22,$31,$1
564	and	$23,$31,$1
565	and	$22,$2
566	or	$23,$22			# Maj(a,b,c)
567	addu	$21,$8			# +=K[9]
568	addu	$30,$23
569
570	addu	$3,$21
571	addu	$30,$21
572	lwl	$19,47($5)
573	lwr	$19,44($5)
574	srl	$23,$18,24		# byte swap(10)
575	srl	$8,$18,8
576	andi	$9,$18,0xFF00
577	sll	$18,$18,24
578	andi	$8,0xFF00
579	sll	$9,$9,8
580	or	$18,$23
581	or	$8,$9
582	or	$18,$8
583	addu	$22,$18,$25			# 10
584	srl	$25,$3,6
585	xor	$9,$7,$24
586	sll	$8,$3,7
587	and	$9,$3
588	srl	$23,$3,11
589	xor	$25,$8
590	sll	$8,$3,21
591	xor	$25,$23
592	srl	$23,$3,25
593	xor	$25,$8
594	sll	$8,$3,26
595	xor	$25,$23
596	xor	$9,$24			# Ch(e,f,g)
597	xor	$23,$8,$25			# Sigma1(e)
598
599	srl	$25,$30,2
600	addu	$22,$9
601	lw	$9,40($6)		# K[10]
602	sll	$8,$30,10
603	addu	$22,$23
604	srl	$23,$30,13
605	xor	$25,$8
606	sll	$8,$30,19
607	xor	$25,$23
608	srl	$23,$30,22
609	xor	$25,$8
610	sll	$8,$30,30
611	xor	$25,$23
612	sw	$18,40($29)	# offload to ring buffer
613	xor	$25,$8			# Sigma0(a)
614
615	or	$23,$30,$31
616	and	$8,$30,$31
617	and	$23,$1
618	or	$8,$23			# Maj(a,b,c)
619	addu	$22,$9			# +=K[10]
620	addu	$25,$8
621
622	addu	$2,$22
623	addu	$25,$22
624	lwl	$20,51($5)
625	lwr	$20,48($5)
626	srl	$8,$19,24		# byte swap(11)
627	srl	$9,$19,8
628	andi	$10,$19,0xFF00
629	sll	$19,$19,24
630	andi	$9,0xFF00
631	sll	$10,$10,8
632	or	$19,$8
633	or	$9,$10
634	or	$19,$9
635	addu	$23,$19,$24			# 11
636	srl	$24,$2,6
637	xor	$10,$3,$7
638	sll	$9,$2,7
639	and	$10,$2
640	srl	$8,$2,11
641	xor	$24,$9
642	sll	$9,$2,21
643	xor	$24,$8
644	srl	$8,$2,25
645	xor	$24,$9
646	sll	$9,$2,26
647	xor	$24,$8
648	xor	$10,$7			# Ch(e,f,g)
649	xor	$8,$9,$24			# Sigma1(e)
650
651	srl	$24,$25,2
652	addu	$23,$10
653	lw	$10,44($6)		# K[11]
654	sll	$9,$25,10
655	addu	$23,$8
656	srl	$8,$25,13
657	xor	$24,$9
658	sll	$9,$25,19
659	xor	$24,$8
660	srl	$8,$25,22
661	xor	$24,$9
662	sll	$9,$25,30
663	xor	$24,$8
664	sw	$19,44($29)	# offload to ring buffer
665	xor	$24,$9			# Sigma0(a)
666
667	or	$8,$25,$30
668	and	$9,$25,$30
669	and	$8,$31
670	or	$9,$8			# Maj(a,b,c)
671	addu	$23,$10			# +=K[11]
672	addu	$24,$9
673
674	addu	$1,$23
675	addu	$24,$23
676	lwl	$21,55($5)
677	lwr	$21,52($5)
678	srl	$9,$20,24		# byte swap(12)
679	srl	$10,$20,8
680	andi	$11,$20,0xFF00
681	sll	$20,$20,24
682	andi	$10,0xFF00
683	sll	$11,$11,8
684	or	$20,$9
685	or	$10,$11
686	or	$20,$10
687	addu	$8,$20,$7			# 12
688	srl	$7,$1,6
689	xor	$11,$2,$3
690	sll	$10,$1,7
691	and	$11,$1
692	srl	$9,$1,11
693	xor	$7,$10
694	sll	$10,$1,21
695	xor	$7,$9
696	srl	$9,$1,25
697	xor	$7,$10
698	sll	$10,$1,26
699	xor	$7,$9
700	xor	$11,$3			# Ch(e,f,g)
701	xor	$9,$10,$7			# Sigma1(e)
702
703	srl	$7,$24,2
704	addu	$8,$11
705	lw	$11,48($6)		# K[12]
706	sll	$10,$24,10
707	addu	$8,$9
708	srl	$9,$24,13
709	xor	$7,$10
710	sll	$10,$24,19
711	xor	$7,$9
712	srl	$9,$24,22
713	xor	$7,$10
714	sll	$10,$24,30
715	xor	$7,$9
716	sw	$20,48($29)	# offload to ring buffer
717	xor	$7,$10			# Sigma0(a)
718
719	or	$9,$24,$25
720	and	$10,$24,$25
721	and	$9,$30
722	or	$10,$9			# Maj(a,b,c)
723	addu	$8,$11			# +=K[12]
724	addu	$7,$10
725
726	addu	$31,$8
727	addu	$7,$8
728	lwl	$22,59($5)
729	lwr	$22,56($5)
730	srl	$10,$21,24		# byte swap(13)
731	srl	$11,$21,8
732	andi	$12,$21,0xFF00
733	sll	$21,$21,24
734	andi	$11,0xFF00
735	sll	$12,$12,8
736	or	$21,$10
737	or	$11,$12
738	or	$21,$11
739	addu	$9,$21,$3			# 13
740	srl	$3,$31,6
741	xor	$12,$1,$2
742	sll	$11,$31,7
743	and	$12,$31
744	srl	$10,$31,11
745	xor	$3,$11
746	sll	$11,$31,21
747	xor	$3,$10
748	srl	$10,$31,25
749	xor	$3,$11
750	sll	$11,$31,26
751	xor	$3,$10
752	xor	$12,$2			# Ch(e,f,g)
753	xor	$10,$11,$3			# Sigma1(e)
754
755	srl	$3,$7,2
756	addu	$9,$12
757	lw	$12,52($6)		# K[13]
758	sll	$11,$7,10
759	addu	$9,$10
760	srl	$10,$7,13
761	xor	$3,$11
762	sll	$11,$7,19
763	xor	$3,$10
764	srl	$10,$7,22
765	xor	$3,$11
766	sll	$11,$7,30
767	xor	$3,$10
768	sw	$21,52($29)	# offload to ring buffer
769	xor	$3,$11			# Sigma0(a)
770
771	or	$10,$7,$24
772	and	$11,$7,$24
773	and	$10,$25
774	or	$11,$10			# Maj(a,b,c)
775	addu	$9,$12			# +=K[13]
776	addu	$3,$11
777
778	addu	$30,$9
779	addu	$3,$9
780	lw	$8,0($29)	# prefetch from ring buffer
781	lwl	$23,63($5)
782	lwr	$23,60($5)
783	srl	$11,$22,24		# byte swap(14)
784	srl	$12,$22,8
785	andi	$13,$22,0xFF00
786	sll	$22,$22,24
787	andi	$12,0xFF00
788	sll	$13,$13,8
789	or	$22,$11
790	or	$12,$13
791	or	$22,$12
792	addu	$10,$22,$2			# 14
793	srl	$2,$30,6
794	xor	$13,$31,$1
795	sll	$12,$30,7
796	and	$13,$30
797	srl	$11,$30,11
798	xor	$2,$12
799	sll	$12,$30,21
800	xor	$2,$11
801	srl	$11,$30,25
802	xor	$2,$12
803	sll	$12,$30,26
804	xor	$2,$11
805	xor	$13,$1			# Ch(e,f,g)
806	xor	$11,$12,$2			# Sigma1(e)
807
808	srl	$2,$3,2
809	addu	$10,$13
810	lw	$13,56($6)		# K[14]
811	sll	$12,$3,10
812	addu	$10,$11
813	srl	$11,$3,13
814	xor	$2,$12
815	sll	$12,$3,19
816	xor	$2,$11
817	srl	$11,$3,22
818	xor	$2,$12
819	sll	$12,$3,30
820	xor	$2,$11
821	sw	$22,56($29)	# offload to ring buffer
822	xor	$2,$12			# Sigma0(a)
823
824	or	$11,$3,$7
825	and	$12,$3,$7
826	and	$11,$24
827	or	$12,$11			# Maj(a,b,c)
828	addu	$10,$13			# +=K[14]
829	addu	$2,$12
830
831	addu	$25,$10
832	addu	$2,$10
833	lw	$9,4($29)	# prefetch from ring buffer
834	srl	$12,$23,24		# byte swap(15)
835	srl	$13,$23,8
836	andi	$14,$23,0xFF00
837	sll	$23,$23,24
838	andi	$13,0xFF00
839	sll	$14,$14,8
840	or	$23,$12
841	or	$13,$14
842	or	$23,$13
843	addu	$11,$23,$1			# 15
844	srl	$1,$25,6
845	xor	$14,$30,$31
846	sll	$13,$25,7
847	and	$14,$25
848	srl	$12,$25,11
849	xor	$1,$13
850	sll	$13,$25,21
851	xor	$1,$12
852	srl	$12,$25,25
853	xor	$1,$13
854	sll	$13,$25,26
855	xor	$1,$12
856	xor	$14,$31			# Ch(e,f,g)
857	xor	$12,$13,$1			# Sigma1(e)
858
859	srl	$1,$2,2
860	addu	$11,$14
861	lw	$14,60($6)		# K[15]
862	sll	$13,$2,10
863	addu	$11,$12
864	srl	$12,$2,13
865	xor	$1,$13
866	sll	$13,$2,19
867	xor	$1,$12
868	srl	$12,$2,22
869	xor	$1,$13
870	sll	$13,$2,30
871	xor	$1,$12
872	sw	$23,60($29)	# offload to ring buffer
873	xor	$1,$13			# Sigma0(a)
874
875	or	$12,$2,$3
876	and	$13,$2,$3
877	and	$12,$7
878	or	$13,$12			# Maj(a,b,c)
879	addu	$11,$14			# +=K[15]
880	addu	$1,$13
881
882	addu	$24,$11
883	addu	$1,$11
884	lw	$10,8($29)	# prefetch from ring buffer
885	b	.L16_xx
886.align	4
887.L16_xx:
888	srl	$14,$9,3		# Xupdate(16)
889	addu	$8,$17			# +=X[i+9]
890	sll	$13,$9,14
891	srl	$12,$9,7
892	xor	$14,$13
893	sll	$13,11
894	xor	$14,$12
895	srl	$12,$9,18
896	xor	$14,$13
897
898	srl	$15,$22,10
899	xor	$14,$12			# sigma0(X[i+1])
900	sll	$13,$22,13
901	addu	$8,$14
902	srl	$12,$22,17
903	xor	$15,$13
904	sll	$13,2
905	xor	$15,$12
906	srl	$12,$22,19
907	xor	$15,$13
908
909	xor	$15,$12			# sigma1(X[i+14])
910	addu	$8,$15
911	addu	$12,$8,$31			# 16
912	srl	$31,$24,6
913	xor	$15,$25,$30
914	sll	$14,$24,7
915	and	$15,$24
916	srl	$13,$24,11
917	xor	$31,$14
918	sll	$14,$24,21
919	xor	$31,$13
920	srl	$13,$24,25
921	xor	$31,$14
922	sll	$14,$24,26
923	xor	$31,$13
924	xor	$15,$30			# Ch(e,f,g)
925	xor	$13,$14,$31			# Sigma1(e)
926
927	srl	$31,$1,2
928	addu	$12,$15
929	lw	$15,64($6)		# K[16]
930	sll	$14,$1,10
931	addu	$12,$13
932	srl	$13,$1,13
933	xor	$31,$14
934	sll	$14,$1,19
935	xor	$31,$13
936	srl	$13,$1,22
937	xor	$31,$14
938	sll	$14,$1,30
939	xor	$31,$13
940	sw	$8,0($29)	# offload to ring buffer
941	xor	$31,$14			# Sigma0(a)
942
943	or	$13,$1,$2
944	and	$14,$1,$2
945	and	$13,$3
946	or	$14,$13			# Maj(a,b,c)
947	addu	$12,$15			# +=K[16]
948	addu	$31,$14
949
950	addu	$7,$12
951	addu	$31,$12
952	lw	$11,12($29)	# prefetch from ring buffer
953	srl	$15,$10,3		# Xupdate(17)
954	addu	$9,$18			# +=X[i+9]
955	sll	$14,$10,14
956	srl	$13,$10,7
957	xor	$15,$14
958	sll	$14,11
959	xor	$15,$13
960	srl	$13,$10,18
961	xor	$15,$14
962
963	srl	$16,$23,10
964	xor	$15,$13			# sigma0(X[i+1])
965	sll	$14,$23,13
966	addu	$9,$15
967	srl	$13,$23,17
968	xor	$16,$14
969	sll	$14,2
970	xor	$16,$13
971	srl	$13,$23,19
972	xor	$16,$14
973
974	xor	$16,$13			# sigma1(X[i+14])
975	addu	$9,$16
976	addu	$13,$9,$30			# 17
977	srl	$30,$7,6
978	xor	$16,$24,$25
979	sll	$15,$7,7
980	and	$16,$7
981	srl	$14,$7,11
982	xor	$30,$15
983	sll	$15,$7,21
984	xor	$30,$14
985	srl	$14,$7,25
986	xor	$30,$15
987	sll	$15,$7,26
988	xor	$30,$14
989	xor	$16,$25			# Ch(e,f,g)
990	xor	$14,$15,$30			# Sigma1(e)
991
992	srl	$30,$31,2
993	addu	$13,$16
994	lw	$16,68($6)		# K[17]
995	sll	$15,$31,10
996	addu	$13,$14
997	srl	$14,$31,13
998	xor	$30,$15
999	sll	$15,$31,19
1000	xor	$30,$14
1001	srl	$14,$31,22
1002	xor	$30,$15
1003	sll	$15,$31,30
1004	xor	$30,$14
1005	sw	$9,4($29)	# offload to ring buffer
1006	xor	$30,$15			# Sigma0(a)
1007
1008	or	$14,$31,$1
1009	and	$15,$31,$1
1010	and	$14,$2
1011	or	$15,$14			# Maj(a,b,c)
1012	addu	$13,$16			# +=K[17]
1013	addu	$30,$15
1014
1015	addu	$3,$13
1016	addu	$30,$13
1017	lw	$12,16($29)	# prefetch from ring buffer
1018	srl	$16,$11,3		# Xupdate(18)
1019	addu	$10,$19			# +=X[i+9]
1020	sll	$15,$11,14
1021	srl	$14,$11,7
1022	xor	$16,$15
1023	sll	$15,11
1024	xor	$16,$14
1025	srl	$14,$11,18
1026	xor	$16,$15
1027
1028	srl	$17,$8,10
1029	xor	$16,$14			# sigma0(X[i+1])
1030	sll	$15,$8,13
1031	addu	$10,$16
1032	srl	$14,$8,17
1033	xor	$17,$15
1034	sll	$15,2
1035	xor	$17,$14
1036	srl	$14,$8,19
1037	xor	$17,$15
1038
1039	xor	$17,$14			# sigma1(X[i+14])
1040	addu	$10,$17
1041	addu	$14,$10,$25			# 18
1042	srl	$25,$3,6
1043	xor	$17,$7,$24
1044	sll	$16,$3,7
1045	and	$17,$3
1046	srl	$15,$3,11
1047	xor	$25,$16
1048	sll	$16,$3,21
1049	xor	$25,$15
1050	srl	$15,$3,25
1051	xor	$25,$16
1052	sll	$16,$3,26
1053	xor	$25,$15
1054	xor	$17,$24			# Ch(e,f,g)
1055	xor	$15,$16,$25			# Sigma1(e)
1056
1057	srl	$25,$30,2
1058	addu	$14,$17
1059	lw	$17,72($6)		# K[18]
1060	sll	$16,$30,10
1061	addu	$14,$15
1062	srl	$15,$30,13
1063	xor	$25,$16
1064	sll	$16,$30,19
1065	xor	$25,$15
1066	srl	$15,$30,22
1067	xor	$25,$16
1068	sll	$16,$30,30
1069	xor	$25,$15
1070	sw	$10,8($29)	# offload to ring buffer
1071	xor	$25,$16			# Sigma0(a)
1072
1073	or	$15,$30,$31
1074	and	$16,$30,$31
1075	and	$15,$1
1076	or	$16,$15			# Maj(a,b,c)
1077	addu	$14,$17			# +=K[18]
1078	addu	$25,$16
1079
1080	addu	$2,$14
1081	addu	$25,$14
1082	lw	$13,20($29)	# prefetch from ring buffer
1083	srl	$17,$12,3		# Xupdate(19)
1084	addu	$11,$20			# +=X[i+9]
1085	sll	$16,$12,14
1086	srl	$15,$12,7
1087	xor	$17,$16
1088	sll	$16,11
1089	xor	$17,$15
1090	srl	$15,$12,18
1091	xor	$17,$16
1092
1093	srl	$18,$9,10
1094	xor	$17,$15			# sigma0(X[i+1])
1095	sll	$16,$9,13
1096	addu	$11,$17
1097	srl	$15,$9,17
1098	xor	$18,$16
1099	sll	$16,2
1100	xor	$18,$15
1101	srl	$15,$9,19
1102	xor	$18,$16
1103
1104	xor	$18,$15			# sigma1(X[i+14])
1105	addu	$11,$18
1106	addu	$15,$11,$24			# 19
1107	srl	$24,$2,6
1108	xor	$18,$3,$7
1109	sll	$17,$2,7
1110	and	$18,$2
1111	srl	$16,$2,11
1112	xor	$24,$17
1113	sll	$17,$2,21
1114	xor	$24,$16
1115	srl	$16,$2,25
1116	xor	$24,$17
1117	sll	$17,$2,26
1118	xor	$24,$16
1119	xor	$18,$7			# Ch(e,f,g)
1120	xor	$16,$17,$24			# Sigma1(e)
1121
1122	srl	$24,$25,2
1123	addu	$15,$18
1124	lw	$18,76($6)		# K[19]
1125	sll	$17,$25,10
1126	addu	$15,$16
1127	srl	$16,$25,13
1128	xor	$24,$17
1129	sll	$17,$25,19
1130	xor	$24,$16
1131	srl	$16,$25,22
1132	xor	$24,$17
1133	sll	$17,$25,30
1134	xor	$24,$16
1135	sw	$11,12($29)	# offload to ring buffer
1136	xor	$24,$17			# Sigma0(a)
1137
1138	or	$16,$25,$30
1139	and	$17,$25,$30
1140	and	$16,$31
1141	or	$17,$16			# Maj(a,b,c)
1142	addu	$15,$18			# +=K[19]
1143	addu	$24,$17
1144
1145	addu	$1,$15
1146	addu	$24,$15
1147	lw	$14,24($29)	# prefetch from ring buffer
1148	srl	$18,$13,3		# Xupdate(20)
1149	addu	$12,$21			# +=X[i+9]
1150	sll	$17,$13,14
1151	srl	$16,$13,7
1152	xor	$18,$17
1153	sll	$17,11
1154	xor	$18,$16
1155	srl	$16,$13,18
1156	xor	$18,$17
1157
1158	srl	$19,$10,10
1159	xor	$18,$16			# sigma0(X[i+1])
1160	sll	$17,$10,13
1161	addu	$12,$18
1162	srl	$16,$10,17
1163	xor	$19,$17
1164	sll	$17,2
1165	xor	$19,$16
1166	srl	$16,$10,19
1167	xor	$19,$17
1168
1169	xor	$19,$16			# sigma1(X[i+14])
1170	addu	$12,$19
1171	addu	$16,$12,$7			# 20
1172	srl	$7,$1,6
1173	xor	$19,$2,$3
1174	sll	$18,$1,7
1175	and	$19,$1
1176	srl	$17,$1,11
1177	xor	$7,$18
1178	sll	$18,$1,21
1179	xor	$7,$17
1180	srl	$17,$1,25
1181	xor	$7,$18
1182	sll	$18,$1,26
1183	xor	$7,$17
1184	xor	$19,$3			# Ch(e,f,g)
1185	xor	$17,$18,$7			# Sigma1(e)
1186
1187	srl	$7,$24,2
1188	addu	$16,$19
1189	lw	$19,80($6)		# K[20]
1190	sll	$18,$24,10
1191	addu	$16,$17
1192	srl	$17,$24,13
1193	xor	$7,$18
1194	sll	$18,$24,19
1195	xor	$7,$17
1196	srl	$17,$24,22
1197	xor	$7,$18
1198	sll	$18,$24,30
1199	xor	$7,$17
1200	sw	$12,16($29)	# offload to ring buffer
1201	xor	$7,$18			# Sigma0(a)
1202
1203	or	$17,$24,$25
1204	and	$18,$24,$25
1205	and	$17,$30
1206	or	$18,$17			# Maj(a,b,c)
1207	addu	$16,$19			# +=K[20]
1208	addu	$7,$18
1209
1210	addu	$31,$16
1211	addu	$7,$16
1212	lw	$15,28($29)	# prefetch from ring buffer
1213	srl	$19,$14,3		# Xupdate(21)
1214	addu	$13,$22			# +=X[i+9]
1215	sll	$18,$14,14
1216	srl	$17,$14,7
1217	xor	$19,$18
1218	sll	$18,11
1219	xor	$19,$17
1220	srl	$17,$14,18
1221	xor	$19,$18
1222
1223	srl	$20,$11,10
1224	xor	$19,$17			# sigma0(X[i+1])
1225	sll	$18,$11,13
1226	addu	$13,$19
1227	srl	$17,$11,17
1228	xor	$20,$18
1229	sll	$18,2
1230	xor	$20,$17
1231	srl	$17,$11,19
1232	xor	$20,$18
1233
1234	xor	$20,$17			# sigma1(X[i+14])
1235	addu	$13,$20
1236	addu	$17,$13,$3			# 21
1237	srl	$3,$31,6
1238	xor	$20,$1,$2
1239	sll	$19,$31,7
1240	and	$20,$31
1241	srl	$18,$31,11
1242	xor	$3,$19
1243	sll	$19,$31,21
1244	xor	$3,$18
1245	srl	$18,$31,25
1246	xor	$3,$19
1247	sll	$19,$31,26
1248	xor	$3,$18
1249	xor	$20,$2			# Ch(e,f,g)
1250	xor	$18,$19,$3			# Sigma1(e)
1251
1252	srl	$3,$7,2
1253	addu	$17,$20
1254	lw	$20,84($6)		# K[21]
1255	sll	$19,$7,10
1256	addu	$17,$18
1257	srl	$18,$7,13
1258	xor	$3,$19
1259	sll	$19,$7,19
1260	xor	$3,$18
1261	srl	$18,$7,22
1262	xor	$3,$19
1263	sll	$19,$7,30
1264	xor	$3,$18
1265	sw	$13,20($29)	# offload to ring buffer
1266	xor	$3,$19			# Sigma0(a)
1267
1268	or	$18,$7,$24
1269	and	$19,$7,$24
1270	and	$18,$25
1271	or	$19,$18			# Maj(a,b,c)
1272	addu	$17,$20			# +=K[21]
1273	addu	$3,$19
1274
1275	addu	$30,$17
1276	addu	$3,$17
1277	lw	$16,32($29)	# prefetch from ring buffer
1278	srl	$20,$15,3		# Xupdate(22)
1279	addu	$14,$23			# +=X[i+9]
1280	sll	$19,$15,14
1281	srl	$18,$15,7
1282	xor	$20,$19
1283	sll	$19,11
1284	xor	$20,$18
1285	srl	$18,$15,18
1286	xor	$20,$19
1287
1288	srl	$21,$12,10
1289	xor	$20,$18			# sigma0(X[i+1])
1290	sll	$19,$12,13
1291	addu	$14,$20
1292	srl	$18,$12,17
1293	xor	$21,$19
1294	sll	$19,2
1295	xor	$21,$18
1296	srl	$18,$12,19
1297	xor	$21,$19
1298
1299	xor	$21,$18			# sigma1(X[i+14])
1300	addu	$14,$21
1301	addu	$18,$14,$2			# 22
1302	srl	$2,$30,6
1303	xor	$21,$31,$1
1304	sll	$20,$30,7
1305	and	$21,$30
1306	srl	$19,$30,11
1307	xor	$2,$20
1308	sll	$20,$30,21
1309	xor	$2,$19
1310	srl	$19,$30,25
1311	xor	$2,$20
1312	sll	$20,$30,26
1313	xor	$2,$19
1314	xor	$21,$1			# Ch(e,f,g)
1315	xor	$19,$20,$2			# Sigma1(e)
1316
1317	srl	$2,$3,2
1318	addu	$18,$21
1319	lw	$21,88($6)		# K[22]
1320	sll	$20,$3,10
1321	addu	$18,$19
1322	srl	$19,$3,13
1323	xor	$2,$20
1324	sll	$20,$3,19
1325	xor	$2,$19
1326	srl	$19,$3,22
1327	xor	$2,$20
1328	sll	$20,$3,30
1329	xor	$2,$19
1330	sw	$14,24($29)	# offload to ring buffer
1331	xor	$2,$20			# Sigma0(a)
1332
1333	or	$19,$3,$7
1334	and	$20,$3,$7
1335	and	$19,$24
1336	or	$20,$19			# Maj(a,b,c)
1337	addu	$18,$21			# +=K[22]
1338	addu	$2,$20
1339
1340	addu	$25,$18
1341	addu	$2,$18
1342	lw	$17,36($29)	# prefetch from ring buffer
1343	srl	$21,$16,3		# Xupdate(23)
1344	addu	$15,$8			# +=X[i+9]
1345	sll	$20,$16,14
1346	srl	$19,$16,7
1347	xor	$21,$20
1348	sll	$20,11
1349	xor	$21,$19
1350	srl	$19,$16,18
1351	xor	$21,$20
1352
1353	srl	$22,$13,10
1354	xor	$21,$19			# sigma0(X[i+1])
1355	sll	$20,$13,13
1356	addu	$15,$21
1357	srl	$19,$13,17
1358	xor	$22,$20
1359	sll	$20,2
1360	xor	$22,$19
1361	srl	$19,$13,19
1362	xor	$22,$20
1363
1364	xor	$22,$19			# sigma1(X[i+14])
1365	addu	$15,$22
1366	addu	$19,$15,$1			# 23
1367	srl	$1,$25,6
1368	xor	$22,$30,$31
1369	sll	$21,$25,7
1370	and	$22,$25
1371	srl	$20,$25,11
1372	xor	$1,$21
1373	sll	$21,$25,21
1374	xor	$1,$20
1375	srl	$20,$25,25
1376	xor	$1,$21
1377	sll	$21,$25,26
1378	xor	$1,$20
1379	xor	$22,$31			# Ch(e,f,g)
1380	xor	$20,$21,$1			# Sigma1(e)
1381
1382	srl	$1,$2,2
1383	addu	$19,$22
1384	lw	$22,92($6)		# K[23]
1385	sll	$21,$2,10
1386	addu	$19,$20
1387	srl	$20,$2,13
1388	xor	$1,$21
1389	sll	$21,$2,19
1390	xor	$1,$20
1391	srl	$20,$2,22
1392	xor	$1,$21
1393	sll	$21,$2,30
1394	xor	$1,$20
1395	sw	$15,28($29)	# offload to ring buffer
1396	xor	$1,$21			# Sigma0(a)
1397
1398	or	$20,$2,$3
1399	and	$21,$2,$3
1400	and	$20,$7
1401	or	$21,$20			# Maj(a,b,c)
1402	addu	$19,$22			# +=K[23]
1403	addu	$1,$21
1404
1405	addu	$24,$19
1406	addu	$1,$19
1407	lw	$18,40($29)	# prefetch from ring buffer
1408	srl	$22,$17,3		# Xupdate(24)
1409	addu	$16,$9			# +=X[i+9]
1410	sll	$21,$17,14
1411	srl	$20,$17,7
1412	xor	$22,$21
1413	sll	$21,11
1414	xor	$22,$20
1415	srl	$20,$17,18
1416	xor	$22,$21
1417
1418	srl	$23,$14,10
1419	xor	$22,$20			# sigma0(X[i+1])
1420	sll	$21,$14,13
1421	addu	$16,$22
1422	srl	$20,$14,17
1423	xor	$23,$21
1424	sll	$21,2
1425	xor	$23,$20
1426	srl	$20,$14,19
1427	xor	$23,$21
1428
1429	xor	$23,$20			# sigma1(X[i+14])
1430	addu	$16,$23
1431	addu	$20,$16,$31			# 24
1432	srl	$31,$24,6
1433	xor	$23,$25,$30
1434	sll	$22,$24,7
1435	and	$23,$24
1436	srl	$21,$24,11
1437	xor	$31,$22
1438	sll	$22,$24,21
1439	xor	$31,$21
1440	srl	$21,$24,25
1441	xor	$31,$22
1442	sll	$22,$24,26
1443	xor	$31,$21
1444	xor	$23,$30			# Ch(e,f,g)
1445	xor	$21,$22,$31			# Sigma1(e)
1446
1447	srl	$31,$1,2
1448	addu	$20,$23
1449	lw	$23,96($6)		# K[24]
1450	sll	$22,$1,10
1451	addu	$20,$21
1452	srl	$21,$1,13
1453	xor	$31,$22
1454	sll	$22,$1,19
1455	xor	$31,$21
1456	srl	$21,$1,22
1457	xor	$31,$22
1458	sll	$22,$1,30
1459	xor	$31,$21
1460	sw	$16,32($29)	# offload to ring buffer
1461	xor	$31,$22			# Sigma0(a)
1462
1463	or	$21,$1,$2
1464	and	$22,$1,$2
1465	and	$21,$3
1466	or	$22,$21			# Maj(a,b,c)
1467	addu	$20,$23			# +=K[24]
1468	addu	$31,$22
1469
1470	addu	$7,$20
1471	addu	$31,$20
1472	lw	$19,44($29)	# prefetch from ring buffer
1473	srl	$23,$18,3		# Xupdate(25)
1474	addu	$17,$10			# +=X[i+9]
1475	sll	$22,$18,14
1476	srl	$21,$18,7
1477	xor	$23,$22
1478	sll	$22,11
1479	xor	$23,$21
1480	srl	$21,$18,18
1481	xor	$23,$22
1482
1483	srl	$8,$15,10
1484	xor	$23,$21			# sigma0(X[i+1])
1485	sll	$22,$15,13
1486	addu	$17,$23
1487	srl	$21,$15,17
1488	xor	$8,$22
1489	sll	$22,2
1490	xor	$8,$21
1491	srl	$21,$15,19
1492	xor	$8,$22
1493
1494	xor	$8,$21			# sigma1(X[i+14])
1495	addu	$17,$8
1496	addu	$21,$17,$30			# 25
1497	srl	$30,$7,6
1498	xor	$8,$24,$25
1499	sll	$23,$7,7
1500	and	$8,$7
1501	srl	$22,$7,11
1502	xor	$30,$23
1503	sll	$23,$7,21
1504	xor	$30,$22
1505	srl	$22,$7,25
1506	xor	$30,$23
1507	sll	$23,$7,26
1508	xor	$30,$22
1509	xor	$8,$25			# Ch(e,f,g)
1510	xor	$22,$23,$30			# Sigma1(e)
1511
1512	srl	$30,$31,2
1513	addu	$21,$8
1514	lw	$8,100($6)		# K[25]
1515	sll	$23,$31,10
1516	addu	$21,$22
1517	srl	$22,$31,13
1518	xor	$30,$23
1519	sll	$23,$31,19
1520	xor	$30,$22
1521	srl	$22,$31,22
1522	xor	$30,$23
1523	sll	$23,$31,30
1524	xor	$30,$22
1525	sw	$17,36($29)	# offload to ring buffer
1526	xor	$30,$23			# Sigma0(a)
1527
1528	or	$22,$31,$1
1529	and	$23,$31,$1
1530	and	$22,$2
1531	or	$23,$22			# Maj(a,b,c)
1532	addu	$21,$8			# +=K[25]
1533	addu	$30,$23
1534
1535	addu	$3,$21
1536	addu	$30,$21
1537	lw	$20,48($29)	# prefetch from ring buffer
1538	srl	$8,$19,3		# Xupdate(26)
1539	addu	$18,$11			# +=X[i+9]
1540	sll	$23,$19,14
1541	srl	$22,$19,7
1542	xor	$8,$23
1543	sll	$23,11
1544	xor	$8,$22
1545	srl	$22,$19,18
1546	xor	$8,$23
1547
1548	srl	$9,$16,10
1549	xor	$8,$22			# sigma0(X[i+1])
1550	sll	$23,$16,13
1551	addu	$18,$8
1552	srl	$22,$16,17
1553	xor	$9,$23
1554	sll	$23,2
1555	xor	$9,$22
1556	srl	$22,$16,19
1557	xor	$9,$23
1558
1559	xor	$9,$22			# sigma1(X[i+14])
1560	addu	$18,$9
1561	addu	$22,$18,$25			# 26
1562	srl	$25,$3,6
1563	xor	$9,$7,$24
1564	sll	$8,$3,7
1565	and	$9,$3
1566	srl	$23,$3,11
1567	xor	$25,$8
1568	sll	$8,$3,21
1569	xor	$25,$23
1570	srl	$23,$3,25
1571	xor	$25,$8
1572	sll	$8,$3,26
1573	xor	$25,$23
1574	xor	$9,$24			# Ch(e,f,g)
1575	xor	$23,$8,$25			# Sigma1(e)
1576
1577	srl	$25,$30,2
1578	addu	$22,$9
1579	lw	$9,104($6)		# K[26]
1580	sll	$8,$30,10
1581	addu	$22,$23
1582	srl	$23,$30,13
1583	xor	$25,$8
1584	sll	$8,$30,19
1585	xor	$25,$23
1586	srl	$23,$30,22
1587	xor	$25,$8
1588	sll	$8,$30,30
1589	xor	$25,$23
1590	sw	$18,40($29)	# offload to ring buffer
1591	xor	$25,$8			# Sigma0(a)
1592
1593	or	$23,$30,$31
1594	and	$8,$30,$31
1595	and	$23,$1
1596	or	$8,$23			# Maj(a,b,c)
1597	addu	$22,$9			# +=K[26]
1598	addu	$25,$8
1599
1600	addu	$2,$22
1601	addu	$25,$22
1602	lw	$21,52($29)	# prefetch from ring buffer
1603	srl	$9,$20,3		# Xupdate(27)
1604	addu	$19,$12			# +=X[i+9]
1605	sll	$8,$20,14
1606	srl	$23,$20,7
1607	xor	$9,$8
1608	sll	$8,11
1609	xor	$9,$23
1610	srl	$23,$20,18
1611	xor	$9,$8
1612
1613	srl	$10,$17,10
1614	xor	$9,$23			# sigma0(X[i+1])
1615	sll	$8,$17,13
1616	addu	$19,$9
1617	srl	$23,$17,17
1618	xor	$10,$8
1619	sll	$8,2
1620	xor	$10,$23
1621	srl	$23,$17,19
1622	xor	$10,$8
1623
1624	xor	$10,$23			# sigma1(X[i+14])
1625	addu	$19,$10
1626	addu	$23,$19,$24			# 27
1627	srl	$24,$2,6
1628	xor	$10,$3,$7
1629	sll	$9,$2,7
1630	and	$10,$2
1631	srl	$8,$2,11
1632	xor	$24,$9
1633	sll	$9,$2,21
1634	xor	$24,$8
1635	srl	$8,$2,25
1636	xor	$24,$9
1637	sll	$9,$2,26
1638	xor	$24,$8
1639	xor	$10,$7			# Ch(e,f,g)
1640	xor	$8,$9,$24			# Sigma1(e)
1641
1642	srl	$24,$25,2
1643	addu	$23,$10
1644	lw	$10,108($6)		# K[27]
1645	sll	$9,$25,10
1646	addu	$23,$8
1647	srl	$8,$25,13
1648	xor	$24,$9
1649	sll	$9,$25,19
1650	xor	$24,$8
1651	srl	$8,$25,22
1652	xor	$24,$9
1653	sll	$9,$25,30
1654	xor	$24,$8
1655	sw	$19,44($29)	# offload to ring buffer
1656	xor	$24,$9			# Sigma0(a)
1657
1658	or	$8,$25,$30
1659	and	$9,$25,$30
1660	and	$8,$31
1661	or	$9,$8			# Maj(a,b,c)
1662	addu	$23,$10			# +=K[27]
1663	addu	$24,$9
1664
1665	addu	$1,$23
1666	addu	$24,$23
1667	lw	$22,56($29)	# prefetch from ring buffer
1668	srl	$10,$21,3		# Xupdate(28)
1669	addu	$20,$13			# +=X[i+9]
1670	sll	$9,$21,14
1671	srl	$8,$21,7
1672	xor	$10,$9
1673	sll	$9,11
1674	xor	$10,$8
1675	srl	$8,$21,18
1676	xor	$10,$9
1677
1678	srl	$11,$18,10
1679	xor	$10,$8			# sigma0(X[i+1])
1680	sll	$9,$18,13
1681	addu	$20,$10
1682	srl	$8,$18,17
1683	xor	$11,$9
1684	sll	$9,2
1685	xor	$11,$8
1686	srl	$8,$18,19
1687	xor	$11,$9
1688
1689	xor	$11,$8			# sigma1(X[i+14])
1690	addu	$20,$11
1691	addu	$8,$20,$7			# 28
1692	srl	$7,$1,6
1693	xor	$11,$2,$3
1694	sll	$10,$1,7
1695	and	$11,$1
1696	srl	$9,$1,11
1697	xor	$7,$10
1698	sll	$10,$1,21
1699	xor	$7,$9
1700	srl	$9,$1,25
1701	xor	$7,$10
1702	sll	$10,$1,26
1703	xor	$7,$9
1704	xor	$11,$3			# Ch(e,f,g)
1705	xor	$9,$10,$7			# Sigma1(e)
1706
1707	srl	$7,$24,2
1708	addu	$8,$11
1709	lw	$11,112($6)		# K[28]
1710	sll	$10,$24,10
1711	addu	$8,$9
1712	srl	$9,$24,13
1713	xor	$7,$10
1714	sll	$10,$24,19
1715	xor	$7,$9
1716	srl	$9,$24,22
1717	xor	$7,$10
1718	sll	$10,$24,30
1719	xor	$7,$9
1720	sw	$20,48($29)	# offload to ring buffer
1721	xor	$7,$10			# Sigma0(a)
1722
1723	or	$9,$24,$25
1724	and	$10,$24,$25
1725	and	$9,$30
1726	or	$10,$9			# Maj(a,b,c)
1727	addu	$8,$11			# +=K[28]
1728	addu	$7,$10
1729
1730	addu	$31,$8
1731	addu	$7,$8
1732	lw	$23,60($29)	# prefetch from ring buffer
1733	srl	$11,$22,3		# Xupdate(29)
1734	addu	$21,$14			# +=X[i+9]
1735	sll	$10,$22,14
1736	srl	$9,$22,7
1737	xor	$11,$10
1738	sll	$10,11
1739	xor	$11,$9
1740	srl	$9,$22,18
1741	xor	$11,$10
1742
1743	srl	$12,$19,10
1744	xor	$11,$9			# sigma0(X[i+1])
1745	sll	$10,$19,13
1746	addu	$21,$11
1747	srl	$9,$19,17
1748	xor	$12,$10
1749	sll	$10,2
1750	xor	$12,$9
1751	srl	$9,$19,19
1752	xor	$12,$10
1753
1754	xor	$12,$9			# sigma1(X[i+14])
1755	addu	$21,$12
1756	addu	$9,$21,$3			# 29
1757	srl	$3,$31,6
1758	xor	$12,$1,$2
1759	sll	$11,$31,7
1760	and	$12,$31
1761	srl	$10,$31,11
1762	xor	$3,$11
1763	sll	$11,$31,21
1764	xor	$3,$10
1765	srl	$10,$31,25
1766	xor	$3,$11
1767	sll	$11,$31,26
1768	xor	$3,$10
1769	xor	$12,$2			# Ch(e,f,g)
1770	xor	$10,$11,$3			# Sigma1(e)
1771
1772	srl	$3,$7,2
1773	addu	$9,$12
1774	lw	$12,116($6)		# K[29]
1775	sll	$11,$7,10
1776	addu	$9,$10
1777	srl	$10,$7,13
1778	xor	$3,$11
1779	sll	$11,$7,19
1780	xor	$3,$10
1781	srl	$10,$7,22
1782	xor	$3,$11
1783	sll	$11,$7,30
1784	xor	$3,$10
1785	sw	$21,52($29)	# offload to ring buffer
1786	xor	$3,$11			# Sigma0(a)
1787
1788	or	$10,$7,$24
1789	and	$11,$7,$24
1790	and	$10,$25
1791	or	$11,$10			# Maj(a,b,c)
1792	addu	$9,$12			# +=K[29]
1793	addu	$3,$11
1794
1795	addu	$30,$9
1796	addu	$3,$9
1797	lw	$8,0($29)	# prefetch from ring buffer
1798	srl	$12,$23,3		# Xupdate(30)
1799	addu	$22,$15			# +=X[i+9]
1800	sll	$11,$23,14
1801	srl	$10,$23,7
1802	xor	$12,$11
1803	sll	$11,11
1804	xor	$12,$10
1805	srl	$10,$23,18
1806	xor	$12,$11
1807
1808	srl	$13,$20,10
1809	xor	$12,$10			# sigma0(X[i+1])
1810	sll	$11,$20,13
1811	addu	$22,$12
1812	srl	$10,$20,17
1813	xor	$13,$11
1814	sll	$11,2
1815	xor	$13,$10
1816	srl	$10,$20,19
1817	xor	$13,$11
1818
1819	xor	$13,$10			# sigma1(X[i+14])
1820	addu	$22,$13
1821	addu	$10,$22,$2			# 30
1822	srl	$2,$30,6
1823	xor	$13,$31,$1
1824	sll	$12,$30,7
1825	and	$13,$30
1826	srl	$11,$30,11
1827	xor	$2,$12
1828	sll	$12,$30,21
1829	xor	$2,$11
1830	srl	$11,$30,25
1831	xor	$2,$12
1832	sll	$12,$30,26
1833	xor	$2,$11
1834	xor	$13,$1			# Ch(e,f,g)
1835	xor	$11,$12,$2			# Sigma1(e)
1836
1837	srl	$2,$3,2
1838	addu	$10,$13
1839	lw	$13,120($6)		# K[30]
1840	sll	$12,$3,10
1841	addu	$10,$11
1842	srl	$11,$3,13
1843	xor	$2,$12
1844	sll	$12,$3,19
1845	xor	$2,$11
1846	srl	$11,$3,22
1847	xor	$2,$12
1848	sll	$12,$3,30
1849	xor	$2,$11
1850	sw	$22,56($29)	# offload to ring buffer
1851	xor	$2,$12			# Sigma0(a)
1852
1853	or	$11,$3,$7
1854	and	$12,$3,$7
1855	and	$11,$24
1856	or	$12,$11			# Maj(a,b,c)
1857	addu	$10,$13			# +=K[30]
1858	addu	$2,$12
1859
1860	addu	$25,$10
1861	addu	$2,$10
1862	lw	$9,4($29)	# prefetch from ring buffer
1863	srl	$13,$8,3		# Xupdate(31)
1864	addu	$23,$16			# +=X[i+9]
1865	sll	$12,$8,14
1866	srl	$11,$8,7
1867	xor	$13,$12
1868	sll	$12,11
1869	xor	$13,$11
1870	srl	$11,$8,18
1871	xor	$13,$12
1872
1873	srl	$14,$21,10
1874	xor	$13,$11			# sigma0(X[i+1])
1875	sll	$12,$21,13
1876	addu	$23,$13
1877	srl	$11,$21,17
1878	xor	$14,$12
1879	sll	$12,2
1880	xor	$14,$11
1881	srl	$11,$21,19
1882	xor	$14,$12
1883
1884	xor	$14,$11			# sigma1(X[i+14])
1885	addu	$23,$14
1886	addu	$11,$23,$1			# 31
1887	srl	$1,$25,6
1888	xor	$14,$30,$31
1889	sll	$13,$25,7
1890	and	$14,$25
1891	srl	$12,$25,11
1892	xor	$1,$13
1893	sll	$13,$25,21
1894	xor	$1,$12
1895	srl	$12,$25,25
1896	xor	$1,$13
1897	sll	$13,$25,26
1898	xor	$1,$12
1899	xor	$14,$31			# Ch(e,f,g)
1900	xor	$12,$13,$1			# Sigma1(e)
1901
1902	srl	$1,$2,2
1903	addu	$11,$14
1904	lw	$14,124($6)		# K[31]
1905	sll	$13,$2,10
1906	addu	$11,$12
1907	srl	$12,$2,13
1908	xor	$1,$13
1909	sll	$13,$2,19
1910	xor	$1,$12
1911	srl	$12,$2,22
1912	xor	$1,$13
1913	sll	$13,$2,30
1914	xor	$1,$12
1915	sw	$23,60($29)	# offload to ring buffer
1916	xor	$1,$13			# Sigma0(a)
1917
1918	or	$12,$2,$3
1919	and	$13,$2,$3
1920	and	$12,$7
1921	or	$13,$12			# Maj(a,b,c)
1922	addu	$11,$14			# +=K[31]
1923	addu	$1,$13
1924
1925	addu	$24,$11
1926	addu	$1,$11
1927	lw	$10,8($29)	# prefetch from ring buffer
1928	and	$14,0xfff
1929	li	$15,2290
1930	.set	noreorder
1931	bne	$14,$15,.L16_xx
1932	add $6,16*4		# Ktbl+=16
1933
1934	lw	$23,16*4($29)	# restore pointer to the end of input
1935	lw	$8,0*4($4)
1936	lw	$9,1*4($4)
1937	lw	$10,2*4($4)
1938	add $5,16*4
1939	lw	$11,3*4($4)
1940	addu	$1,$8
1941	lw	$12,4*4($4)
1942	addu	$2,$9
1943	lw	$13,5*4($4)
1944	addu	$3,$10
1945	lw	$14,6*4($4)
1946	addu	$7,$11
1947	lw	$15,7*4($4)
1948	addu	$24,$12
1949	sw	$1,0*4($4)
1950	addu	$25,$13
1951	sw	$2,1*4($4)
1952	addu	$30,$14
1953	sw	$3,2*4($4)
1954	addu	$31,$15
1955	sw	$7,3*4($4)
1956	sw	$24,4*4($4)
1957	sw	$25,5*4($4)
1958	sw	$30,6*4($4)
1959	sw	$31,7*4($4)
1960
1961	bnel	$5,$23,.Loop
1962	sub $6,192	# rewind $6
1963
1964	lw	$31,128-1*4($29)
1965	lw	$30,128-2*4($29)
1966	lw	$23,128-3*4($29)
1967	lw	$22,128-4*4($29)
1968	lw	$21,128-5*4($29)
1969	lw	$20,128-6*4($29)
1970	lw	$19,128-7*4($29)
1971	lw	$18,128-8*4($29)
1972	lw	$17,128-9*4($29)
1973	lw	$16,128-10*4($29)
1974	jr	$31
1975	add $29,128
1976.end	sha256_block_data_order
1977
1978.rdata				# constant table is emitted in the read-only data section
1979.align	5
1980K256:				# SHA-256 round constants K[0..63], one 32-bit word each; sha256_block_data_order loads this address into $6
1981	.word	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5	# K[0..3]
1982	.word	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5	# K[4..7]
1983	.word	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3	# K[8..11]
1984	.word	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174	# K[12..15]
1985	.word	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc	# K[16..19]
1986	.word	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da	# K[20..23]
1987	.word	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7	# K[24..27]
1988	.word	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967	# K[28..31]
1989	.word	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13	# K[32..35]
1990	.word	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85	# K[36..39]
1991	.word	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3	# K[40..43]
1992	.word	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070	# K[44..47]
1993	.word	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5	# K[48..51]
1994	.word	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3	# K[52..55]
1995	.word	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208	# K[56..59]
1996	.word	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2	# K[60..63]; the round loop compares the low 12 bits of the K word it just loaded with 2290 (0x8f2), which are the low 12 bits of this final word, so editing it breaks loop termination
1997.asciiz	"SHA256 for MIPS, CRYPTOGAMS by <appro@openssl.org>"	# NUL-terminated identification string placed right after the table
1998.align	5
1999
2000