aes-mips.pl revision 2178392bec7b69d0f469bce29f653c9fbd09b614
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# AES for MIPS
11
12# October 2010
13#
14# Code uses 1K[+256B] S-box and on single-issue core [such as R5000]
15# spends ~68 cycles per byte processed with 128-bit key. This is ~16%
16# faster than gcc-generated code, which is not very impressive. But
17# recall that compressed S-box requires extra processing, namely
18# additional rotations. Rotations are implemented with lwl/lwr pairs,
19# which is normally used for loading unaligned data. Another cool
20# thing about this module is its endian neutrality, which means that
21# it processes data without ever changing byte order...
22
23######################################################################
# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in an ABI-neutral
# manner. Therefore let's stick to the NUBI register layout:
28#
# Each symbolic name is bound to the literal assembler spelling of its
# register number, i.e. the string '$N'.
($zero, $at, $t0, $t1, $t2) = map { '$' . $_ } (0, 1, 2, 24, 25);
($a0, $a1, $a2, $a3, $a4, $a5, $a6, $a7) = map { '$' . $_ } (4 .. 11);
($s0, $s1, $s2, $s3, $s4, $s5, $s6, $s7, $s8, $s9, $s10, $s11) =
    map { '$' . $_ } (12 .. 23);
($gp, $tp, $sp, $fp, $ra) = map { '$' . $_ } (3, 28, 29, 30, 31);
33#
34# The return value is placed in $a0. Following coding rules facilitate
35# interoperability:
36#
37# - never ever touch $tp, "thread pointer", former $gp;
38# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
39#   old code];
40# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
41#
42# For reference here is register layout for N32/64 MIPS ABIs:
43#
44# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
45# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
46# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
47# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
48# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
49#
# Target ABI flavour, taken from the first command-line argument.
# Supported flavours are o32,n32,64,nubi32,nubi64. Default to o32 when
# no flavour is given: an undefined flavour would otherwise select the
# 32-bit settings below while still taking the "non-o32" PIC path
# (.cplocal/.cpsetup instead of .cpload) in the function prologues.
$flavour = shift || "o32";

# Pick pointer arithmetic and register save/restore instructions, plus
# the size in bytes of one saved register slot, to match the ABI.
if ($flavour =~ /64|n32/i) {
	$PTR_ADD="dadd";	# incidentally works even on n32
	$PTR_SUB="dsub";	# incidentally works even on n32
	$REG_S="sd";
	$REG_L="ld";
	$PTR_SLL="dsll";	# incidentally works even on n32
	$SZREG=8;
} else {
	$PTR_ADD="add";
	$PTR_SUB="sub";
	$REG_S="sw";
	$REG_L="lw";
	$PTR_SLL="sll";
	$SZREG=4;
}
# Register handed to the .cpload/.cpsetup PIC directives.
$pf = ($flavour =~ /nubi/i) ? $t0 : $t2;
68#
69# <appro@openssl.org>
70#
71######################################################################
72
# Probe the target preprocessor for byte order: the target is taken to
# be big-endian when the literal token MIPSEL survives preprocessing
# (i.e. it is not a predefined macro). Only probe when a compiler is
# actually configured; otherwise leave $big_endian undefined so that
# the host-endianness fallback below gets a chance to run instead of
# silently assuming little-endian on a failed pipeline.
$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0 if ($ENV{CC});

# Redirect STDOUT to the last argument that looks like a file name.
for (@ARGV) {	$output=$_ if (/^\w[\w\-]*\.\w+$/);	}
open STDOUT,">$output";

# No compiler to ask: fall back to the byte order of the build host.
if (!defined($big_endian))
{    $big_endian=(unpack('L',pack('N',1))==1);   }

# Consume arguments up to and including the first file-name-like one
# and (re)direct STDOUT there.
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
83
84my ($MSB,$LSB)=(0,3);	# automatically converted to little-endian
85
86$code.=<<___;
87.text
88#ifdef OPENSSL_FIPSCANISTER
89# include <openssl/fipssyms.h>
90#endif
91
92#if !defined(__vxworks) || defined(__pic__)
93.option	pic2
94#endif
95.set	noat
96___
97
98{{{
# Frame of the public AES_encrypt/AES_decrypt entry points: sixteen
# register-sized slots.
my $FRAMESIZE = 16 * $SZREG;

# Value handed to the .mask directive. The NUBI variant additionally
# covers $12-$15 and $3, which only the NUBI prologue saves.
my $SAVED_REGS_MASK = 0xc0ff0000;
$SAVED_REGS_MASK = 0xc0fff008 if ($flavour =~ /nubi/i);

# Arguments and table pointer, followed by the four state words.
my ($inp, $out, $key, $Tbl) = ($a0, $a1, $a2, $a3);
my ($s0, $s1, $s2, $s3)     = ($a4, $a5, $a6, $a7);

# Table-index scratch registers. These must be bound to the file-level
# $t0-$t2 names before those names are re-declared just below.
my ($i0, $i1, $i2, $i3) = ($at, $t0, $t1, $t2);

# Twelve temporaries, mapped onto registers $12-$23.
my ($t0, $t1, $t2, $t3, $t4, $t5, $t6, $t7, $t8, $t9, $t10, $t11) =
    map { '$' . $_ } (12 .. 23);

# Running round-key pointer and round counter.
my ($key0, $cnt) = ($gp, $fp);
106
# instruction ordering is "stolen" from the output of the MIPSpro
# assembler invoked with -mips3 -O3 arguments...
109$code.=<<___;
110.align	5
111.ent	_mips_AES_encrypt
112_mips_AES_encrypt:
113	.frame	$sp,0,$ra
114	.set	reorder
115	lw	$t0,0($key)
116	lw	$t1,4($key)
117	lw	$t2,8($key)
118	lw	$t3,12($key)
119	lw	$cnt,240($key)
120	$PTR_ADD $key0,$key,16
121
122	xor	$s0,$t0
123	xor	$s1,$t1
124	xor	$s2,$t2
125	xor	$s3,$t3
126
127	sub	$cnt,1
128	_xtr	$i0,$s1,16-2
129.Loop_enc:
130	_xtr	$i1,$s2,16-2
131	_xtr	$i2,$s3,16-2
132	_xtr	$i3,$s0,16-2
133	and	$i0,0x3fc
134	and	$i1,0x3fc
135	and	$i2,0x3fc
136	and	$i3,0x3fc
137	$PTR_ADD $i0,$Tbl
138	$PTR_ADD $i1,$Tbl
139	$PTR_ADD $i2,$Tbl
140	$PTR_ADD $i3,$Tbl
141	lwl	$t0,3($i0)		# Te1[s1>>16]
142	lwl	$t1,3($i1)		# Te1[s2>>16]
143	lwl	$t2,3($i2)		# Te1[s3>>16]
144	lwl	$t3,3($i3)		# Te1[s0>>16]
145	lwr	$t0,2($i0)		# Te1[s1>>16]
146	lwr	$t1,2($i1)		# Te1[s2>>16]
147	lwr	$t2,2($i2)		# Te1[s3>>16]
148	lwr	$t3,2($i3)		# Te1[s0>>16]
149
150	_xtr	$i0,$s2,8-2
151	_xtr	$i1,$s3,8-2
152	_xtr	$i2,$s0,8-2
153	_xtr	$i3,$s1,8-2
154	and	$i0,0x3fc
155	and	$i1,0x3fc
156	and	$i2,0x3fc
157	and	$i3,0x3fc
158	$PTR_ADD $i0,$Tbl
159	$PTR_ADD $i1,$Tbl
160	$PTR_ADD $i2,$Tbl
161	$PTR_ADD $i3,$Tbl
162	lwl	$t4,2($i0)		# Te2[s2>>8]
163	lwl	$t5,2($i1)		# Te2[s3>>8]
164	lwl	$t6,2($i2)		# Te2[s0>>8]
165	lwl	$t7,2($i3)		# Te2[s1>>8]
166	lwr	$t4,1($i0)		# Te2[s2>>8]
167	lwr	$t5,1($i1)		# Te2[s3>>8]
168	lwr	$t6,1($i2)		# Te2[s0>>8]
169	lwr	$t7,1($i3)		# Te2[s1>>8]
170
171	_xtr	$i0,$s3,0-2
172	_xtr	$i1,$s0,0-2
173	_xtr	$i2,$s1,0-2
174	_xtr	$i3,$s2,0-2
175	and	$i0,0x3fc
176	and	$i1,0x3fc
177	and	$i2,0x3fc
178	and	$i3,0x3fc
179	$PTR_ADD $i0,$Tbl
180	$PTR_ADD $i1,$Tbl
181	$PTR_ADD $i2,$Tbl
182	$PTR_ADD $i3,$Tbl
183	lwl	$t8,1($i0)		# Te3[s3]
184	lwl	$t9,1($i1)		# Te3[s0]
185	lwl	$t10,1($i2)		# Te3[s1]
186	lwl	$t11,1($i3)		# Te3[s2]
187	lwr	$t8,0($i0)		# Te3[s3]
188	lwr	$t9,0($i1)		# Te3[s0]
189	lwr	$t10,0($i2)		# Te3[s1]
190	lwr	$t11,0($i3)		# Te3[s2]
191
192	_xtr	$i0,$s0,24-2
193	_xtr	$i1,$s1,24-2
194	_xtr	$i2,$s2,24-2
195	_xtr	$i3,$s3,24-2
196	and	$i0,0x3fc
197	and	$i1,0x3fc
198	and	$i2,0x3fc
199	and	$i3,0x3fc
200	$PTR_ADD $i0,$Tbl
201	$PTR_ADD $i1,$Tbl
202	$PTR_ADD $i2,$Tbl
203	$PTR_ADD $i3,$Tbl
204	xor	$t0,$t4
205	xor	$t1,$t5
206	xor	$t2,$t6
207	xor	$t3,$t7
208	lw	$t4,0($i0)		# Te0[s0>>24]
209	lw	$t5,0($i1)		# Te0[s1>>24]
210	lw	$t6,0($i2)		# Te0[s2>>24]
211	lw	$t7,0($i3)		# Te0[s3>>24]
212
213	lw	$s0,0($key0)
214	lw	$s1,4($key0)
215	lw	$s2,8($key0)
216	lw	$s3,12($key0)
217
218	xor	$t0,$t8
219	xor	$t1,$t9
220	xor	$t2,$t10
221	xor	$t3,$t11
222
223	xor	$t0,$t4
224	xor	$t1,$t5
225	xor	$t2,$t6
226	xor	$t3,$t7
227
228	sub	$cnt,1
229	$PTR_ADD $key0,16
230	xor	$s0,$t0
231	xor	$s1,$t1
232	xor	$s2,$t2
233	xor	$s3,$t3
234	.set	noreorder
235	bnez	$cnt,.Loop_enc
236	_xtr	$i0,$s1,16-2
237
238	.set	reorder
239	_xtr	$i1,$s2,16-2
240	_xtr	$i2,$s3,16-2
241	_xtr	$i3,$s0,16-2
242	and	$i0,0x3fc
243	and	$i1,0x3fc
244	and	$i2,0x3fc
245	and	$i3,0x3fc
246	$PTR_ADD $i0,$Tbl
247	$PTR_ADD $i1,$Tbl
248	$PTR_ADD $i2,$Tbl
249	$PTR_ADD $i3,$Tbl
250	lbu	$t0,2($i0)		# Te4[s1>>16]
251	lbu	$t1,2($i1)		# Te4[s2>>16]
252	lbu	$t2,2($i2)		# Te4[s3>>16]
253	lbu	$t3,2($i3)		# Te4[s0>>16]
254
255	_xtr	$i0,$s2,8-2
256	_xtr	$i1,$s3,8-2
257	_xtr	$i2,$s0,8-2
258	_xtr	$i3,$s1,8-2
259	and	$i0,0x3fc
260	and	$i1,0x3fc
261	and	$i2,0x3fc
262	and	$i3,0x3fc
263	$PTR_ADD $i0,$Tbl
264	$PTR_ADD $i1,$Tbl
265	$PTR_ADD $i2,$Tbl
266	$PTR_ADD $i3,$Tbl
267	lbu	$t4,2($i0)		# Te4[s2>>8]
268	lbu	$t5,2($i1)		# Te4[s3>>8]
269	lbu	$t6,2($i2)		# Te4[s0>>8]
270	lbu	$t7,2($i3)		# Te4[s1>>8]
271
272	_xtr	$i0,$s0,24-2
273	_xtr	$i1,$s1,24-2
274	_xtr	$i2,$s2,24-2
275	_xtr	$i3,$s3,24-2
276	and	$i0,0x3fc
277	and	$i1,0x3fc
278	and	$i2,0x3fc
279	and	$i3,0x3fc
280	$PTR_ADD $i0,$Tbl
281	$PTR_ADD $i1,$Tbl
282	$PTR_ADD $i2,$Tbl
283	$PTR_ADD $i3,$Tbl
284	lbu	$t8,2($i0)		# Te4[s0>>24]
285	lbu	$t9,2($i1)		# Te4[s1>>24]
286	lbu	$t10,2($i2)		# Te4[s2>>24]
287	lbu	$t11,2($i3)		# Te4[s3>>24]
288
289	_xtr	$i0,$s3,0-2
290	_xtr	$i1,$s0,0-2
291	_xtr	$i2,$s1,0-2
292	_xtr	$i3,$s2,0-2
293	and	$i0,0x3fc
294	and	$i1,0x3fc
295	and	$i2,0x3fc
296	and	$i3,0x3fc
297
298	_ins	$t0,16
299	_ins	$t1,16
300	_ins	$t2,16
301	_ins	$t3,16
302
303	_ins	$t4,8
304	_ins	$t5,8
305	_ins	$t6,8
306	_ins	$t7,8
307
308	xor	$t0,$t4
309	xor	$t1,$t5
310	xor	$t2,$t6
311	xor	$t3,$t7
312
313	$PTR_ADD $i0,$Tbl
314	$PTR_ADD $i1,$Tbl
315	$PTR_ADD $i2,$Tbl
316	$PTR_ADD $i3,$Tbl
317	lbu	$t4,2($i0)		# Te4[s3]
318	lbu	$t5,2($i1)		# Te4[s0]
319	lbu	$t6,2($i2)		# Te4[s1]
320	lbu	$t7,2($i3)		# Te4[s2]
321
322	_ins	$t8,24
323	_ins	$t9,24
324	_ins	$t10,24
325	_ins	$t11,24
326
327	lw	$s0,0($key0)
328	lw	$s1,4($key0)
329	lw	$s2,8($key0)
330	lw	$s3,12($key0)
331
332	xor	$t0,$t8
333	xor	$t1,$t9
334	xor	$t2,$t10
335	xor	$t3,$t11
336
337	_ins	$t4,0
338	_ins	$t5,0
339	_ins	$t6,0
340	_ins	$t7,0
341
342	xor	$t0,$t4
343	xor	$t1,$t5
344	xor	$t2,$t6
345	xor	$t3,$t7
346
347	xor	$s0,$t0
348	xor	$s1,$t1
349	xor	$s2,$t2
350	xor	$s3,$t3
351
352	jr	$ra
353.end	_mips_AES_encrypt
354
355.align	5
356.globl	AES_encrypt
357.ent	AES_encrypt
358AES_encrypt:
359	.frame	$sp,$FRAMESIZE,$ra
360	.mask	$SAVED_REGS_MASK,-$SZREG
361	.set	noreorder
362___
363$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
364	.cpload	$pf
365___
366$code.=<<___;
367	$PTR_SUB $sp,$FRAMESIZE
368	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
369	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
370	$REG_S	$s11,$FRAMESIZE-3*$SZREG($sp)
371	$REG_S	$s10,$FRAMESIZE-4*$SZREG($sp)
372	$REG_S	$s9,$FRAMESIZE-5*$SZREG($sp)
373	$REG_S	$s8,$FRAMESIZE-6*$SZREG($sp)
374	$REG_S	$s7,$FRAMESIZE-7*$SZREG($sp)
375	$REG_S	$s6,$FRAMESIZE-8*$SZREG($sp)
376	$REG_S	$s5,$FRAMESIZE-9*$SZREG($sp)
377	$REG_S	$s4,$FRAMESIZE-10*$SZREG($sp)
378___
379$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
380	$REG_S	\$15,$FRAMESIZE-11*$SZREG($sp)
381	$REG_S	\$14,$FRAMESIZE-12*$SZREG($sp)
382	$REG_S	\$13,$FRAMESIZE-13*$SZREG($sp)
383	$REG_S	\$12,$FRAMESIZE-14*$SZREG($sp)
384	$REG_S	$gp,$FRAMESIZE-15*$SZREG($sp)
385___
386$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
387	.cplocal	$Tbl
388	.cpsetup	$pf,$zero,AES_encrypt
389___
390$code.=<<___;
391	.set	reorder
392	la	$Tbl,AES_Te		# PIC-ified 'load address'
393
394	lwl	$s0,0+$MSB($inp)
395	lwl	$s1,4+$MSB($inp)
396	lwl	$s2,8+$MSB($inp)
397	lwl	$s3,12+$MSB($inp)
398	lwr	$s0,0+$LSB($inp)
399	lwr	$s1,4+$LSB($inp)
400	lwr	$s2,8+$LSB($inp)
401	lwr	$s3,12+$LSB($inp)
402
403	bal	_mips_AES_encrypt
404
405	swr	$s0,0+$LSB($out)
406	swr	$s1,4+$LSB($out)
407	swr	$s2,8+$LSB($out)
408	swr	$s3,12+$LSB($out)
409	swl	$s0,0+$MSB($out)
410	swl	$s1,4+$MSB($out)
411	swl	$s2,8+$MSB($out)
412	swl	$s3,12+$MSB($out)
413
414	.set	noreorder
415	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
416	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
417	$REG_L	$s11,$FRAMESIZE-3*$SZREG($sp)
418	$REG_L	$s10,$FRAMESIZE-4*$SZREG($sp)
419	$REG_L	$s9,$FRAMESIZE-5*$SZREG($sp)
420	$REG_L	$s8,$FRAMESIZE-6*$SZREG($sp)
421	$REG_L	$s7,$FRAMESIZE-7*$SZREG($sp)
422	$REG_L	$s6,$FRAMESIZE-8*$SZREG($sp)
423	$REG_L	$s5,$FRAMESIZE-9*$SZREG($sp)
424	$REG_L	$s4,$FRAMESIZE-10*$SZREG($sp)
425___
426$code.=<<___ if ($flavour =~ /nubi/i);
427	$REG_L	\$15,$FRAMESIZE-11*$SZREG($sp)
428	$REG_L	\$14,$FRAMESIZE-12*$SZREG($sp)
429	$REG_L	\$13,$FRAMESIZE-13*$SZREG($sp)
430	$REG_L	\$12,$FRAMESIZE-14*$SZREG($sp)
431	$REG_L	$gp,$FRAMESIZE-15*$SZREG($sp)
432___
433$code.=<<___;
434	jr	$ra
435	$PTR_ADD $sp,$FRAMESIZE
436.end	AES_encrypt
437___
438
439$code.=<<___;
440.align	5
441.ent	_mips_AES_decrypt
442_mips_AES_decrypt:
443	.frame	$sp,0,$ra
444	.set	reorder
445	lw	$t0,0($key)
446	lw	$t1,4($key)
447	lw	$t2,8($key)
448	lw	$t3,12($key)
449	lw	$cnt,240($key)
450	$PTR_ADD $key0,$key,16
451
452	xor	$s0,$t0
453	xor	$s1,$t1
454	xor	$s2,$t2
455	xor	$s3,$t3
456
457	sub	$cnt,1
458	_xtr	$i0,$s3,16-2
459.Loop_dec:
460	_xtr	$i1,$s0,16-2
461	_xtr	$i2,$s1,16-2
462	_xtr	$i3,$s2,16-2
463	and	$i0,0x3fc
464	and	$i1,0x3fc
465	and	$i2,0x3fc
466	and	$i3,0x3fc
467	$PTR_ADD $i0,$Tbl
468	$PTR_ADD $i1,$Tbl
469	$PTR_ADD $i2,$Tbl
470	$PTR_ADD $i3,$Tbl
471	lwl	$t0,3($i0)		# Td1[s3>>16]
472	lwl	$t1,3($i1)		# Td1[s0>>16]
473	lwl	$t2,3($i2)		# Td1[s1>>16]
474	lwl	$t3,3($i3)		# Td1[s2>>16]
475	lwr	$t0,2($i0)		# Td1[s3>>16]
476	lwr	$t1,2($i1)		# Td1[s0>>16]
477	lwr	$t2,2($i2)		# Td1[s1>>16]
478	lwr	$t3,2($i3)		# Td1[s2>>16]
479
480	_xtr	$i0,$s2,8-2
481	_xtr	$i1,$s3,8-2
482	_xtr	$i2,$s0,8-2
483	_xtr	$i3,$s1,8-2
484	and	$i0,0x3fc
485	and	$i1,0x3fc
486	and	$i2,0x3fc
487	and	$i3,0x3fc
488	$PTR_ADD $i0,$Tbl
489	$PTR_ADD $i1,$Tbl
490	$PTR_ADD $i2,$Tbl
491	$PTR_ADD $i3,$Tbl
492	lwl	$t4,2($i0)		# Td2[s2>>8]
493	lwl	$t5,2($i1)		# Td2[s3>>8]
494	lwl	$t6,2($i2)		# Td2[s0>>8]
495	lwl	$t7,2($i3)		# Td2[s1>>8]
496	lwr	$t4,1($i0)		# Td2[s2>>8]
497	lwr	$t5,1($i1)		# Td2[s3>>8]
498	lwr	$t6,1($i2)		# Td2[s0>>8]
499	lwr	$t7,1($i3)		# Td2[s1>>8]
500
501	_xtr	$i0,$s1,0-2
502	_xtr	$i1,$s2,0-2
503	_xtr	$i2,$s3,0-2
504	_xtr	$i3,$s0,0-2
505	and	$i0,0x3fc
506	and	$i1,0x3fc
507	and	$i2,0x3fc
508	and	$i3,0x3fc
509	$PTR_ADD $i0,$Tbl
510	$PTR_ADD $i1,$Tbl
511	$PTR_ADD $i2,$Tbl
512	$PTR_ADD $i3,$Tbl
513	lwl	$t8,1($i0)		# Td3[s1]
514	lwl	$t9,1($i1)		# Td3[s2]
515	lwl	$t10,1($i2)		# Td3[s3]
516	lwl	$t11,1($i3)		# Td3[s0]
517	lwr	$t8,0($i0)		# Td3[s1]
518	lwr	$t9,0($i1)		# Td3[s2]
519	lwr	$t10,0($i2)		# Td3[s3]
520	lwr	$t11,0($i3)		# Td3[s0]
521
522	_xtr	$i0,$s0,24-2
523	_xtr	$i1,$s1,24-2
524	_xtr	$i2,$s2,24-2
525	_xtr	$i3,$s3,24-2
526	and	$i0,0x3fc
527	and	$i1,0x3fc
528	and	$i2,0x3fc
529	and	$i3,0x3fc
530	$PTR_ADD $i0,$Tbl
531	$PTR_ADD $i1,$Tbl
532	$PTR_ADD $i2,$Tbl
533	$PTR_ADD $i3,$Tbl
534
535	xor	$t0,$t4
536	xor	$t1,$t5
537	xor	$t2,$t6
538	xor	$t3,$t7
539
540
541	lw	$t4,0($i0)		# Td0[s0>>24]
542	lw	$t5,0($i1)		# Td0[s1>>24]
543	lw	$t6,0($i2)		# Td0[s2>>24]
544	lw	$t7,0($i3)		# Td0[s3>>24]
545
546	lw	$s0,0($key0)
547	lw	$s1,4($key0)
548	lw	$s2,8($key0)
549	lw	$s3,12($key0)
550
551	xor	$t0,$t8
552	xor	$t1,$t9
553	xor	$t2,$t10
554	xor	$t3,$t11
555
556	xor	$t0,$t4
557	xor	$t1,$t5
558	xor	$t2,$t6
559	xor	$t3,$t7
560
561	sub	$cnt,1
562	$PTR_ADD $key0,16
563	xor	$s0,$t0
564	xor	$s1,$t1
565	xor	$s2,$t2
566	xor	$s3,$t3
567	.set	noreorder
568	bnez	$cnt,.Loop_dec
569	_xtr	$i0,$s3,16-2
570
571	.set	reorder
572	lw	$t4,1024($Tbl)		# prefetch Td4
573	lw	$t5,1024+32($Tbl)
574	lw	$t6,1024+64($Tbl)
575	lw	$t7,1024+96($Tbl)
576	lw	$t8,1024+128($Tbl)
577	lw	$t9,1024+160($Tbl)
578	lw	$t10,1024+192($Tbl)
579	lw	$t11,1024+224($Tbl)
580
581	_xtr	$i0,$s3,16
582	_xtr	$i1,$s0,16
583	_xtr	$i2,$s1,16
584	_xtr	$i3,$s2,16
585	and	$i0,0xff
586	and	$i1,0xff
587	and	$i2,0xff
588	and	$i3,0xff
589	$PTR_ADD $i0,$Tbl
590	$PTR_ADD $i1,$Tbl
591	$PTR_ADD $i2,$Tbl
592	$PTR_ADD $i3,$Tbl
593	lbu	$t0,1024($i0)		# Td4[s3>>16]
594	lbu	$t1,1024($i1)		# Td4[s0>>16]
595	lbu	$t2,1024($i2)		# Td4[s1>>16]
596	lbu	$t3,1024($i3)		# Td4[s2>>16]
597
598	_xtr	$i0,$s2,8
599	_xtr	$i1,$s3,8
600	_xtr	$i2,$s0,8
601	_xtr	$i3,$s1,8
602	and	$i0,0xff
603	and	$i1,0xff
604	and	$i2,0xff
605	and	$i3,0xff
606	$PTR_ADD $i0,$Tbl
607	$PTR_ADD $i1,$Tbl
608	$PTR_ADD $i2,$Tbl
609	$PTR_ADD $i3,$Tbl
610	lbu	$t4,1024($i0)		# Td4[s2>>8]
611	lbu	$t5,1024($i1)		# Td4[s3>>8]
612	lbu	$t6,1024($i2)		# Td4[s0>>8]
613	lbu	$t7,1024($i3)		# Td4[s1>>8]
614
615	_xtr	$i0,$s0,24
616	_xtr	$i1,$s1,24
617	_xtr	$i2,$s2,24
618	_xtr	$i3,$s3,24
619	$PTR_ADD $i0,$Tbl
620	$PTR_ADD $i1,$Tbl
621	$PTR_ADD $i2,$Tbl
622	$PTR_ADD $i3,$Tbl
623	lbu	$t8,1024($i0)		# Td4[s0>>24]
624	lbu	$t9,1024($i1)		# Td4[s1>>24]
625	lbu	$t10,1024($i2)		# Td4[s2>>24]
626	lbu	$t11,1024($i3)		# Td4[s3>>24]
627
628	_xtr	$i0,$s1,0
629	_xtr	$i1,$s2,0
630	_xtr	$i2,$s3,0
631	_xtr	$i3,$s0,0
632
633	_ins	$t0,16
634	_ins	$t1,16
635	_ins	$t2,16
636	_ins	$t3,16
637
638	_ins	$t4,8
639	_ins	$t5,8
640	_ins	$t6,8
641	_ins	$t7,8
642
643	xor	$t0,$t4
644	xor	$t1,$t5
645	xor	$t2,$t6
646	xor	$t3,$t7
647
648	$PTR_ADD $i0,$Tbl
649	$PTR_ADD $i1,$Tbl
650	$PTR_ADD $i2,$Tbl
651	$PTR_ADD $i3,$Tbl
652	lbu	$t4,1024($i0)		# Td4[s1]
653	lbu	$t5,1024($i1)		# Td4[s2]
654	lbu	$t6,1024($i2)		# Td4[s3]
655	lbu	$t7,1024($i3)		# Td4[s0]
656
657	_ins	$t8,24
658	_ins	$t9,24
659	_ins	$t10,24
660	_ins	$t11,24
661
662	lw	$s0,0($key0)
663	lw	$s1,4($key0)
664	lw	$s2,8($key0)
665	lw	$s3,12($key0)
666
667	_ins	$t4,0
668	_ins	$t5,0
669	_ins	$t6,0
670	_ins	$t7,0
671
672
673	xor	$t0,$t8
674	xor	$t1,$t9
675	xor	$t2,$t10
676	xor	$t3,$t11
677
678	xor	$t0,$t4
679	xor	$t1,$t5
680	xor	$t2,$t6
681	xor	$t3,$t7
682
683	xor	$s0,$t0
684	xor	$s1,$t1
685	xor	$s2,$t2
686	xor	$s3,$t3
687
688	jr	$ra
689.end	_mips_AES_decrypt
690
691.align	5
692.globl	AES_decrypt
693.ent	AES_decrypt
694AES_decrypt:
695	.frame	$sp,$FRAMESIZE,$ra
696	.mask	$SAVED_REGS_MASK,-$SZREG
697	.set	noreorder
698___
699$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
700	.cpload	$pf
701___
702$code.=<<___;
703	$PTR_SUB $sp,$FRAMESIZE
704	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
705	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
706	$REG_S	$s11,$FRAMESIZE-3*$SZREG($sp)
707	$REG_S	$s10,$FRAMESIZE-4*$SZREG($sp)
708	$REG_S	$s9,$FRAMESIZE-5*$SZREG($sp)
709	$REG_S	$s8,$FRAMESIZE-6*$SZREG($sp)
710	$REG_S	$s7,$FRAMESIZE-7*$SZREG($sp)
711	$REG_S	$s6,$FRAMESIZE-8*$SZREG($sp)
712	$REG_S	$s5,$FRAMESIZE-9*$SZREG($sp)
713	$REG_S	$s4,$FRAMESIZE-10*$SZREG($sp)
714___
715$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
716	$REG_S	\$15,$FRAMESIZE-11*$SZREG($sp)
717	$REG_S	\$14,$FRAMESIZE-12*$SZREG($sp)
718	$REG_S	\$13,$FRAMESIZE-13*$SZREG($sp)
719	$REG_S	\$12,$FRAMESIZE-14*$SZREG($sp)
720	$REG_S	$gp,$FRAMESIZE-15*$SZREG($sp)
721___
722$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
723	.cplocal	$Tbl
724	.cpsetup	$pf,$zero,AES_decrypt
725___
726$code.=<<___;
727	.set	reorder
728	la	$Tbl,AES_Td		# PIC-ified 'load address'
729
730	lwl	$s0,0+$MSB($inp)
731	lwl	$s1,4+$MSB($inp)
732	lwl	$s2,8+$MSB($inp)
733	lwl	$s3,12+$MSB($inp)
734	lwr	$s0,0+$LSB($inp)
735	lwr	$s1,4+$LSB($inp)
736	lwr	$s2,8+$LSB($inp)
737	lwr	$s3,12+$LSB($inp)
738
739	bal	_mips_AES_decrypt
740
741	swr	$s0,0+$LSB($out)
742	swr	$s1,4+$LSB($out)
743	swr	$s2,8+$LSB($out)
744	swr	$s3,12+$LSB($out)
745	swl	$s0,0+$MSB($out)
746	swl	$s1,4+$MSB($out)
747	swl	$s2,8+$MSB($out)
748	swl	$s3,12+$MSB($out)
749
750	.set	noreorder
751	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
752	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
753	$REG_L	$s11,$FRAMESIZE-3*$SZREG($sp)
754	$REG_L	$s10,$FRAMESIZE-4*$SZREG($sp)
755	$REG_L	$s9,$FRAMESIZE-5*$SZREG($sp)
756	$REG_L	$s8,$FRAMESIZE-6*$SZREG($sp)
757	$REG_L	$s7,$FRAMESIZE-7*$SZREG($sp)
758	$REG_L	$s6,$FRAMESIZE-8*$SZREG($sp)
759	$REG_L	$s5,$FRAMESIZE-9*$SZREG($sp)
760	$REG_L	$s4,$FRAMESIZE-10*$SZREG($sp)
761___
762$code.=<<___ if ($flavour =~ /nubi/i);
763	$REG_L	\$15,$FRAMESIZE-11*$SZREG($sp)
764	$REG_L	\$14,$FRAMESIZE-12*$SZREG($sp)
765	$REG_L	\$13,$FRAMESIZE-13*$SZREG($sp)
766	$REG_L	\$12,$FRAMESIZE-14*$SZREG($sp)
767	$REG_L	$gp,$FRAMESIZE-15*$SZREG($sp)
768___
769$code.=<<___;
770	jr	$ra
771	$PTR_ADD $sp,$FRAMESIZE
772.end	AES_decrypt
773___
774}}}
775
776{{{
# Frame of the public key-setup entry points: eight register-sized slots.
my $FRAMESIZE = 8 * $SZREG;

# Value handed to the .mask directive. The NUBI variant additionally
# covers $12-$15 and $3, which only the NUBI prologue saves.
my $SAVED_REGS_MASK = 0xc0000000;
$SAVED_REGS_MASK = 0xc000f008 if ($flavour =~ /nubi/i);

# Arguments (user key, key length in bits, key schedule) and table pointer.
my ($inp, $bits, $key, $Tbl) = ($a0, $a1, $a2, $a3);

# Up to eight round-key words kept in registers.
my ($rk0, $rk1, $rk2, $rk3) = ($a4, $a5, $a6, $a7);
my ($rk4, $rk5, $rk6, $rk7) = ($s0, $s1, $s2, $s3);

# Scratch registers used for the byte-table lookups.
my ($i0, $i1, $i2, $i3) = ($at, $t0, $t1, $t2);

# Round-constant pointer and loop counter.
my ($rcon, $cnt) = ($gp, $fp);
784
785$code.=<<___;
786.align	5
787.ent	_mips_AES_set_encrypt_key
788_mips_AES_set_encrypt_key:
789	.frame	$sp,0,$ra
790	.set	noreorder
791	beqz	$inp,.Lekey_done
792	li	$t0,-1
793	beqz	$key,.Lekey_done
794	$PTR_ADD $rcon,$Tbl,1024+256
795
796	.set	reorder
797	lwl	$rk0,0+$MSB($inp)	# load 128 bits
798	lwl	$rk1,4+$MSB($inp)
799	lwl	$rk2,8+$MSB($inp)
800	lwl	$rk3,12+$MSB($inp)
801	li	$at,128
802	lwr	$rk0,0+$LSB($inp)
803	lwr	$rk1,4+$LSB($inp)
804	lwr	$rk2,8+$LSB($inp)
805	lwr	$rk3,12+$LSB($inp)
806	.set	noreorder
807	beq	$bits,$at,.L128bits
808	li	$cnt,10
809
810	.set	reorder
811	lwl	$rk4,16+$MSB($inp)	# load 192 bits
812	lwl	$rk5,20+$MSB($inp)
813	li	$at,192
814	lwr	$rk4,16+$LSB($inp)
815	lwr	$rk5,20+$LSB($inp)
816	.set	noreorder
817	beq	$bits,$at,.L192bits
818	li	$cnt,8
819
820	.set	reorder
821	lwl	$rk6,24+$MSB($inp)	# load 256 bits
822	lwl	$rk7,28+$MSB($inp)
823	li	$at,256
824	lwr	$rk6,24+$LSB($inp)
825	lwr	$rk7,28+$LSB($inp)
826	.set	noreorder
827	beq	$bits,$at,.L256bits
828	li	$cnt,7
829
830	b	.Lekey_done
831	li	$t0,-2
832
833.align	4
834.L128bits:
835	.set	reorder
836	srl	$i0,$rk3,16
837	srl	$i1,$rk3,8
838	and	$i0,0xff
839	and	$i1,0xff
840	and	$i2,$rk3,0xff
841	srl	$i3,$rk3,24
842	$PTR_ADD $i0,$Tbl
843	$PTR_ADD $i1,$Tbl
844	$PTR_ADD $i2,$Tbl
845	$PTR_ADD $i3,$Tbl
846	lbu	$i0,1024($i0)
847	lbu	$i1,1024($i1)
848	lbu	$i2,1024($i2)
849	lbu	$i3,1024($i3)
850
851	sw	$rk0,0($key)
852	sw	$rk1,4($key)
853	sw	$rk2,8($key)
854	sw	$rk3,12($key)
855	sub	$cnt,1
856	$PTR_ADD $key,16
857
858	_bias	$i0,24
859	_bias	$i1,16
860	_bias	$i2,8
861	_bias	$i3,0
862
863	xor	$rk0,$i0
864	lw	$i0,0($rcon)
865	xor	$rk0,$i1
866	xor	$rk0,$i2
867	xor	$rk0,$i3
868	xor	$rk0,$i0
869
870	xor	$rk1,$rk0
871	xor	$rk2,$rk1
872	xor	$rk3,$rk2
873
874	.set	noreorder
875	bnez	$cnt,.L128bits
876	$PTR_ADD $rcon,4
877
878	sw	$rk0,0($key)
879	sw	$rk1,4($key)
880	sw	$rk2,8($key)
881	li	$cnt,10
882	sw	$rk3,12($key)
883	li	$t0,0
884	sw	$cnt,80($key)
885	b	.Lekey_done
886	$PTR_SUB $key,10*16
887
888.align	4
889.L192bits:
890	.set	reorder
891	srl	$i0,$rk5,16
892	srl	$i1,$rk5,8
893	and	$i0,0xff
894	and	$i1,0xff
895	and	$i2,$rk5,0xff
896	srl	$i3,$rk5,24
897	$PTR_ADD $i0,$Tbl
898	$PTR_ADD $i1,$Tbl
899	$PTR_ADD $i2,$Tbl
900	$PTR_ADD $i3,$Tbl
901	lbu	$i0,1024($i0)
902	lbu	$i1,1024($i1)
903	lbu	$i2,1024($i2)
904	lbu	$i3,1024($i3)
905
906	sw	$rk0,0($key)
907	sw	$rk1,4($key)
908	sw	$rk2,8($key)
909	sw	$rk3,12($key)
910	sw	$rk4,16($key)
911	sw	$rk5,20($key)
912	sub	$cnt,1
913	$PTR_ADD $key,24
914
915	_bias	$i0,24
916	_bias	$i1,16
917	_bias	$i2,8
918	_bias	$i3,0
919
920	xor	$rk0,$i0
921	lw	$i0,0($rcon)
922	xor	$rk0,$i1
923	xor	$rk0,$i2
924	xor	$rk0,$i3
925	xor	$rk0,$i0
926
927	xor	$rk1,$rk0
928	xor	$rk2,$rk1
929	xor	$rk3,$rk2
930	xor	$rk4,$rk3
931	xor	$rk5,$rk4
932
933	.set	noreorder
934	bnez	$cnt,.L192bits
935	$PTR_ADD $rcon,4
936
937	sw	$rk0,0($key)
938	sw	$rk1,4($key)
939	sw	$rk2,8($key)
940	li	$cnt,12
941	sw	$rk3,12($key)
942	li	$t0,0
943	sw	$cnt,48($key)
944	b	.Lekey_done
945	$PTR_SUB $key,12*16
946
947.align	4
948.L256bits:
949	.set	reorder
950	srl	$i0,$rk7,16
951	srl	$i1,$rk7,8
952	and	$i0,0xff
953	and	$i1,0xff
954	and	$i2,$rk7,0xff
955	srl	$i3,$rk7,24
956	$PTR_ADD $i0,$Tbl
957	$PTR_ADD $i1,$Tbl
958	$PTR_ADD $i2,$Tbl
959	$PTR_ADD $i3,$Tbl
960	lbu	$i0,1024($i0)
961	lbu	$i1,1024($i1)
962	lbu	$i2,1024($i2)
963	lbu	$i3,1024($i3)
964
965	sw	$rk0,0($key)
966	sw	$rk1,4($key)
967	sw	$rk2,8($key)
968	sw	$rk3,12($key)
969	sw	$rk4,16($key)
970	sw	$rk5,20($key)
971	sw	$rk6,24($key)
972	sw	$rk7,28($key)
973	sub	$cnt,1
974
975	_bias	$i0,24
976	_bias	$i1,16
977	_bias	$i2,8
978	_bias	$i3,0
979
980	xor	$rk0,$i0
981	lw	$i0,0($rcon)
982	xor	$rk0,$i1
983	xor	$rk0,$i2
984	xor	$rk0,$i3
985	xor	$rk0,$i0
986
987	xor	$rk1,$rk0
988	xor	$rk2,$rk1
989	xor	$rk3,$rk2
990	beqz	$cnt,.L256bits_done
991
992	srl	$i0,$rk3,24
993	srl	$i1,$rk3,16
994	srl	$i2,$rk3,8
995	and	$i3,$rk3,0xff
996	and	$i1,0xff
997	and	$i2,0xff
998	$PTR_ADD $i0,$Tbl
999	$PTR_ADD $i1,$Tbl
1000	$PTR_ADD $i2,$Tbl
1001	$PTR_ADD $i3,$Tbl
1002	lbu	$i0,1024($i0)
1003	lbu	$i1,1024($i1)
1004	lbu	$i2,1024($i2)
1005	lbu	$i3,1024($i3)
1006	sll	$i0,24
1007	sll	$i1,16
1008	sll	$i2,8
1009
1010	xor	$rk4,$i0
1011	xor	$rk4,$i1
1012	xor	$rk4,$i2
1013	xor	$rk4,$i3
1014
1015	xor	$rk5,$rk4
1016	xor	$rk6,$rk5
1017	xor	$rk7,$rk6
1018
1019	$PTR_ADD $key,32
1020	.set	noreorder
1021	b	.L256bits
1022	$PTR_ADD $rcon,4
1023
1024.L256bits_done:
1025	sw	$rk0,32($key)
1026	sw	$rk1,36($key)
1027	sw	$rk2,40($key)
1028	li	$cnt,14
1029	sw	$rk3,44($key)
1030	li	$t0,0
1031	sw	$cnt,48($key)
1032	$PTR_SUB $key,12*16
1033
1034.Lekey_done:
1035	jr	$ra
1036	nop
1037.end	_mips_AES_set_encrypt_key
1038
1039.globl	private_AES_set_encrypt_key
1040.ent	private_AES_set_encrypt_key
1041private_AES_set_encrypt_key:
1042	.frame	$sp,$FRAMESIZE,$ra
1043	.mask	$SAVED_REGS_MASK,-$SZREG
1044	.set	noreorder
1045___
# Remainder of private_AES_set_encrypt_key (its label and .frame/.mask
# directives are emitted by the preceding heredoc): PIC set-up, prologue,
# call into _mips_AES_set_encrypt_key, and epilogue.

# o32 flavours emit .cpload with the $pf register.
$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
	.cpload	$pf
___
# Open the frame and save $ra and $fp in its two topmost slots.
$code.=<<___;
	$PTR_SUB $sp,$FRAMESIZE
	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
___
# NUBI only: additionally save $s0-$s3 and $gp in slots 3..7 from the top.
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	$REG_S	$s3,$FRAMESIZE-3*$SZREG($sp)
	$REG_S	$s2,$FRAMESIZE-4*$SZREG($sp)
	$REG_S	$s1,$FRAMESIZE-5*$SZREG($sp)
	$REG_S	$s0,$FRAMESIZE-6*$SZREG($sp)
	$REG_S	$gp,$FRAMESIZE-7*$SZREG($sp)
___
# Every other flavour emits .cplocal/.cpsetup instead of .cpload.
$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
	.cplocal	$Tbl
	.cpsetup	$pf,$zero,private_AES_set_encrypt_key
___
# Load the table address, run the key schedule, move the status it left
# in $t0 into $a0, and start restoring $ra/$fp.
$code.=<<___;
	.set	reorder
	la	$Tbl,AES_Te		# PIC-ified 'load address'

	bal	_mips_AES_set_encrypt_key

	.set	noreorder
	move	$a0,$t0
	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
___
# NUBI only: reload $s0-$s3 and $gp from the very slots the prologue
# stored them in (3..7 from the top of this 8-slot frame). Reloading
# from slots 11..15 would address memory below $sp.
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$s3,$FRAMESIZE-3*$SZREG($sp)
	$REG_L	$s2,$FRAMESIZE-4*$SZREG($sp)
	$REG_L	$s1,$FRAMESIZE-5*$SZREG($sp)
	$REG_L	$s0,$FRAMESIZE-6*$SZREG($sp)
	$REG_L	$gp,$FRAMESIZE-7*$SZREG($sp)
___
# Return; the frame is released in the branch delay slot.
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE
.end	private_AES_set_encrypt_key
___
1088
# Register aliases for private_AES_set_decrypt_key.
# $head/$tail reuse the first two argument registers as the two cursors
# of the round-key swap loop.
my ($head, $tail) = ($inp, $bits);

# Working registers of the .Lmix loop.
my ($tp1, $tp2, $tp4, $tp8) = ($a4, $a5, $a6, $a7);
my ($tp9, $tpb, $tpd, $tpe) = ($s0, $s1, $s2, $s3);

# Scratch mask plus the three byte-replicated constants the loop uses.
my ($m, $x80808080, $x7f7f7f7f, $x1b1b1b1b) = ($at, $t0, $t1, $t2);
1092$code.=<<___;
1093.align	5
1094.globl	private_AES_set_decrypt_key
1095.ent	private_AES_set_decrypt_key
1096private_AES_set_decrypt_key:
1097	.frame	$sp,$FRAMESIZE,$ra
1098	.mask	$SAVED_REGS_MASK,-$SZREG
1099	.set	noreorder
1100___
1101$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
1102	.cpload	$pf
1103___
1104$code.=<<___;
1105	$PTR_SUB $sp,$FRAMESIZE
1106	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
1107	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
1108___
1109$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
1110	$REG_S	$s3,$FRAMESIZE-3*$SZREG($sp)
1111	$REG_S	$s2,$FRAMESIZE-4*$SZREG($sp)
1112	$REG_S	$s1,$FRAMESIZE-5*$SZREG($sp)
1113	$REG_S	$s0,$FRAMESIZE-6*$SZREG($sp)
1114	$REG_S	$gp,$FRAMESIZE-7*$SZREG($sp)
1115___
1116$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
1117	.cplocal	$Tbl
1118	.cpsetup	$pf,$zero,private_AES_set_decrypt_key
1119___
1120$code.=<<___;
1121	.set	reorder
1122	la	$Tbl,AES_Te		# PIC-ified 'load address'
1123
1124	bal	_mips_AES_set_encrypt_key
1125
1126	bltz	$t0,.Ldkey_done
1127
1128	sll	$at,$cnt,4
1129	$PTR_ADD $head,$key,0
1130	$PTR_ADD $tail,$key,$at
1131.align	4
1132.Lswap:
1133	lw	$rk0,0($head)
1134	lw	$rk1,4($head)
1135	lw	$rk2,8($head)
1136	lw	$rk3,12($head)
1137	lw	$rk4,0($tail)
1138	lw	$rk5,4($tail)
1139	lw	$rk6,8($tail)
1140	lw	$rk7,12($tail)
1141	sw	$rk0,0($tail)
1142	sw	$rk1,4($tail)
1143	sw	$rk2,8($tail)
1144	sw	$rk3,12($tail)
1145	$PTR_ADD $head,16
1146	$PTR_SUB $tail,16
1147	sw	$rk4,-16($head)
1148	sw	$rk5,-12($head)
1149	sw	$rk6,-8($head)
1150	sw	$rk7,-4($head)
1151	bne	$head,$tail,.Lswap
1152
1153	lw	$tp1,16($key)		# modulo-scheduled
1154	lui	$x80808080,0x8080
1155	sub	$cnt,1
1156	or	$x80808080,0x8080
1157	sll	$cnt,2
1158	$PTR_ADD $key,16
1159	lui	$x1b1b1b1b,0x1b1b
1160	nor	$x7f7f7f7f,$zero,$x80808080
1161	or	$x1b1b1b1b,0x1b1b
1162.align	4
1163.Lmix:
1164	and	$m,$tp1,$x80808080
1165	and	$tp2,$tp1,$x7f7f7f7f
1166	srl	$tp4,$m,7
1167	addu	$tp2,$tp2		# tp2<<1
1168	subu	$m,$tp4
1169	and	$m,$x1b1b1b1b
1170	xor	$tp2,$m
1171
1172	and	$m,$tp2,$x80808080
1173	and	$tp4,$tp2,$x7f7f7f7f
1174	srl	$tp8,$m,7
1175	addu	$tp4,$tp4		# tp4<<1
1176	subu	$m,$tp8
1177	and	$m,$x1b1b1b1b
1178	xor	$tp4,$m
1179
1180	and	$m,$tp4,$x80808080
1181	and	$tp8,$tp4,$x7f7f7f7f
1182	srl	$tp9,$m,7
1183	addu	$tp8,$tp8		# tp8<<1
1184	subu	$m,$tp9
1185	and	$m,$x1b1b1b1b
1186	xor	$tp8,$m
1187
1188	xor	$tp9,$tp8,$tp1
1189	xor	$tpe,$tp8,$tp4
1190	xor	$tpb,$tp9,$tp2
1191	xor	$tpd,$tp9,$tp4
1192
1193	_ror	$tp1,$tpd,16
1194	 xor	$tpe,$tp2
1195	_ror	$tp2,$tpd,-16
1196	xor	$tpe,$tp1
1197	_ror	$tp1,$tp9,8
1198	xor	$tpe,$tp2
1199	_ror	$tp2,$tp9,-24
1200	xor	$tpe,$tp1
1201	_ror	$tp1,$tpb,24
1202	xor	$tpe,$tp2
1203	_ror	$tp2,$tpb,-8
1204	xor	$tpe,$tp1
1205	lw	$tp1,4($key)		# modulo-scheduled
1206	xor	$tpe,$tp2
1207	sub	$cnt,1
1208	sw	$tpe,0($key)
1209	$PTR_ADD $key,4
1210	bnez	$cnt,.Lmix
1211
1212	li	$t0,0
1213.Ldkey_done:
1214	.set	noreorder
1215	move	$a0,$t0
1216	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
1217	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
1218___
# NUBI only: reload $s0-$s3 and $gp from the slots the prologue of
# private_AES_set_decrypt_key stored them in, i.e. 3..7 from the top of
# its 8-slot frame. Reloading from slots 11..15 would address memory
# below $sp and leave the callee-saved registers clobbered.
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$s3,$FRAMESIZE-3*$SZREG($sp)
	$REG_L	$s2,$FRAMESIZE-4*$SZREG($sp)
	$REG_L	$s1,$FRAMESIZE-5*$SZREG($sp)
	$REG_L	$s0,$FRAMESIZE-6*$SZREG($sp)
	$REG_L	$gp,$FRAMESIZE-7*$SZREG($sp)
___
1226$code.=<<___;
1227	jr	$ra
1228	$PTR_ADD $sp,$FRAMESIZE
1229.end	private_AES_set_decrypt_key
1230___
1231}}}
1232
1233######################################################################
1234# Tables are kept in endian-neutral manner
1235$code.=<<___;
1236.rdata
1237.align	6
1238AES_Te:
1239.byte	0xc6,0x63,0x63,0xa5,	0xf8,0x7c,0x7c,0x84	# Te0
1240.byte	0xee,0x77,0x77,0x99,	0xf6,0x7b,0x7b,0x8d
1241.byte	0xff,0xf2,0xf2,0x0d,	0xd6,0x6b,0x6b,0xbd
1242.byte	0xde,0x6f,0x6f,0xb1,	0x91,0xc5,0xc5,0x54
1243.byte	0x60,0x30,0x30,0x50,	0x02,0x01,0x01,0x03
1244.byte	0xce,0x67,0x67,0xa9,	0x56,0x2b,0x2b,0x7d
1245.byte	0xe7,0xfe,0xfe,0x19,	0xb5,0xd7,0xd7,0x62
1246.byte	0x4d,0xab,0xab,0xe6,	0xec,0x76,0x76,0x9a
1247.byte	0x8f,0xca,0xca,0x45,	0x1f,0x82,0x82,0x9d
1248.byte	0x89,0xc9,0xc9,0x40,	0xfa,0x7d,0x7d,0x87
1249.byte	0xef,0xfa,0xfa,0x15,	0xb2,0x59,0x59,0xeb
1250.byte	0x8e,0x47,0x47,0xc9,	0xfb,0xf0,0xf0,0x0b
1251.byte	0x41,0xad,0xad,0xec,	0xb3,0xd4,0xd4,0x67
1252.byte	0x5f,0xa2,0xa2,0xfd,	0x45,0xaf,0xaf,0xea
1253.byte	0x23,0x9c,0x9c,0xbf,	0x53,0xa4,0xa4,0xf7
1254.byte	0xe4,0x72,0x72,0x96,	0x9b,0xc0,0xc0,0x5b
1255.byte	0x75,0xb7,0xb7,0xc2,	0xe1,0xfd,0xfd,0x1c
1256.byte	0x3d,0x93,0x93,0xae,	0x4c,0x26,0x26,0x6a
1257.byte	0x6c,0x36,0x36,0x5a,	0x7e,0x3f,0x3f,0x41
1258.byte	0xf5,0xf7,0xf7,0x02,	0x83,0xcc,0xcc,0x4f
1259.byte	0x68,0x34,0x34,0x5c,	0x51,0xa5,0xa5,0xf4
1260.byte	0xd1,0xe5,0xe5,0x34,	0xf9,0xf1,0xf1,0x08
1261.byte	0xe2,0x71,0x71,0x93,	0xab,0xd8,0xd8,0x73
1262.byte	0x62,0x31,0x31,0x53,	0x2a,0x15,0x15,0x3f
1263.byte	0x08,0x04,0x04,0x0c,	0x95,0xc7,0xc7,0x52
1264.byte	0x46,0x23,0x23,0x65,	0x9d,0xc3,0xc3,0x5e
1265.byte	0x30,0x18,0x18,0x28,	0x37,0x96,0x96,0xa1
1266.byte	0x0a,0x05,0x05,0x0f,	0x2f,0x9a,0x9a,0xb5
1267.byte	0x0e,0x07,0x07,0x09,	0x24,0x12,0x12,0x36
1268.byte	0x1b,0x80,0x80,0x9b,	0xdf,0xe2,0xe2,0x3d
1269.byte	0xcd,0xeb,0xeb,0x26,	0x4e,0x27,0x27,0x69
1270.byte	0x7f,0xb2,0xb2,0xcd,	0xea,0x75,0x75,0x9f
1271.byte	0x12,0x09,0x09,0x1b,	0x1d,0x83,0x83,0x9e
1272.byte	0x58,0x2c,0x2c,0x74,	0x34,0x1a,0x1a,0x2e
1273.byte	0x36,0x1b,0x1b,0x2d,	0xdc,0x6e,0x6e,0xb2
1274.byte	0xb4,0x5a,0x5a,0xee,	0x5b,0xa0,0xa0,0xfb
1275.byte	0xa4,0x52,0x52,0xf6,	0x76,0x3b,0x3b,0x4d
1276.byte	0xb7,0xd6,0xd6,0x61,	0x7d,0xb3,0xb3,0xce
1277.byte	0x52,0x29,0x29,0x7b,	0xdd,0xe3,0xe3,0x3e
1278.byte	0x5e,0x2f,0x2f,0x71,	0x13,0x84,0x84,0x97
1279.byte	0xa6,0x53,0x53,0xf5,	0xb9,0xd1,0xd1,0x68
1280.byte	0x00,0x00,0x00,0x00,	0xc1,0xed,0xed,0x2c
1281.byte	0x40,0x20,0x20,0x60,	0xe3,0xfc,0xfc,0x1f
1282.byte	0x79,0xb1,0xb1,0xc8,	0xb6,0x5b,0x5b,0xed
1283.byte	0xd4,0x6a,0x6a,0xbe,	0x8d,0xcb,0xcb,0x46
1284.byte	0x67,0xbe,0xbe,0xd9,	0x72,0x39,0x39,0x4b
1285.byte	0x94,0x4a,0x4a,0xde,	0x98,0x4c,0x4c,0xd4
1286.byte	0xb0,0x58,0x58,0xe8,	0x85,0xcf,0xcf,0x4a
1287.byte	0xbb,0xd0,0xd0,0x6b,	0xc5,0xef,0xef,0x2a
1288.byte	0x4f,0xaa,0xaa,0xe5,	0xed,0xfb,0xfb,0x16
1289.byte	0x86,0x43,0x43,0xc5,	0x9a,0x4d,0x4d,0xd7
1290.byte	0x66,0x33,0x33,0x55,	0x11,0x85,0x85,0x94
1291.byte	0x8a,0x45,0x45,0xcf,	0xe9,0xf9,0xf9,0x10
1292.byte	0x04,0x02,0x02,0x06,	0xfe,0x7f,0x7f,0x81
1293.byte	0xa0,0x50,0x50,0xf0,	0x78,0x3c,0x3c,0x44
1294.byte	0x25,0x9f,0x9f,0xba,	0x4b,0xa8,0xa8,0xe3
1295.byte	0xa2,0x51,0x51,0xf3,	0x5d,0xa3,0xa3,0xfe
1296.byte	0x80,0x40,0x40,0xc0,	0x05,0x8f,0x8f,0x8a
1297.byte	0x3f,0x92,0x92,0xad,	0x21,0x9d,0x9d,0xbc
1298.byte	0x70,0x38,0x38,0x48,	0xf1,0xf5,0xf5,0x04
1299.byte	0x63,0xbc,0xbc,0xdf,	0x77,0xb6,0xb6,0xc1
1300.byte	0xaf,0xda,0xda,0x75,	0x42,0x21,0x21,0x63
1301.byte	0x20,0x10,0x10,0x30,	0xe5,0xff,0xff,0x1a
1302.byte	0xfd,0xf3,0xf3,0x0e,	0xbf,0xd2,0xd2,0x6d
1303.byte	0x81,0xcd,0xcd,0x4c,	0x18,0x0c,0x0c,0x14
1304.byte	0x26,0x13,0x13,0x35,	0xc3,0xec,0xec,0x2f
1305.byte	0xbe,0x5f,0x5f,0xe1,	0x35,0x97,0x97,0xa2
1306.byte	0x88,0x44,0x44,0xcc,	0x2e,0x17,0x17,0x39
1307.byte	0x93,0xc4,0xc4,0x57,	0x55,0xa7,0xa7,0xf2
1308.byte	0xfc,0x7e,0x7e,0x82,	0x7a,0x3d,0x3d,0x47
1309.byte	0xc8,0x64,0x64,0xac,	0xba,0x5d,0x5d,0xe7
1310.byte	0x32,0x19,0x19,0x2b,	0xe6,0x73,0x73,0x95
1311.byte	0xc0,0x60,0x60,0xa0,	0x19,0x81,0x81,0x98
1312.byte	0x9e,0x4f,0x4f,0xd1,	0xa3,0xdc,0xdc,0x7f
1313.byte	0x44,0x22,0x22,0x66,	0x54,0x2a,0x2a,0x7e
1314.byte	0x3b,0x90,0x90,0xab,	0x0b,0x88,0x88,0x83
1315.byte	0x8c,0x46,0x46,0xca,	0xc7,0xee,0xee,0x29
1316.byte	0x6b,0xb8,0xb8,0xd3,	0x28,0x14,0x14,0x3c
1317.byte	0xa7,0xde,0xde,0x79,	0xbc,0x5e,0x5e,0xe2
1318.byte	0x16,0x0b,0x0b,0x1d,	0xad,0xdb,0xdb,0x76
1319.byte	0xdb,0xe0,0xe0,0x3b,	0x64,0x32,0x32,0x56
1320.byte	0x74,0x3a,0x3a,0x4e,	0x14,0x0a,0x0a,0x1e
1321.byte	0x92,0x49,0x49,0xdb,	0x0c,0x06,0x06,0x0a
1322.byte	0x48,0x24,0x24,0x6c,	0xb8,0x5c,0x5c,0xe4
1323.byte	0x9f,0xc2,0xc2,0x5d,	0xbd,0xd3,0xd3,0x6e
1324.byte	0x43,0xac,0xac,0xef,	0xc4,0x62,0x62,0xa6
1325.byte	0x39,0x91,0x91,0xa8,	0x31,0x95,0x95,0xa4
1326.byte	0xd3,0xe4,0xe4,0x37,	0xf2,0x79,0x79,0x8b
1327.byte	0xd5,0xe7,0xe7,0x32,	0x8b,0xc8,0xc8,0x43
1328.byte	0x6e,0x37,0x37,0x59,	0xda,0x6d,0x6d,0xb7
1329.byte	0x01,0x8d,0x8d,0x8c,	0xb1,0xd5,0xd5,0x64
1330.byte	0x9c,0x4e,0x4e,0xd2,	0x49,0xa9,0xa9,0xe0
1331.byte	0xd8,0x6c,0x6c,0xb4,	0xac,0x56,0x56,0xfa
1332.byte	0xf3,0xf4,0xf4,0x07,	0xcf,0xea,0xea,0x25
1333.byte	0xca,0x65,0x65,0xaf,	0xf4,0x7a,0x7a,0x8e
1334.byte	0x47,0xae,0xae,0xe9,	0x10,0x08,0x08,0x18
1335.byte	0x6f,0xba,0xba,0xd5,	0xf0,0x78,0x78,0x88
1336.byte	0x4a,0x25,0x25,0x6f,	0x5c,0x2e,0x2e,0x72
1337.byte	0x38,0x1c,0x1c,0x24,	0x57,0xa6,0xa6,0xf1
1338.byte	0x73,0xb4,0xb4,0xc7,	0x97,0xc6,0xc6,0x51
1339.byte	0xcb,0xe8,0xe8,0x23,	0xa1,0xdd,0xdd,0x7c
1340.byte	0xe8,0x74,0x74,0x9c,	0x3e,0x1f,0x1f,0x21
1341.byte	0x96,0x4b,0x4b,0xdd,	0x61,0xbd,0xbd,0xdc
1342.byte	0x0d,0x8b,0x8b,0x86,	0x0f,0x8a,0x8a,0x85
1343.byte	0xe0,0x70,0x70,0x90,	0x7c,0x3e,0x3e,0x42
1344.byte	0x71,0xb5,0xb5,0xc4,	0xcc,0x66,0x66,0xaa
1345.byte	0x90,0x48,0x48,0xd8,	0x06,0x03,0x03,0x05
1346.byte	0xf7,0xf6,0xf6,0x01,	0x1c,0x0e,0x0e,0x12
1347.byte	0xc2,0x61,0x61,0xa3,	0x6a,0x35,0x35,0x5f
1348.byte	0xae,0x57,0x57,0xf9,	0x69,0xb9,0xb9,0xd0
1349.byte	0x17,0x86,0x86,0x91,	0x99,0xc1,0xc1,0x58
1350.byte	0x3a,0x1d,0x1d,0x27,	0x27,0x9e,0x9e,0xb9
1351.byte	0xd9,0xe1,0xe1,0x38,	0xeb,0xf8,0xf8,0x13
1352.byte	0x2b,0x98,0x98,0xb3,	0x22,0x11,0x11,0x33
1353.byte	0xd2,0x69,0x69,0xbb,	0xa9,0xd9,0xd9,0x70
1354.byte	0x07,0x8e,0x8e,0x89,	0x33,0x94,0x94,0xa7
1355.byte	0x2d,0x9b,0x9b,0xb6,	0x3c,0x1e,0x1e,0x22
1356.byte	0x15,0x87,0x87,0x92,	0xc9,0xe9,0xe9,0x20
1357.byte	0x87,0xce,0xce,0x49,	0xaa,0x55,0x55,0xff
1358.byte	0x50,0x28,0x28,0x78,	0xa5,0xdf,0xdf,0x7a
1359.byte	0x03,0x8c,0x8c,0x8f,	0x59,0xa1,0xa1,0xf8
1360.byte	0x09,0x89,0x89,0x80,	0x1a,0x0d,0x0d,0x17
1361.byte	0x65,0xbf,0xbf,0xda,	0xd7,0xe6,0xe6,0x31
1362.byte	0x84,0x42,0x42,0xc6,	0xd0,0x68,0x68,0xb8
1363.byte	0x82,0x41,0x41,0xc3,	0x29,0x99,0x99,0xb0
1364.byte	0x5a,0x2d,0x2d,0x77,	0x1e,0x0f,0x0f,0x11
1365.byte	0x7b,0xb0,0xb0,0xcb,	0xa8,0x54,0x54,0xfc
1366.byte	0x6d,0xbb,0xbb,0xd6,	0x2c,0x16,0x16,0x3a
1367
1368.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5	# Te4
1369.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
1370.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
1371.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
1372.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
1373.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
1374.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
1375.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
1376.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
1377.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
1378.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
1379.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
1380.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
1381.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
1382.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
1383.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
1384.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
1385.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
1386.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
1387.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
1388.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
1389.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
1390.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
1391.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
1392.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
1393.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
1394.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
1395.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
1396.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
1397.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
1398.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
1399.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
1400
1401.byte	0x01,0x00,0x00,0x00,	0x02,0x00,0x00,0x00	# rcon
1402.byte	0x04,0x00,0x00,0x00,	0x08,0x00,0x00,0x00
1403.byte	0x10,0x00,0x00,0x00,	0x20,0x00,0x00,0x00
1404.byte	0x40,0x00,0x00,0x00,	0x80,0x00,0x00,0x00
1405.byte	0x1B,0x00,0x00,0x00,	0x36,0x00,0x00,0x00
1406
1407.align	6
1408AES_Td:
1409.byte	0x51,0xf4,0xa7,0x50,	0x7e,0x41,0x65,0x53	# Td0
1410.byte	0x1a,0x17,0xa4,0xc3,	0x3a,0x27,0x5e,0x96
1411.byte	0x3b,0xab,0x6b,0xcb,	0x1f,0x9d,0x45,0xf1
1412.byte	0xac,0xfa,0x58,0xab,	0x4b,0xe3,0x03,0x93
1413.byte	0x20,0x30,0xfa,0x55,	0xad,0x76,0x6d,0xf6
1414.byte	0x88,0xcc,0x76,0x91,	0xf5,0x02,0x4c,0x25
1415.byte	0x4f,0xe5,0xd7,0xfc,	0xc5,0x2a,0xcb,0xd7
1416.byte	0x26,0x35,0x44,0x80,	0xb5,0x62,0xa3,0x8f
1417.byte	0xde,0xb1,0x5a,0x49,	0x25,0xba,0x1b,0x67
1418.byte	0x45,0xea,0x0e,0x98,	0x5d,0xfe,0xc0,0xe1
1419.byte	0xc3,0x2f,0x75,0x02,	0x81,0x4c,0xf0,0x12
1420.byte	0x8d,0x46,0x97,0xa3,	0x6b,0xd3,0xf9,0xc6
1421.byte	0x03,0x8f,0x5f,0xe7,	0x15,0x92,0x9c,0x95
1422.byte	0xbf,0x6d,0x7a,0xeb,	0x95,0x52,0x59,0xda
1423.byte	0xd4,0xbe,0x83,0x2d,	0x58,0x74,0x21,0xd3
1424.byte	0x49,0xe0,0x69,0x29,	0x8e,0xc9,0xc8,0x44
1425.byte	0x75,0xc2,0x89,0x6a,	0xf4,0x8e,0x79,0x78
1426.byte	0x99,0x58,0x3e,0x6b,	0x27,0xb9,0x71,0xdd
1427.byte	0xbe,0xe1,0x4f,0xb6,	0xf0,0x88,0xad,0x17
1428.byte	0xc9,0x20,0xac,0x66,	0x7d,0xce,0x3a,0xb4
1429.byte	0x63,0xdf,0x4a,0x18,	0xe5,0x1a,0x31,0x82
1430.byte	0x97,0x51,0x33,0x60,	0x62,0x53,0x7f,0x45
1431.byte	0xb1,0x64,0x77,0xe0,	0xbb,0x6b,0xae,0x84
1432.byte	0xfe,0x81,0xa0,0x1c,	0xf9,0x08,0x2b,0x94
1433.byte	0x70,0x48,0x68,0x58,	0x8f,0x45,0xfd,0x19
1434.byte	0x94,0xde,0x6c,0x87,	0x52,0x7b,0xf8,0xb7
1435.byte	0xab,0x73,0xd3,0x23,	0x72,0x4b,0x02,0xe2
1436.byte	0xe3,0x1f,0x8f,0x57,	0x66,0x55,0xab,0x2a
1437.byte	0xb2,0xeb,0x28,0x07,	0x2f,0xb5,0xc2,0x03
1438.byte	0x86,0xc5,0x7b,0x9a,	0xd3,0x37,0x08,0xa5
1439.byte	0x30,0x28,0x87,0xf2,	0x23,0xbf,0xa5,0xb2
1440.byte	0x02,0x03,0x6a,0xba,	0xed,0x16,0x82,0x5c
1441.byte	0x8a,0xcf,0x1c,0x2b,	0xa7,0x79,0xb4,0x92
1442.byte	0xf3,0x07,0xf2,0xf0,	0x4e,0x69,0xe2,0xa1
1443.byte	0x65,0xda,0xf4,0xcd,	0x06,0x05,0xbe,0xd5
1444.byte	0xd1,0x34,0x62,0x1f,	0xc4,0xa6,0xfe,0x8a
1445.byte	0x34,0x2e,0x53,0x9d,	0xa2,0xf3,0x55,0xa0
1446.byte	0x05,0x8a,0xe1,0x32,	0xa4,0xf6,0xeb,0x75
1447.byte	0x0b,0x83,0xec,0x39,	0x40,0x60,0xef,0xaa
1448.byte	0x5e,0x71,0x9f,0x06,	0xbd,0x6e,0x10,0x51
1449.byte	0x3e,0x21,0x8a,0xf9,	0x96,0xdd,0x06,0x3d
1450.byte	0xdd,0x3e,0x05,0xae,	0x4d,0xe6,0xbd,0x46
1451.byte	0x91,0x54,0x8d,0xb5,	0x71,0xc4,0x5d,0x05
1452.byte	0x04,0x06,0xd4,0x6f,	0x60,0x50,0x15,0xff
1453.byte	0x19,0x98,0xfb,0x24,	0xd6,0xbd,0xe9,0x97
1454.byte	0x89,0x40,0x43,0xcc,	0x67,0xd9,0x9e,0x77
1455.byte	0xb0,0xe8,0x42,0xbd,	0x07,0x89,0x8b,0x88
1456.byte	0xe7,0x19,0x5b,0x38,	0x79,0xc8,0xee,0xdb
1457.byte	0xa1,0x7c,0x0a,0x47,	0x7c,0x42,0x0f,0xe9
1458.byte	0xf8,0x84,0x1e,0xc9,	0x00,0x00,0x00,0x00
1459.byte	0x09,0x80,0x86,0x83,	0x32,0x2b,0xed,0x48
1460.byte	0x1e,0x11,0x70,0xac,	0x6c,0x5a,0x72,0x4e
1461.byte	0xfd,0x0e,0xff,0xfb,	0x0f,0x85,0x38,0x56
1462.byte	0x3d,0xae,0xd5,0x1e,	0x36,0x2d,0x39,0x27
1463.byte	0x0a,0x0f,0xd9,0x64,	0x68,0x5c,0xa6,0x21
1464.byte	0x9b,0x5b,0x54,0xd1,	0x24,0x36,0x2e,0x3a
1465.byte	0x0c,0x0a,0x67,0xb1,	0x93,0x57,0xe7,0x0f
1466.byte	0xb4,0xee,0x96,0xd2,	0x1b,0x9b,0x91,0x9e
1467.byte	0x80,0xc0,0xc5,0x4f,	0x61,0xdc,0x20,0xa2
1468.byte	0x5a,0x77,0x4b,0x69,	0x1c,0x12,0x1a,0x16
1469.byte	0xe2,0x93,0xba,0x0a,	0xc0,0xa0,0x2a,0xe5
1470.byte	0x3c,0x22,0xe0,0x43,	0x12,0x1b,0x17,0x1d
1471.byte	0x0e,0x09,0x0d,0x0b,	0xf2,0x8b,0xc7,0xad
1472.byte	0x2d,0xb6,0xa8,0xb9,	0x14,0x1e,0xa9,0xc8
1473.byte	0x57,0xf1,0x19,0x85,	0xaf,0x75,0x07,0x4c
1474.byte	0xee,0x99,0xdd,0xbb,	0xa3,0x7f,0x60,0xfd
1475.byte	0xf7,0x01,0x26,0x9f,	0x5c,0x72,0xf5,0xbc
1476.byte	0x44,0x66,0x3b,0xc5,	0x5b,0xfb,0x7e,0x34
1477.byte	0x8b,0x43,0x29,0x76,	0xcb,0x23,0xc6,0xdc
1478.byte	0xb6,0xed,0xfc,0x68,	0xb8,0xe4,0xf1,0x63
1479.byte	0xd7,0x31,0xdc,0xca,	0x42,0x63,0x85,0x10
1480.byte	0x13,0x97,0x22,0x40,	0x84,0xc6,0x11,0x20
1481.byte	0x85,0x4a,0x24,0x7d,	0xd2,0xbb,0x3d,0xf8
1482.byte	0xae,0xf9,0x32,0x11,	0xc7,0x29,0xa1,0x6d
1483.byte	0x1d,0x9e,0x2f,0x4b,	0xdc,0xb2,0x30,0xf3
1484.byte	0x0d,0x86,0x52,0xec,	0x77,0xc1,0xe3,0xd0
1485.byte	0x2b,0xb3,0x16,0x6c,	0xa9,0x70,0xb9,0x99
1486.byte	0x11,0x94,0x48,0xfa,	0x47,0xe9,0x64,0x22
1487.byte	0xa8,0xfc,0x8c,0xc4,	0xa0,0xf0,0x3f,0x1a
1488.byte	0x56,0x7d,0x2c,0xd8,	0x22,0x33,0x90,0xef
1489.byte	0x87,0x49,0x4e,0xc7,	0xd9,0x38,0xd1,0xc1
1490.byte	0x8c,0xca,0xa2,0xfe,	0x98,0xd4,0x0b,0x36
1491.byte	0xa6,0xf5,0x81,0xcf,	0xa5,0x7a,0xde,0x28
1492.byte	0xda,0xb7,0x8e,0x26,	0x3f,0xad,0xbf,0xa4
1493.byte	0x2c,0x3a,0x9d,0xe4,	0x50,0x78,0x92,0x0d
1494.byte	0x6a,0x5f,0xcc,0x9b,	0x54,0x7e,0x46,0x62
1495.byte	0xf6,0x8d,0x13,0xc2,	0x90,0xd8,0xb8,0xe8
1496.byte	0x2e,0x39,0xf7,0x5e,	0x82,0xc3,0xaf,0xf5
1497.byte	0x9f,0x5d,0x80,0xbe,	0x69,0xd0,0x93,0x7c
1498.byte	0x6f,0xd5,0x2d,0xa9,	0xcf,0x25,0x12,0xb3
1499.byte	0xc8,0xac,0x99,0x3b,	0x10,0x18,0x7d,0xa7
1500.byte	0xe8,0x9c,0x63,0x6e,	0xdb,0x3b,0xbb,0x7b
1501.byte	0xcd,0x26,0x78,0x09,	0x6e,0x59,0x18,0xf4
1502.byte	0xec,0x9a,0xb7,0x01,	0x83,0x4f,0x9a,0xa8
1503.byte	0xe6,0x95,0x6e,0x65,	0xaa,0xff,0xe6,0x7e
1504.byte	0x21,0xbc,0xcf,0x08,	0xef,0x15,0xe8,0xe6
1505.byte	0xba,0xe7,0x9b,0xd9,	0x4a,0x6f,0x36,0xce
1506.byte	0xea,0x9f,0x09,0xd4,	0x29,0xb0,0x7c,0xd6
1507.byte	0x31,0xa4,0xb2,0xaf,	0x2a,0x3f,0x23,0x31
1508.byte	0xc6,0xa5,0x94,0x30,	0x35,0xa2,0x66,0xc0
1509.byte	0x74,0x4e,0xbc,0x37,	0xfc,0x82,0xca,0xa6
1510.byte	0xe0,0x90,0xd0,0xb0,	0x33,0xa7,0xd8,0x15
1511.byte	0xf1,0x04,0x98,0x4a,	0x41,0xec,0xda,0xf7
1512.byte	0x7f,0xcd,0x50,0x0e,	0x17,0x91,0xf6,0x2f
1513.byte	0x76,0x4d,0xd6,0x8d,	0x43,0xef,0xb0,0x4d
1514.byte	0xcc,0xaa,0x4d,0x54,	0xe4,0x96,0x04,0xdf
1515.byte	0x9e,0xd1,0xb5,0xe3,	0x4c,0x6a,0x88,0x1b
1516.byte	0xc1,0x2c,0x1f,0xb8,	0x46,0x65,0x51,0x7f
1517.byte	0x9d,0x5e,0xea,0x04,	0x01,0x8c,0x35,0x5d
1518.byte	0xfa,0x87,0x74,0x73,	0xfb,0x0b,0x41,0x2e
1519.byte	0xb3,0x67,0x1d,0x5a,	0x92,0xdb,0xd2,0x52
1520.byte	0xe9,0x10,0x56,0x33,	0x6d,0xd6,0x47,0x13
1521.byte	0x9a,0xd7,0x61,0x8c,	0x37,0xa1,0x0c,0x7a
1522.byte	0x59,0xf8,0x14,0x8e,	0xeb,0x13,0x3c,0x89
1523.byte	0xce,0xa9,0x27,0xee,	0xb7,0x61,0xc9,0x35
1524.byte	0xe1,0x1c,0xe5,0xed,	0x7a,0x47,0xb1,0x3c
1525.byte	0x9c,0xd2,0xdf,0x59,	0x55,0xf2,0x73,0x3f
1526.byte	0x18,0x14,0xce,0x79,	0x73,0xc7,0x37,0xbf
1527.byte	0x53,0xf7,0xcd,0xea,	0x5f,0xfd,0xaa,0x5b
1528.byte	0xdf,0x3d,0x6f,0x14,	0x78,0x44,0xdb,0x86
1529.byte	0xca,0xaf,0xf3,0x81,	0xb9,0x68,0xc4,0x3e
1530.byte	0x38,0x24,0x34,0x2c,	0xc2,0xa3,0x40,0x5f
1531.byte	0x16,0x1d,0xc3,0x72,	0xbc,0xe2,0x25,0x0c
1532.byte	0x28,0x3c,0x49,0x8b,	0xff,0x0d,0x95,0x41
1533.byte	0x39,0xa8,0x01,0x71,	0x08,0x0c,0xb3,0xde
1534.byte	0xd8,0xb4,0xe4,0x9c,	0x64,0x56,0xc1,0x90
1535.byte	0x7b,0xcb,0x84,0x61,	0xd5,0x32,0xb6,0x70
1536.byte	0x48,0x6c,0x5c,0x74,	0xd0,0xb8,0x57,0x42
1537
1538.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38	# Td4
1539.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
1540.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
1541.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
1542.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
1543.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
1544.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
1545.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
1546.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
1547.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
1548.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
1549.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
1550.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
1551.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
1552.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
1553.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
1554.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
1555.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
1556.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
1557.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
1558.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
1559.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
1560.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
1561.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
1562.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
1563.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
1564.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
1565.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
1566.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
1567.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
1568.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
1569.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
1570___
1571
# Post-process the accumulated assembly text in $code line by line and
# print the result:
#   - `...` spans are replaced by the value of the enclosed Perl
#     expression;
#   - the made-up instructions _xtr, _ins, _ror and _bias are expanded
#     into real shifts whose amounts depend on $big_endian;
#   - for little-endian output the displacements of lwl/lwr/swl/swr
#     are adjusted.
foreach (split("\n",$code)) {
	# substitute every `expr` with the result of evaluating expr
	s/\`([^\`]*)\`/eval $1/ge;

	# made-up _instructions, _xtr, _ins, _ror and _bias, cope
	# with byte order dependencies...
	if (/^\s+_/) {
	    # expand the two-operand shorthand "_op $rd,n" to the
	    # three-operand form "_op $rd,$rd,n"
	    s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/;

	    # Only the first pattern that matches is applied.  Shift
	    # amount for big-endian / little-endian respectively:
	    #   _xtr  -> srl	n / 24-n	(n may be written as "n-2")
	    #   _ins  -> sll	n / 24-n
	    #   _ror  -> srl	n / -n		(negative amount fixed up below)
	    #   _bias -> sll	n / (n-16)&31
	    s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
		sprintf("srl\t$1,$2,%d",$big_endian ?	eval($3)
					:		eval("24-$3"))/e or
	    s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
		sprintf("sll\t$1,$2,%d",$big_endian ?	eval($3)
					:		eval("24-$3"))/e or
	    s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
		sprintf("srl\t$1,$2,%d",$big_endian ?	eval($3)
					:		eval("$3*-1"))/e or
	    s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
		sprintf("sll\t$1,$2,%d",$big_endian ?	eval($3)
					:		eval("($3-16)&31"))/e;

	    # Tidy up the shifts produced above, first match wins:
	    #   srl by a negative amount -> sll by its absolute value
	    #   srl by 0                 -> and with 0xff
	    #   sll by 0                 -> commented out with '#'
	    s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/
		sprintf("sll\t$1,$2,$3")/e				or
	    s/srl\s+(\$[0-9]+),(\$[0-9]+),0/
		sprintf("and\t$1,$2,0xff")/e				or
	    s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/;
	}

	# convert lwl/lwr and swr/swl to little-endian order
	if (!$big_endian && /^\s+[sl]w[lr]\s+/) {
	    # The greedy .* leaves only the last digit of the displacement
	    # in $2.  Since & binds looser than + and -, the mask applies
	    # to the whole sum, so that digit d becomes (d-1)&3 for
	    # lwl/swl and (d+1)&3 for lwr/swr.
	    s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/
		sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e	or
	    s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/
		sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
	}

	print $_,"\n";
}

# Buffered write errors only surface when the handle is closed, so a
# failed close must abort the run instead of silently leaving
# truncated output behind.
close STDOUT or die "error closing STDOUT: $!";
1612