#!/usr/bin/env perl
#
# Source snapshot: aes-armv4.pl revision 221304ee937bc0910948a8be1320cb8cc4eb6d36

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# AES for ARMv4

# January 2007.
#
# Code uses a single 1K S-box and is >2 times faster than code generated
# by gcc-3.4.1. This is thanks to a unique feature of the ARMv4 ISA, which
# allows merging a logical or arithmetic operation with a shift or rotate
# in one instruction and emitting a combined result every cycle. The module
# is endian-neutral. The performance is ~42 cycles/byte for a 128-bit
# key.

# May 2007.
#
# AES_set_[en|de]crypt_key is added.

# Symbolic register names interpolated into the assembly text below.
# Every routine in this file shares the same allocation.

# s0-s3: the four 32-bit words of the cipher state.  In the key-setup
# code they hold the most recently generated round-key words instead.
$s0="r0";
$s1="r1";
$s2="r2";
$s3="r3";
# t1-t3: temporaries (table values, round-key words, output bytes).
$t1="r4";
$t2="r5";
$t3="r6";
# i1-i3: byte indices extracted from a state word, then overwritten by
# the table entries loaded through them.
$i1="r7";
$i2="r8";
$i3="r9";

$tbl="r10";	# base address of the lookup table in use
$key="r11";	# walking pointer into the key schedule
$rounds="r12";	# round counter; also reused as a plain byte pointer
		# (input/output/user key) outside the round loops

$code=<<___;
.text
.code	32

@ AES_Te - encryption tables, kept in .text so the code can reach them
@ with a pc-relative subtraction.  Layout (byte offsets from AES_Te):
@     0 .. 1023   256 words, the round table.  The other three round
@                 tables are obtained by rotating these words, and the
@                 final round reads the byte at offset 2 of each word.
@  1024 .. 1279   Te4[256], one byte per entry, used by the key setup.
@  1280 .. 1343   rcon[], round constants for the key setup, zero padded.
@ The offsets 1024 and 256 are hard-coded in the routines below, so the
@ three parts must stay in this order and size.
.type	AES_Te,%object
.align	5
AES_Te:
.word	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
.word	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
.word	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
.word	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
.word	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
.word	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
.word	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
.word	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
.word	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
.word	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
.word	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
.word	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
.word	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
.word	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
.word	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
.word	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
.word	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
.word	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
.word	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
.word	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
.word	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
.word	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
.word	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
.word	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
.word	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
.word	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
.word	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
.word	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
.word	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
.word	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
.word	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
.word	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
.word	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
.word	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
.word	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
.word	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
.word	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
.word	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
.word	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
.word	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
.word	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
.word	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
.word	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
.word	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
.word	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
.word	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
.word	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
.word	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
.word	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
.word	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
.word	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
.word	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
.word	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
.word	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
.word	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
.word	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
.word	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
.word	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
.word	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
.word	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
.word	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
.word	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
.word	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
.word	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
@ Te4[256]
.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
@ rcon[]
.word	0x01000000, 0x02000000, 0x04000000, 0x08000000
.word	0x10000000, 0x20000000, 0x40000000, 0x80000000
.word	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
.size	AES_Te,.-AES_Te

@ void AES_encrypt(const unsigned char *in, unsigned char *out,
@ 		 const AES_KEY *key)
@
@ Public entry point: encrypts one 16-byte block.
@ In:   r0 = in, r1 = out, r2 = key
@ Out:  16 bytes stored at out; no return value is set up
@ Regs: r4-r12 and lr are saved and restored; r0-r3 are overwritten.
@ The block is read and written one byte at a time, so neither pointer
@ needs any alignment and the result does not depend on CPU endianness.
.global AES_encrypt
.type   AES_encrypt,%function
.align	5
AES_encrypt:
	sub	r3,pc,#8		@ r3 = address of AES_encrypt (pc reads 8 ahead)
	stmdb   sp!,{r1,r4-r12,lr}	@ r1 (out) sits lowest so it can be popped alone
	mov	$rounds,r0		@ inp
	mov	$key,r2
	sub	$tbl,r3,#AES_encrypt-AES_Te	@ Te

	@ Assemble four state words, first input byte in bits 31..24.
	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
	ldrb	$t1,[$rounds,#2]	@ manner...
	ldrb	$t2,[$rounds,#1]
	ldrb	$t3,[$rounds,#0]
	orr	$s0,$s0,$t1,lsl#8
	orr	$s0,$s0,$t2,lsl#16
	orr	$s0,$s0,$t3,lsl#24
	ldrb	$s1,[$rounds,#7]
	ldrb	$t1,[$rounds,#6]
	ldrb	$t2,[$rounds,#5]
	ldrb	$t3,[$rounds,#4]
	orr	$s1,$s1,$t1,lsl#8
	orr	$s1,$s1,$t2,lsl#16
	orr	$s1,$s1,$t3,lsl#24
	ldrb	$s2,[$rounds,#11]
	ldrb	$t1,[$rounds,#10]
	ldrb	$t2,[$rounds,#9]
	ldrb	$t3,[$rounds,#8]
	orr	$s2,$s2,$t1,lsl#8
	orr	$s2,$s2,$t2,lsl#16
	orr	$s2,$s2,$t3,lsl#24
	ldrb	$s3,[$rounds,#15]
	ldrb	$t1,[$rounds,#14]
	ldrb	$t2,[$rounds,#13]
	ldrb	$t3,[$rounds,#12]
	orr	$s3,$s3,$t1,lsl#8
	orr	$s3,$s3,$t2,lsl#16
	orr	$s3,$s3,$t3,lsl#24

	bl	_armv4_AES_encrypt

	@ Scatter the four result words back out, most significant byte first.
	ldr	$rounds,[sp],#4		@ pop out
	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
	mov	$t2,$s0,lsr#16		@ manner...
	mov	$t3,$s0,lsr#8
	strb	$t1,[$rounds,#0]
	strb	$t2,[$rounds,#1]
	strb	$t3,[$rounds,#2]
	strb	$s0,[$rounds,#3]
	mov	$t1,$s1,lsr#24
	mov	$t2,$s1,lsr#16
	mov	$t3,$s1,lsr#8
	strb	$t1,[$rounds,#4]
	strb	$t2,[$rounds,#5]
	strb	$t3,[$rounds,#6]
	strb	$s1,[$rounds,#7]
	mov	$t1,$s2,lsr#24
	mov	$t2,$s2,lsr#16
	mov	$t3,$s2,lsr#8
	strb	$t1,[$rounds,#8]
	strb	$t2,[$rounds,#9]
	strb	$t3,[$rounds,#10]
	strb	$s2,[$rounds,#11]
	mov	$t1,$s3,lsr#24
	mov	$t2,$s3,lsr#16
	mov	$t3,$s3,lsr#8
	strb	$t1,[$rounds,#12]
	strb	$t2,[$rounds,#13]
	strb	$t3,[$rounds,#14]
	strb	$s3,[$rounds,#15]

	@ Return: plain mov when bit 0 of lr is clear (ARM caller), bx when it
	@ is set (Thumb caller).  The generator script rewrites the bx as a
	@ literal .word so the file still assembles for plain ARMv4.
	ldmia   sp!,{r4-r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
.size	AES_encrypt,.-AES_encrypt

@ _armv4_AES_encrypt - file-local core of AES_encrypt (private register
@ contract, not a C-callable function).
@ In:   r0-r3 = state words (first block byte in bits 31..24 of r0)
@       r10   = AES_Te
@       r11   = key schedule; the round count is read from offset 240
@ Out:  r0-r3 = encrypted state words
@       r10   = AES_Te again (moved by +2 for the last round, then restored)
@       r11   = address of the last round key
@ Clobbers r4-r9, r12 and the flags; lr is kept on the stack because it
@ doubles as the 0xff byte mask.
@ The main loop is entered unconditionally and runs (rounds - 1) times, so
@ the stored round count must be at least 2.
.type   _armv4_AES_encrypt,%function
.align	2
_armv4_AES_encrypt:
	str	lr,[sp,#-4]!		@ push lr
	ldr	$t1,[$key],#16
	ldr	$t2,[$key,#-12]
	ldr	$t3,[$key,#-8]
	ldr	$i1,[$key,#-4]
	ldr	$rounds,[$key,#240-16]	@ key was already advanced by 16
	eor	$s0,$s0,$t1		@ initial round-key addition
	eor	$s1,$s1,$t2
	eor	$s2,$s2,$t3
	eor	$s3,$s3,$i1
	sub	$rounds,$rounds,#1
	mov	lr,#255			@ byte mask

	@ One full round per pass.  Only one word table exists; the ror on
	@ each eor supplies what the Te1/Te2/Te3 tables would have held.
.Lenc_loop:
	and	$i2,lr,$s0,lsr#8
	and	$i3,lr,$s0,lsr#16
	and	$i1,lr,$s0
	mov	$s0,$s0,lsr#24
	ldr	$t1,[$tbl,$i1,lsl#2]	@ Te3[s0>>0]
	ldr	$s0,[$tbl,$s0,lsl#2]	@ Te0[s0>>24]
	ldr	$t2,[$tbl,$i2,lsl#2]	@ Te2[s0>>8]
	ldr	$t3,[$tbl,$i3,lsl#2]	@ Te1[s0>>16]

	and	$i1,lr,$s1,lsr#16	@ i0
	and	$i2,lr,$s1
	and	$i3,lr,$s1,lsr#8
	mov	$s1,$s1,lsr#24
	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te1[s1>>16]
	ldr	$s1,[$tbl,$s1,lsl#2]	@ Te0[s1>>24]
	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te3[s1>>0]
	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te2[s1>>8]
	eor	$s0,$s0,$i1,ror#8
	eor	$s1,$s1,$t1,ror#24
	eor	$t2,$t2,$i2,ror#8
	eor	$t3,$t3,$i3,ror#8

	and	$i1,lr,$s2,lsr#8	@ i0
	and	$i2,lr,$s2,lsr#16	@ i1
	and	$i3,lr,$s2
	mov	$s2,$s2,lsr#24
	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te2[s2>>8]
	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te1[s2>>16]
	ldr	$s2,[$tbl,$s2,lsl#2]	@ Te0[s2>>24]
	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te3[s2>>0]
	eor	$s0,$s0,$i1,ror#16
	eor	$s1,$s1,$i2,ror#8
	eor	$s2,$s2,$t2,ror#16
	eor	$t3,$t3,$i3,ror#16

	and	$i1,lr,$s3		@ i0
	and	$i2,lr,$s3,lsr#8	@ i1
	and	$i3,lr,$s3,lsr#16	@ i2
	mov	$s3,$s3,lsr#24
	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te3[s3>>0]
	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te2[s3>>8]
	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te1[s3>>16]
	ldr	$s3,[$tbl,$s3,lsl#2]	@ Te0[s3>>24]
	eor	$s0,$s0,$i1,ror#24
	eor	$s1,$s1,$i2,ror#16
	eor	$s2,$s2,$i3,ror#8
	eor	$s3,$s3,$t3,ror#8

	ldr	$t1,[$key],#16		@ add the next round key
	ldr	$t2,[$key,#-12]
	ldr	$t3,[$key,#-8]
	ldr	$i1,[$key,#-4]
	eor	$s0,$s0,$t1
	eor	$s1,$s1,$t2
	eor	$s2,$s2,$t3
	eor	$s3,$s3,$i1

	subs	$rounds,$rounds,#1
	bne	.Lenc_loop

	@ Last round: byte substitution only.  The byte at offset 2 of every
	@ table word is the plain S-box value whichever way the word is laid
	@ out in memory, so the same table is read with ldrb at base+2 and a
	@ stride of 4.
	add	$tbl,$tbl,#2

	and	$i1,lr,$s0
	and	$i2,lr,$s0,lsr#8
	and	$i3,lr,$s0,lsr#16
	mov	$s0,$s0,lsr#24
	ldrb	$t1,[$tbl,$i1,lsl#2]	@ Te4[s0>>0]
	ldrb	$s0,[$tbl,$s0,lsl#2]	@ Te4[s0>>24]
	ldrb	$t2,[$tbl,$i2,lsl#2]	@ Te4[s0>>8]
	ldrb	$t3,[$tbl,$i3,lsl#2]	@ Te4[s0>>16]

	and	$i1,lr,$s1,lsr#16	@ i0
	and	$i2,lr,$s1
	and	$i3,lr,$s1,lsr#8
	mov	$s1,$s1,lsr#24
	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s1>>16]
	ldrb	$s1,[$tbl,$s1,lsl#2]	@ Te4[s1>>24]
	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s1>>0]
	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s1>>8]
	eor	$s0,$i1,$s0,lsl#8
	eor	$s1,$t1,$s1,lsl#24
	eor	$t2,$i2,$t2,lsl#8
	eor	$t3,$i3,$t3,lsl#8

	and	$i1,lr,$s2,lsr#8	@ i0
	and	$i2,lr,$s2,lsr#16	@ i1
	and	$i3,lr,$s2
	mov	$s2,$s2,lsr#24
	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s2>>8]
	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s2>>16]
	ldrb	$s2,[$tbl,$s2,lsl#2]	@ Te4[s2>>24]
	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s2>>0]
	eor	$s0,$i1,$s0,lsl#8
	eor	$s1,$s1,$i2,lsl#16
	eor	$s2,$t2,$s2,lsl#24
	eor	$t3,$i3,$t3,lsl#8

	and	$i1,lr,$s3		@ i0
	and	$i2,lr,$s3,lsr#8	@ i1
	and	$i3,lr,$s3,lsr#16	@ i2
	mov	$s3,$s3,lsr#24
	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s3>>0]
	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s3>>8]
	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s3>>16]
	ldrb	$s3,[$tbl,$s3,lsl#2]	@ Te4[s3>>24]
	eor	$s0,$i1,$s0,lsl#8
	eor	$s1,$s1,$i2,lsl#8
	eor	$s2,$s2,$i3,lsl#16
	eor	$s3,$t3,$s3,lsl#24

	ldr	lr,[sp],#4		@ pop lr
	ldr	$t1,[$key,#0]		@ last round key, pointer left in place
	ldr	$t2,[$key,#4]
	ldr	$t3,[$key,#8]
	ldr	$i1,[$key,#12]
	eor	$s0,$s0,$t1
	eor	$s1,$s1,$t2
	eor	$s2,$s2,$t3
	eor	$s3,$s3,$i1

	sub	$tbl,$tbl,#2		@ undo the +2 applied for the last round
	mov	pc,lr			@ return
.size	_armv4_AES_encrypt,.-_armv4_AES_encrypt

@ AES_set_encrypt_key - expand a user key into an encryption key schedule.
@ In:   r0 = user key bytes, r1 = key length in bits, r2 = key schedule
@ Out:  r0 = 0 on success
@       r0 = -1 if r0 or r2 is zero, or r1 is not 128, 192 or 256
@       r2 = start of the key schedule on success, unchanged on failure
@ The round count (10, 12 or 14) is stored at offset 240 of the schedule.
@ r4-r12 and lr are saved and restored on the success path; the failure
@ paths leave before anything is pushed.
@
@ _armv4_AES_set_encrypt_key is a second, file-local name for the same
@ entry point.  AES_set_decrypt_key depends on the r2 guarantee above,
@ which is not part of the C calling convention, so it must reach exactly
@ this implementation.  Branching to the exported name could be redirected
@ to another definition of the symbol when this code is linked into a
@ shared object; branching to the local label cannot.
.global AES_set_encrypt_key
.type   AES_set_encrypt_key,%function
.align	5
AES_set_encrypt_key:
_armv4_AES_set_encrypt_key:
	sub	r3,pc,#8		@ AES_set_encrypt_key
	teq	r0,#0
	moveq	r0,#-1
	beq	.Labrt
	teq	r2,#0
	moveq	r0,#-1
	beq	.Labrt

	teq	r1,#128
	beq	.Lok
	teq	r1,#192
	beq	.Lok
	teq	r1,#256
	movne	r0,#-1
	bne	.Labrt

.Lok:	stmdb   sp!,{r4-r12,lr}
	sub	$tbl,r3,#AES_set_encrypt_key-AES_Te-1024	@ Te4

	mov	$rounds,r0		@ inp
	mov	lr,r1			@ bits
	mov	$key,r2			@ key

	@ The first four key words are common to all three key sizes.
	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
	ldrb	$t1,[$rounds,#2]	@ manner...
	ldrb	$t2,[$rounds,#1]
	ldrb	$t3,[$rounds,#0]
	orr	$s0,$s0,$t1,lsl#8
	orr	$s0,$s0,$t2,lsl#16
	orr	$s0,$s0,$t3,lsl#24
	ldrb	$s1,[$rounds,#7]
	ldrb	$t1,[$rounds,#6]
	ldrb	$t2,[$rounds,#5]
	ldrb	$t3,[$rounds,#4]
	orr	$s1,$s1,$t1,lsl#8
	orr	$s1,$s1,$t2,lsl#16
	orr	$s1,$s1,$t3,lsl#24
	ldrb	$s2,[$rounds,#11]
	ldrb	$t1,[$rounds,#10]
	ldrb	$t2,[$rounds,#9]
	ldrb	$t3,[$rounds,#8]
	orr	$s2,$s2,$t1,lsl#8
	orr	$s2,$s2,$t2,lsl#16
	orr	$s2,$s2,$t3,lsl#24
	ldrb	$s3,[$rounds,#15]
	ldrb	$t1,[$rounds,#14]
	ldrb	$t2,[$rounds,#13]
	ldrb	$t3,[$rounds,#12]
	orr	$s3,$s3,$t1,lsl#8
	orr	$s3,$s3,$t2,lsl#16
	orr	$s3,$s3,$t3,lsl#24
	str	$s0,[$key],#16
	str	$s1,[$key,#-12]
	str	$s2,[$key,#-8]
	str	$s3,[$key,#-4]

	@ 128-bit key: ten passes, four new words each.
	teq	lr,#128
	bne	.Lnot128
	mov	$rounds,#10
	str	$rounds,[$key,#240-16]	@ round count; key is 16 past the start
	add	$t3,$tbl,#256			@ rcon
	mov	lr,#255

.L128_loop:
	and	$t2,lr,$s3,lsr#24
	and	$i1,lr,$s3,lsr#16
	and	$i2,lr,$s3,lsr#8
	and	$i3,lr,$s3
	ldrb	$t2,[$tbl,$t2]
	ldrb	$i1,[$tbl,$i1]
	ldrb	$i2,[$tbl,$i2]
	ldrb	$i3,[$tbl,$i3]
	ldr	$t1,[$t3],#4			@ rcon[i++]
	orr	$t2,$t2,$i1,lsl#24
	orr	$t2,$t2,$i2,lsl#16
	orr	$t2,$t2,$i3,lsl#8
	eor	$t2,$t2,$t1
	eor	$s0,$s0,$t2			@ rk[4]=rk[0]^...
	eor	$s1,$s1,$s0			@ rk[5]=rk[1]^rk[4]
	eor	$s2,$s2,$s1			@ rk[6]=rk[2]^rk[5]
	eor	$s3,$s3,$s2			@ rk[7]=rk[3]^rk[6]
	str	$s0,[$key],#16
	str	$s1,[$key,#-12]
	str	$s2,[$key,#-8]
	str	$s3,[$key,#-4]

	subs	$rounds,$rounds,#1
	bne	.L128_loop
	sub	r2,$key,#176			@ 11 round keys written; back to the start
	b	.Ldone

	@ 192- and 256-bit keys both carry key words 4 and 5.
.Lnot128:
	ldrb	$i2,[$rounds,#19]
	ldrb	$t1,[$rounds,#18]
	ldrb	$t2,[$rounds,#17]
	ldrb	$t3,[$rounds,#16]
	orr	$i2,$i2,$t1,lsl#8
	orr	$i2,$i2,$t2,lsl#16
	orr	$i2,$i2,$t3,lsl#24
	ldrb	$i3,[$rounds,#23]
	ldrb	$t1,[$rounds,#22]
	ldrb	$t2,[$rounds,#21]
	ldrb	$t3,[$rounds,#20]
	orr	$i3,$i3,$t1,lsl#8
	orr	$i3,$i3,$t2,lsl#16
	orr	$i3,$i3,$t3,lsl#24
	str	$i2,[$key],#8
	str	$i3,[$key,#-4]

	@ 192-bit key: eight passes of six words; the last pass stops after
	@ its first four words.
	teq	lr,#192
	bne	.Lnot192
	mov	$rounds,#12
	str	$rounds,[$key,#240-24]	@ round count; key is 24 past the start
	add	$t3,$tbl,#256			@ rcon
	mov	lr,#255
	mov	$rounds,#8

.L192_loop:
	and	$t2,lr,$i3,lsr#24
	and	$i1,lr,$i3,lsr#16
	and	$i2,lr,$i3,lsr#8
	and	$i3,lr,$i3
	ldrb	$t2,[$tbl,$t2]
	ldrb	$i1,[$tbl,$i1]
	ldrb	$i2,[$tbl,$i2]
	ldrb	$i3,[$tbl,$i3]
	ldr	$t1,[$t3],#4			@ rcon[i++]
	orr	$t2,$t2,$i1,lsl#24
	orr	$t2,$t2,$i2,lsl#16
	orr	$t2,$t2,$i3,lsl#8
	eor	$i3,$t2,$t1
	eor	$s0,$s0,$i3			@ rk[6]=rk[0]^...
	eor	$s1,$s1,$s0			@ rk[7]=rk[1]^rk[6]
	eor	$s2,$s2,$s1			@ rk[8]=rk[2]^rk[7]
	eor	$s3,$s3,$s2			@ rk[9]=rk[3]^rk[8]
	str	$s0,[$key],#24
	str	$s1,[$key,#-20]
	str	$s2,[$key,#-16]
	str	$s3,[$key,#-12]

	subs	$rounds,$rounds,#1
	subeq	r2,$key,#216			@ back to the start of the schedule
	beq	.Ldone

	ldr	$i1,[$key,#-32]
	ldr	$i2,[$key,#-28]
	eor	$i1,$i1,$s3			@ rk[10]=rk[4]^rk[9]
	eor	$i3,$i2,$i1			@ rk[11]=rk[5]^rk[10]
	str	$i1,[$key,#-8]
	str	$i3,[$key,#-4]
	b	.L192_loop

	@ 256-bit key: load key words 6 and 7, then seven passes of eight
	@ words; the last pass stops after its first four words.
.Lnot192:
	ldrb	$i2,[$rounds,#27]
	ldrb	$t1,[$rounds,#26]
	ldrb	$t2,[$rounds,#25]
	ldrb	$t3,[$rounds,#24]
	orr	$i2,$i2,$t1,lsl#8
	orr	$i2,$i2,$t2,lsl#16
	orr	$i2,$i2,$t3,lsl#24
	ldrb	$i3,[$rounds,#31]
	ldrb	$t1,[$rounds,#30]
	ldrb	$t2,[$rounds,#29]
	ldrb	$t3,[$rounds,#28]
	orr	$i3,$i3,$t1,lsl#8
	orr	$i3,$i3,$t2,lsl#16
	orr	$i3,$i3,$t3,lsl#24
	str	$i2,[$key],#8
	str	$i3,[$key,#-4]

	mov	$rounds,#14
	str	$rounds,[$key,#240-32]	@ round count; key is 32 past the start
	add	$t3,$tbl,#256			@ rcon
	mov	lr,#255
	mov	$rounds,#7

.L256_loop:
	and	$t2,lr,$i3,lsr#24
	and	$i1,lr,$i3,lsr#16
	and	$i2,lr,$i3,lsr#8
	and	$i3,lr,$i3
	ldrb	$t2,[$tbl,$t2]
	ldrb	$i1,[$tbl,$i1]
	ldrb	$i2,[$tbl,$i2]
	ldrb	$i3,[$tbl,$i3]
	ldr	$t1,[$t3],#4			@ rcon[i++]
	orr	$t2,$t2,$i1,lsl#24
	orr	$t2,$t2,$i2,lsl#16
	orr	$t2,$t2,$i3,lsl#8
	eor	$i3,$t2,$t1
	eor	$s0,$s0,$i3			@ rk[8]=rk[0]^...
	eor	$s1,$s1,$s0			@ rk[9]=rk[1]^rk[8]
	eor	$s2,$s2,$s1			@ rk[10]=rk[2]^rk[9]
	eor	$s3,$s3,$s2			@ rk[11]=rk[3]^rk[10]
	str	$s0,[$key],#32
	str	$s1,[$key,#-28]
	str	$s2,[$key,#-24]
	str	$s3,[$key,#-20]

	subs	$rounds,$rounds,#1
	subeq	r2,$key,#256			@ back to the start of the schedule
	beq	.Ldone

	@ Second half of the pass: substitute the bytes of rk[11] without
	@ rotating them and without a round constant.
	and	$t2,lr,$s3
	and	$i1,lr,$s3,lsr#8
	and	$i2,lr,$s3,lsr#16
	and	$i3,lr,$s3,lsr#24
	ldrb	$t2,[$tbl,$t2]
	ldrb	$i1,[$tbl,$i1]
	ldrb	$i2,[$tbl,$i2]
	ldrb	$i3,[$tbl,$i3]
	orr	$t2,$t2,$i1,lsl#8
	orr	$t2,$t2,$i2,lsl#16
	orr	$t2,$t2,$i3,lsl#24

	ldr	$t1,[$key,#-48]
	ldr	$i1,[$key,#-44]
	ldr	$i2,[$key,#-40]
	ldr	$i3,[$key,#-36]
	eor	$t1,$t1,$t2			@ rk[12]=rk[4]^...
	eor	$i1,$i1,$t1			@ rk[13]=rk[5]^rk[12]
	eor	$i2,$i2,$i1			@ rk[14]=rk[6]^rk[13]
	eor	$i3,$i3,$i2			@ rk[15]=rk[7]^rk[14]
	str	$t1,[$key,#-16]
	str	$i1,[$key,#-12]
	str	$i2,[$key,#-8]
	str	$i3,[$key,#-4]
	b	.L256_loop

	@ .Labrt is also the failure exit of AES_set_decrypt_key.
.Ldone:	mov	r0,#0
	ldmia   sp!,{r4-r12,lr}
.Labrt:	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
.size	AES_set_encrypt_key,.-AES_set_encrypt_key

@ AES_set_decrypt_key - build a decryption key schedule.
@ Arguments and return value are those of AES_set_encrypt_key.
@ Steps:
@   1. run the encryption key setup; on failure return its error code
@   2. .Linv: reverse the order of the 16-byte round keys in place
@   3. .Lmix: apply the inverse column mix to every round key except the
@      first and the last, i.e. to (rounds - 1) * 4 words
@ r4-r12 and lr are saved and restored; r0-r3 are overwritten.
.global AES_set_decrypt_key
.type   AES_set_decrypt_key,%function
.align	5
AES_set_decrypt_key:
	str	lr,[sp,#-4]!            @ push lr
	bl	_armv4_AES_set_encrypt_key	@ local label, see the note above
	teq	r0,#0
	ldrne	lr,[sp],#4              @ pop lr
	bne	.Labrt

	stmdb   sp!,{r4-r12}		@ lr is already on the stack above these

	ldr	$rounds,[r2,#240]	@ AES_set_encrypt_key preserves r2,
	mov	$key,r2			@ which is AES_KEY *key
	mov	$i1,r2
	add	$i2,r2,$rounds,lsl#4

	@ Swap round keys from both ends towards the middle.  The round count
	@ is even, so the two pointers meet on the middle round key, which
	@ stays where it is.
.Linv:	ldr	$s0,[$i1]
	ldr	$s1,[$i1,#4]
	ldr	$s2,[$i1,#8]
	ldr	$s3,[$i1,#12]
	ldr	$t1,[$i2]
	ldr	$t2,[$i2,#4]
	ldr	$t3,[$i2,#8]
	ldr	$i3,[$i2,#12]
	str	$s0,[$i2],#-16
	str	$s1,[$i2,#16+4]
	str	$s2,[$i2,#16+8]
	str	$s3,[$i2,#16+12]
	str	$t1,[$i1],#16
	str	$t2,[$i1,#-12]
	str	$t3,[$i1,#-8]
	str	$i3,[$i1,#-4]
	teq	$i1,$i2
	bne	.Linv
___
# From here on the three index registers are free, so .Lmix reuses them
# for the packed-byte masks 0x80808080, 0x1b1b1b1b and 0x7f7f7f7f.
$mask80=$i1;
$mask1b=$i2;
$mask7f=$i3;
$code.=<<___;
	ldr	$s0,[$key,#16]!		@ prefetch tp1
	mov	$mask80,#0x80
	mov	$mask1b,#0x1b
	orr	$mask80,$mask80,#0x8000
	orr	$mask1b,$mask1b,#0x1b00
	orr	$mask80,$mask80,$mask80,lsl#16
	orr	$mask1b,$mask1b,$mask1b,lsl#16
	sub	$rounds,$rounds,#1
	mvn	$mask7f,$mask80
	mov	$rounds,$rounds,lsl#2	@ (rounds-1)*4

	@ Each of the three groups below doubles all four bytes of a word at
	@ once in the AES field: the top bit of every byte is turned into a
	@ 0x1b correction and the remaining seven bits are shifted left.
.Lmix:	and	$t1,$s0,$mask80
	and	$s1,$s0,$mask7f
	sub	$t1,$t1,$t1,lsr#7
	and	$t1,$t1,$mask1b
	eor	$s1,$t1,$s1,lsl#1	@ tp2

	and	$t1,$s1,$mask80
	and	$s2,$s1,$mask7f
	sub	$t1,$t1,$t1,lsr#7
	and	$t1,$t1,$mask1b
	eor	$s2,$t1,$s2,lsl#1	@ tp4

	and	$t1,$s2,$mask80
	and	$s3,$s2,$mask7f
	sub	$t1,$t1,$t1,lsr#7
	and	$t1,$t1,$mask1b
	eor	$s3,$t1,$s3,lsl#1	@ tp8

	eor	$t1,$s1,$s2
	eor	$t2,$s0,$s3		@ tp9
	eor	$t1,$t1,$s3		@ tpe
	eor	$t1,$t1,$s1,ror#24
	eor	$t1,$t1,$t2,ror#24	@ ^= ROTATE(tpb=tp9^tp2,8)
	eor	$t1,$t1,$s2,ror#16
	eor	$t1,$t1,$t2,ror#16	@ ^= ROTATE(tpd=tp9^tp4,16)
	eor	$t1,$t1,$t2,ror#8	@ ^= ROTATE(tp9,24)

	ldr	$s0,[$key,#4]		@ prefetch tp1
	str	$t1,[$key],#4
	subs	$rounds,$rounds,#1
	bne	.Lmix

	mov	r0,#0
	ldmia   sp!,{r4-r12,lr}		@ also takes back the lr pushed on entry
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
.size	AES_set_decrypt_key,.-AES_set_decrypt_key

@ AES_Td - decryption tables, kept in .text so the code can reach them
@ with a pc-relative subtraction.  Layout (byte offsets from AES_Td):
@     0 .. 1023   256 words, the round table.  The other three round
@                 tables are obtained by rotating these words.
@  1024 .. 1279   Td4[256], one byte per entry, used by the last round.
@ The offset 1024 is hard-coded in _armv4_AES_decrypt.
.type	AES_Td,%object
.align	5
AES_Td:
.word	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
.word	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
.word	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
.word	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
.word	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
.word	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
.word	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
.word	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
.word	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
.word	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
.word	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
.word	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
.word	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
.word	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
.word	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
.word	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
.word	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
.word	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
.word	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
.word	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
.word	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
.word	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
.word	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
.word	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
.word	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
.word	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
.word	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
.word	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
.word	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
.word	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
.word	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
.word	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
.word	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
.word	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
.word	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
.word	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
.word	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
.word	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
.word	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
.word	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
.word	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
.word	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
.word	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
.word	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
.word	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
.word	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
.word	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
.word	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
.word	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
.word	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
.word	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
.word	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
.word	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
.word	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
.word	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
.word	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
.word	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
.word	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
.word	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
.word	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
.word	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
.word	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
.word	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
.word	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
@ Td4[256]
.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.size	AES_Td,.-AES_Td

@ void AES_decrypt(const unsigned char *in, unsigned char *out,
@ 		 const AES_KEY *key)
@
@ Public entry point: decrypts one 16-byte block.
@ In:   r0 = in, r1 = out, r2 = key
@ Out:  16 bytes stored at out; no return value is set up
@ Regs: r4-r12 and lr are saved and restored; r0-r3 are overwritten.
@ The block is read and written one byte at a time, so neither pointer
@ needs any alignment and the result does not depend on CPU endianness.
.global AES_decrypt
.type   AES_decrypt,%function
.align	5
AES_decrypt:
	sub	r3,pc,#8		@ r3 = address of AES_decrypt (pc reads 8 ahead)
	stmdb   sp!,{r1,r4-r12,lr}	@ r1 (out) sits lowest so it can be popped alone
	mov	$rounds,r0		@ inp
	mov	$key,r2
	sub	$tbl,r3,#AES_decrypt-AES_Td		@ Td

	@ Assemble four state words, first input byte in bits 31..24.
	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
	ldrb	$t1,[$rounds,#2]	@ manner...
	ldrb	$t2,[$rounds,#1]
	ldrb	$t3,[$rounds,#0]
	orr	$s0,$s0,$t1,lsl#8
	orr	$s0,$s0,$t2,lsl#16
	orr	$s0,$s0,$t3,lsl#24
	ldrb	$s1,[$rounds,#7]
	ldrb	$t1,[$rounds,#6]
	ldrb	$t2,[$rounds,#5]
	ldrb	$t3,[$rounds,#4]
	orr	$s1,$s1,$t1,lsl#8
	orr	$s1,$s1,$t2,lsl#16
	orr	$s1,$s1,$t3,lsl#24
	ldrb	$s2,[$rounds,#11]
	ldrb	$t1,[$rounds,#10]
	ldrb	$t2,[$rounds,#9]
	ldrb	$t3,[$rounds,#8]
	orr	$s2,$s2,$t1,lsl#8
	orr	$s2,$s2,$t2,lsl#16
	orr	$s2,$s2,$t3,lsl#24
	ldrb	$s3,[$rounds,#15]
	ldrb	$t1,[$rounds,#14]
	ldrb	$t2,[$rounds,#13]
	ldrb	$t3,[$rounds,#12]
	orr	$s3,$s3,$t1,lsl#8
	orr	$s3,$s3,$t2,lsl#16
	orr	$s3,$s3,$t3,lsl#24

	bl	_armv4_AES_decrypt

	@ Scatter the four result words back out, most significant byte first.
	ldr	$rounds,[sp],#4		@ pop out
	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
	mov	$t2,$s0,lsr#16		@ manner...
	mov	$t3,$s0,lsr#8
	strb	$t1,[$rounds,#0]
	strb	$t2,[$rounds,#1]
	strb	$t3,[$rounds,#2]
	strb	$s0,[$rounds,#3]
	mov	$t1,$s1,lsr#24
	mov	$t2,$s1,lsr#16
	mov	$t3,$s1,lsr#8
	strb	$t1,[$rounds,#4]
	strb	$t2,[$rounds,#5]
	strb	$t3,[$rounds,#6]
	strb	$s1,[$rounds,#7]
	mov	$t1,$s2,lsr#24
	mov	$t2,$s2,lsr#16
	mov	$t3,$s2,lsr#8
	strb	$t1,[$rounds,#8]
	strb	$t2,[$rounds,#9]
	strb	$t3,[$rounds,#10]
	strb	$s2,[$rounds,#11]
	mov	$t1,$s3,lsr#24
	mov	$t2,$s3,lsr#16
	mov	$t3,$s3,lsr#8
	strb	$t1,[$rounds,#12]
	strb	$t2,[$rounds,#13]
	strb	$t3,[$rounds,#14]
	strb	$s3,[$rounds,#15]

	@ Return: plain mov when bit 0 of lr is clear (ARM caller), bx when it
	@ is set (Thumb caller).  The generator script rewrites the bx as a
	@ literal .word so the file still assembles for plain ARMv4.
	ldmia   sp!,{r4-r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
.size	AES_decrypt,.-AES_decrypt

@ _armv4_AES_decrypt - file-local core of AES_decrypt (private register
@ contract, not a C-callable function).
@ In:   r0-r3 = state words (first block byte in bits 31..24 of r0)
@       r10   = AES_Td
@       r11   = key schedule; the round count is read from offset 240
@ Out:  r0-r3 = decrypted state words
@       r10   = AES_Td again (moved by +1024 for the last round, then restored)
@       r11   = address of the last round key
@ Clobbers r4-r9, r12 and the flags; lr is kept on the stack because it
@ doubles as the 0xff byte mask.
@ The main loop is entered unconditionally and runs (rounds - 1) times, so
@ the stored round count must be at least 2.
.type   _armv4_AES_decrypt,%function
.align	2
_armv4_AES_decrypt:
	str	lr,[sp,#-4]!		@ push lr
	ldr	$t1,[$key],#16
	ldr	$t2,[$key,#-12]
	ldr	$t3,[$key,#-8]
	ldr	$i1,[$key,#-4]
	ldr	$rounds,[$key,#240-16]	@ key was already advanced by 16
	eor	$s0,$s0,$t1		@ initial round-key addition
	eor	$s1,$s1,$t2
	eor	$s2,$s2,$t3
	eor	$s3,$s3,$i1
	sub	$rounds,$rounds,#1
	mov	lr,#255			@ byte mask

	@ One full round per pass.  Only one word table exists; the ror on
	@ each eor supplies what the Td1/Td2/Td3 tables would have held.
.Ldec_loop:
	and	$i1,lr,$s0,lsr#16
	and	$i2,lr,$s0,lsr#8
	and	$i3,lr,$s0
	mov	$s0,$s0,lsr#24
	ldr	$t1,[$tbl,$i1,lsl#2]	@ Td1[s0>>16]
	ldr	$s0,[$tbl,$s0,lsl#2]	@ Td0[s0>>24]
	ldr	$t2,[$tbl,$i2,lsl#2]	@ Td2[s0>>8]
	ldr	$t3,[$tbl,$i3,lsl#2]	@ Td3[s0>>0]

	and	$i1,lr,$s1		@ i0
	and	$i2,lr,$s1,lsr#16
	and	$i3,lr,$s1,lsr#8
	mov	$s1,$s1,lsr#24
	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td3[s1>>0]
	ldr	$s1,[$tbl,$s1,lsl#2]	@ Td0[s1>>24]
	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td1[s1>>16]
	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td2[s1>>8]
	eor	$s0,$s0,$i1,ror#24
	eor	$s1,$s1,$t1,ror#8
	eor	$t2,$i2,$t2,ror#8
	eor	$t3,$i3,$t3,ror#8

	and	$i1,lr,$s2,lsr#8	@ i0
	and	$i2,lr,$s2		@ i1
	and	$i3,lr,$s2,lsr#16
	mov	$s2,$s2,lsr#24
	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td2[s2>>8]
	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td3[s2>>0]
	ldr	$s2,[$tbl,$s2,lsl#2]	@ Td0[s2>>24]
	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td1[s2>>16]
	eor	$s0,$s0,$i1,ror#16
	eor	$s1,$s1,$i2,ror#24
	eor	$s2,$s2,$t2,ror#8
	eor	$t3,$i3,$t3,ror#8

	and	$i1,lr,$s3,lsr#16	@ i0
	and	$i2,lr,$s3,lsr#8	@ i1
	and	$i3,lr,$s3		@ i2
	mov	$s3,$s3,lsr#24
	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td1[s3>>16]
	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td2[s3>>8]
	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td3[s3>>0]
	ldr	$s3,[$tbl,$s3,lsl#2]	@ Td0[s3>>24]
	eor	$s0,$s0,$i1,ror#8
	eor	$s1,$s1,$i2,ror#16
	eor	$s2,$s2,$i3,ror#24
	eor	$s3,$s3,$t3,ror#8

	ldr	$t1,[$key],#16		@ add the next round key
	ldr	$t2,[$key,#-12]
	ldr	$t3,[$key,#-8]
	ldr	$i1,[$key,#-4]
	eor	$s0,$s0,$t1
	eor	$s1,$s1,$t2
	eor	$s2,$s2,$t3
	eor	$s3,$s3,$i1

	subs	$rounds,$rounds,#1
	bne	.Ldec_loop

	@ Last round: byte substitution through the separate Td4 byte table.
	add	$tbl,$tbl,#1024

	@ Touch one word in every 32 bytes of Td4.  The loaded values are
	@ never used; every destination register is overwritten below.
	@ NOTE(review): presumably this pulls the whole table into the data
	@ cache ahead of the data-dependent byte loads - confirm the intent.
	ldr	$t1,[$tbl,#0]		@ prefetch Td4
	ldr	$t2,[$tbl,#32]
	ldr	$t3,[$tbl,#64]
	ldr	$i1,[$tbl,#96]
	ldr	$i2,[$tbl,#128]
	ldr	$i3,[$tbl,#160]
	ldr	$t1,[$tbl,#192]
	ldr	$t2,[$tbl,#224]

	and	$i1,lr,$s0,lsr#16
	and	$i2,lr,$s0,lsr#8
	and	$i3,lr,$s0
	ldrb	$s0,[$tbl,$s0,lsr#24]	@ Td4[s0>>24]
	ldrb	$t1,[$tbl,$i1]		@ Td4[s0>>16]
	ldrb	$t2,[$tbl,$i2]		@ Td4[s0>>8]
	ldrb	$t3,[$tbl,$i3]		@ Td4[s0>>0]

	and	$i1,lr,$s1		@ i0
	and	$i2,lr,$s1,lsr#16
	and	$i3,lr,$s1,lsr#8
	ldrb	$i1,[$tbl,$i1]		@ Td4[s1>>0]
	ldrb	$s1,[$tbl,$s1,lsr#24]	@ Td4[s1>>24]
	ldrb	$i2,[$tbl,$i2]		@ Td4[s1>>16]
	ldrb	$i3,[$tbl,$i3]		@ Td4[s1>>8]
	eor	$s0,$i1,$s0,lsl#24
	eor	$s1,$t1,$s1,lsl#8
	eor	$t2,$t2,$i2,lsl#8
	eor	$t3,$t3,$i3,lsl#8

	and	$i1,lr,$s2,lsr#8	@ i0
	and	$i2,lr,$s2		@ i1
	and	$i3,lr,$s2,lsr#16
	ldrb	$i1,[$tbl,$i1]		@ Td4[s2>>8]
	ldrb	$i2,[$tbl,$i2]		@ Td4[s2>>0]
	ldrb	$s2,[$tbl,$s2,lsr#24]	@ Td4[s2>>24]
	ldrb	$i3,[$tbl,$i3]		@ Td4[s2>>16]
	eor	$s0,$s0,$i1,lsl#8
	eor	$s1,$i2,$s1,lsl#16
	eor	$s2,$t2,$s2,lsl#16
	eor	$t3,$t3,$i3,lsl#16

	and	$i1,lr,$s3,lsr#16	@ i0
	and	$i2,lr,$s3,lsr#8	@ i1
	and	$i3,lr,$s3		@ i2
	ldrb	$i1,[$tbl,$i1]		@ Td4[s3>>16]
	ldrb	$i2,[$tbl,$i2]		@ Td4[s3>>8]
	ldrb	$i3,[$tbl,$i3]		@ Td4[s3>>0]
	ldrb	$s3,[$tbl,$s3,lsr#24]	@ Td4[s3>>24]
	eor	$s0,$s0,$i1,lsl#16
	eor	$s1,$s1,$i2,lsl#8
	eor	$s2,$i3,$s2,lsl#8
	eor	$s3,$t3,$s3,lsl#24

	ldr	lr,[sp],#4		@ pop lr
	ldr	$t1,[$key,#0]		@ last round key, pointer left in place
	ldr	$t2,[$key,#4]
	ldr	$t3,[$key,#8]
	ldr	$i1,[$key,#12]
	eor	$s0,$s0,$t1
	eor	$s1,$s1,$t2
	eor	$s2,$s2,$t3
	eor	$s3,$s3,$i1

	sub	$tbl,$tbl,#1024		@ undo the +1024 applied for the last round
	mov	pc,lr			@ return
.size	_armv4_AES_decrypt,.-_armv4_AES_decrypt
.asciz	"AES for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
.align	2
___

# Post-process and emit the generated assembly on standard output.
# Every "bx lr" is replaced by its raw instruction encoding, because an
# assembler targeting plain ARMv4 does not accept the mnemonic.
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4
print $code;
# Flush explicitly and fail loudly: a short write (full disk, closed pipe)
# must not leave a truncated assembly file behind unnoticed.
close STDOUT or die "error closing STDOUT: $!";