1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# AES for ARMv4
11
12# January 2007.
13#
# Code uses a single 1K S-box and is >2 times faster than code generated
# by gcc-3.4.1. This is thanks to a unique feature of the ARMv4 ISA, which
# allows merging a logical or arithmetic operation with a shift or rotate
# in one instruction and emitting the combined result every cycle. The
# module is endian-neutral. The performance is ~42 cycles/byte for a
# 128-bit key [on single-issue Xscale PXA250 core].
20
21# May 2007.
22#
23# AES_set_[en|de]crypt_key is added.
24
25# July 2010.
26#
27# Rescheduling for dual-issue pipeline resulted in 12% improvement on
28# Cortex A8 core and ~25 cycles per byte processed with 128-bit key.
29
30# February 2011.
31#
32# Profiler-assisted and platform-specific optimization resulted in 16%
33# improvement on Cortex A8 core and ~21.5 cycles per byte.
34
# Pick the output file from the command line: leading arguments that do not
# look like "name.ext" (for instance a perlasm flavour) are skipped.  $output
# ends up false when the argument list runs out first.
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}

# Redirect STDOUT to the requested file.  The three-argument form keeps the
# name from ever being parsed as part of the mode, and a failed open is fatal
# rather than silently ignored.  When no file name was given no open is
# attempted, so the inherited STDOUT (e.g. a shell redirection) is used.
if (defined($output) && length($output)) {
    open(STDOUT,">",$output) or die "can't open $output: $!";
}
37
# Symbolic names for the ARM registers used by the assembly templates below.
# They stay package globals because the heredocs interpolate them.

# r0-r3: the four 32-bit words of the AES state.
($s0,$s1,$s2,$s3)=map("r$_",(0..3));
# r4-r6 and r7-r9: temporaries (loaded table words / table indices).
($t1,$t2,$t3)=map("r$_",(4..6));
($i1,$i2,$i3)=map("r$_",(7..9));

# r10: look-up table base, r11: key schedule pointer, r12: round counter
# (r12 also carries the input/output pointer in the public entry points).
($tbl,$key,$rounds)=map("r$_",(10..12));
52
53$code=<<___;
54#include "arm_arch.h"
55.text
56.code	32
57
58.type	AES_Te,%object
59.align	5
60AES_Te:
61.word	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
62.word	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
63.word	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
64.word	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
65.word	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
66.word	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
67.word	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
68.word	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
69.word	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
70.word	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
71.word	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
72.word	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
73.word	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
74.word	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
75.word	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
76.word	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
77.word	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
78.word	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
79.word	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
80.word	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
81.word	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
82.word	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
83.word	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
84.word	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
85.word	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
86.word	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
87.word	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
88.word	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
89.word	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
90.word	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
91.word	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
92.word	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
93.word	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
94.word	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
95.word	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
96.word	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
97.word	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
98.word	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
99.word	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
100.word	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
101.word	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
102.word	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
103.word	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
104.word	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
105.word	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
106.word	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
107.word	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
108.word	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
109.word	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
110.word	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
111.word	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
112.word	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
113.word	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
114.word	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
115.word	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
116.word	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
117.word	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
118.word	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
119.word	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
120.word	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
121.word	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
122.word	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
123.word	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
124.word	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
125@ Te4[256]
126.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
127.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
128.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
129.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
130.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
131.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
132.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
133.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
134.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
135.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
136.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
137.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
138.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
139.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
140.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
141.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
142.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
143.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
144.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
145.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
146.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
147.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
148.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
149.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
150.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
151.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
152.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
153.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
154.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
155.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
156.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
157.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
158@ rcon[]
159.word	0x01000000, 0x02000000, 0x04000000, 0x08000000
160.word	0x10000000, 0x20000000, 0x40000000, 0x80000000
161.word	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
162.size	AES_Te,.-AES_Te
163
@ void AES_encrypt(const unsigned char *in, unsigned char *out,
@ 		 const AES_KEY *key) {
@
@ Encrypts one 16-byte block: r0 = in, r1 = out, r2 = key on entry.
@ The input is gathered into four big-endian words in r0-r3, handed to
@ _armv4_AES_encrypt, and the four result words are written to out in
@ the same byte order.  r4-r12 are saved and restored around the call.
.global AES_encrypt
.type   AES_encrypt,%function
.align	5
AES_encrypt:
	sub	r3,pc,#8		@ AES_encrypt
	stmdb   sp!,{r1,r4-r12,lr}	@ r1 (out) is parked on the stack too
	mov	$rounds,r0		@ inp
	mov	$key,r2
	sub	$tbl,r3,#AES_encrypt-AES_Te	@ Te
#if __ARM_ARCH__<7
	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
	ldrb	$t1,[$rounds,#2]	@ manner...
	ldrb	$t2,[$rounds,#1]
	ldrb	$t3,[$rounds,#0]
	orr	$s0,$s0,$t1,lsl#8
	ldrb	$s1,[$rounds,#7]
	orr	$s0,$s0,$t2,lsl#16
	ldrb	$t1,[$rounds,#6]
	orr	$s0,$s0,$t3,lsl#24
	ldrb	$t2,[$rounds,#5]
	ldrb	$t3,[$rounds,#4]
	orr	$s1,$s1,$t1,lsl#8
	ldrb	$s2,[$rounds,#11]
	orr	$s1,$s1,$t2,lsl#16
	ldrb	$t1,[$rounds,#10]
	orr	$s1,$s1,$t3,lsl#24
	ldrb	$t2,[$rounds,#9]
	ldrb	$t3,[$rounds,#8]
	orr	$s2,$s2,$t1,lsl#8
	ldrb	$s3,[$rounds,#15]
	orr	$s2,$s2,$t2,lsl#16
	ldrb	$t1,[$rounds,#14]
	orr	$s2,$s2,$t3,lsl#24
	ldrb	$t2,[$rounds,#13]
	ldrb	$t3,[$rounds,#12]
	orr	$s3,$s3,$t1,lsl#8
	orr	$s3,$s3,$t2,lsl#16
	orr	$s3,$s3,$t3,lsl#24
#else
	@ whole-word loads; byte-reversed on little-endian builds so that
	@ the words have the same value as in the byte-wise path above
	ldr	$s0,[$rounds,#0]
	ldr	$s1,[$rounds,#4]
	ldr	$s2,[$rounds,#8]
	ldr	$s3,[$rounds,#12]
#ifdef __ARMEL__
	rev	$s0,$s0
	rev	$s1,$s1
	rev	$s2,$s2
	rev	$s3,$s3
#endif
#endif
	bl	_armv4_AES_encrypt

	ldr	$rounds,[sp],#4		@ pop out
#if __ARM_ARCH__>=7
#ifdef __ARMEL__
	rev	$s0,$s0
	rev	$s1,$s1
	rev	$s2,$s2
	rev	$s3,$s3
#endif
	str	$s0,[$rounds,#0]
	str	$s1,[$rounds,#4]
	str	$s2,[$rounds,#8]
	str	$s3,[$rounds,#12]
#else
	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
	mov	$t2,$s0,lsr#16		@ manner...
	mov	$t3,$s0,lsr#8
	strb	$t1,[$rounds,#0]
	strb	$t2,[$rounds,#1]
	mov	$t1,$s1,lsr#24
	strb	$t3,[$rounds,#2]
	mov	$t2,$s1,lsr#16
	strb	$s0,[$rounds,#3]
	mov	$t3,$s1,lsr#8
	strb	$t1,[$rounds,#4]
	strb	$t2,[$rounds,#5]
	mov	$t1,$s2,lsr#24
	strb	$t3,[$rounds,#6]
	mov	$t2,$s2,lsr#16
	strb	$s1,[$rounds,#7]
	mov	$t3,$s2,lsr#8
	strb	$t1,[$rounds,#8]
	strb	$t2,[$rounds,#9]
	mov	$t1,$s3,lsr#24
	strb	$t3,[$rounds,#10]
	mov	$t2,$s3,lsr#16
	strb	$s2,[$rounds,#11]
	mov	$t3,$s3,lsr#8
	strb	$t1,[$rounds,#12]
	strb	$t2,[$rounds,#13]
	strb	$t3,[$rounds,#14]
	strb	$s3,[$rounds,#15]
#endif
	@ restore the saved registers and return to the caller
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r12,pc}
#else
	ldmia   sp!,{r4-r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
#endif
.size	AES_encrypt,.-AES_encrypt
269
@ _armv4_AES_encrypt: encryption core, called from AES_encrypt.
@   in:  r0-r3 = state words, r10 = AES_Te, r11 = key schedule
@   out: r0-r3 = result words; r10 is unchanged on return, r11 points
@        at the last round key; r4-r9, r12 and lr are used as scratch.
@ The single 1KB table serves as Te0..Te3: every look-up reads the same
@ table and the ror operands of the eor instructions supply the
@ rotation that distinguishes Te1, Te2 and Te3.
.type   _armv4_AES_encrypt,%function
.align	2
_armv4_AES_encrypt:
	str	lr,[sp,#-4]!		@ push lr
	ldmia	$key!,{$t1-$i1}		@ round key 0, key pointer advances by 16
	eor	$s0,$s0,$t1
	ldr	$rounds,[$key,#240-16]	@ round count kept at offset 240 of the schedule
	eor	$s1,$s1,$t2
	eor	$s2,$s2,$t3
	eor	$s3,$s3,$i1
	sub	$rounds,$rounds,#1	@ the last round is done separately after the loop
	mov	lr,#255			@ byte mask used by all index extractions

	and	$i1,lr,$s0
	and	$i2,lr,$s0,lsr#8
	and	$i3,lr,$s0,lsr#16
	mov	$s0,$s0,lsr#24
.Lenc_loop:
	ldr	$t1,[$tbl,$i1,lsl#2]	@ Te3[s0>>0]
	and	$i1,lr,$s1,lsr#16	@ i0
	ldr	$t2,[$tbl,$i2,lsl#2]	@ Te2[s0>>8]
	and	$i2,lr,$s1
	ldr	$t3,[$tbl,$i3,lsl#2]	@ Te1[s0>>16]
	and	$i3,lr,$s1,lsr#8
	ldr	$s0,[$tbl,$s0,lsl#2]	@ Te0[s0>>24]
	mov	$s1,$s1,lsr#24

	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te1[s1>>16]
	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te3[s1>>0]
	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te2[s1>>8]
	eor	$s0,$s0,$i1,ror#8
	ldr	$s1,[$tbl,$s1,lsl#2]	@ Te0[s1>>24]
	and	$i1,lr,$s2,lsr#8	@ i0
	eor	$t2,$t2,$i2,ror#8
	and	$i2,lr,$s2,lsr#16	@ i1
	eor	$t3,$t3,$i3,ror#8
	and	$i3,lr,$s2
	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te2[s2>>8]
	eor	$s1,$s1,$t1,ror#24
	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te1[s2>>16]
	mov	$s2,$s2,lsr#24

	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te3[s2>>0]
	eor	$s0,$s0,$i1,ror#16
	ldr	$s2,[$tbl,$s2,lsl#2]	@ Te0[s2>>24]
	and	$i1,lr,$s3		@ i0
	eor	$s1,$s1,$i2,ror#8
	and	$i2,lr,$s3,lsr#8	@ i1
	eor	$t3,$t3,$i3,ror#16
	and	$i3,lr,$s3,lsr#16	@ i2
	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te3[s3>>0]
	eor	$s2,$s2,$t2,ror#16
	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te2[s3>>8]
	mov	$s3,$s3,lsr#24

	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te1[s3>>16]
	eor	$s0,$s0,$i1,ror#24
	ldr	$i1,[$key],#16		@ next round key, word 0
	eor	$s1,$s1,$i2,ror#16
	ldr	$s3,[$tbl,$s3,lsl#2]	@ Te0[s3>>24]
	eor	$s2,$s2,$i3,ror#8
	ldr	$t1,[$key,#-12]
	eor	$s3,$s3,$t3,ror#8

	ldr	$t2,[$key,#-8]
	eor	$s0,$s0,$i1
	ldr	$t3,[$key,#-4]
	and	$i1,lr,$s0
	eor	$s1,$s1,$t1
	and	$i2,lr,$s0,lsr#8
	eor	$s2,$s2,$t2
	and	$i3,lr,$s0,lsr#16
	eor	$s3,$s3,$t3
	mov	$s0,$s0,lsr#24

	subs	$rounds,$rounds,#1
	bne	.Lenc_loop

	@ Last round: only the S-box byte is needed.  Byte 2 of every Te
	@ word holds that value whichever way the word is stored, so the
	@ base is moved by 2 and the same lsl#2 indexing is reused with
	@ byte loads.
	add	$tbl,$tbl,#2

	ldrb	$t1,[$tbl,$i1,lsl#2]	@ Te4[s0>>0]
	and	$i1,lr,$s1,lsr#16	@ i0
	ldrb	$t2,[$tbl,$i2,lsl#2]	@ Te4[s0>>8]
	and	$i2,lr,$s1
	ldrb	$t3,[$tbl,$i3,lsl#2]	@ Te4[s0>>16]
	and	$i3,lr,$s1,lsr#8
	ldrb	$s0,[$tbl,$s0,lsl#2]	@ Te4[s0>>24]
	mov	$s1,$s1,lsr#24

	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s1>>16]
	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s1>>0]
	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s1>>8]
	eor	$s0,$i1,$s0,lsl#8
	ldrb	$s1,[$tbl,$s1,lsl#2]	@ Te4[s1>>24]
	and	$i1,lr,$s2,lsr#8	@ i0
	eor	$t2,$i2,$t2,lsl#8
	and	$i2,lr,$s2,lsr#16	@ i1
	eor	$t3,$i3,$t3,lsl#8
	and	$i3,lr,$s2
	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s2>>8]
	eor	$s1,$t1,$s1,lsl#24
	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s2>>16]
	mov	$s2,$s2,lsr#24

	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s2>>0]
	eor	$s0,$i1,$s0,lsl#8
	ldrb	$s2,[$tbl,$s2,lsl#2]	@ Te4[s2>>24]
	and	$i1,lr,$s3		@ i0
	eor	$s1,$s1,$i2,lsl#16
	and	$i2,lr,$s3,lsr#8	@ i1
	eor	$t3,$i3,$t3,lsl#8
	and	$i3,lr,$s3,lsr#16	@ i2
	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s3>>0]
	eor	$s2,$t2,$s2,lsl#24
	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s3>>8]
	mov	$s3,$s3,lsr#24

	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s3>>16]
	eor	$s0,$i1,$s0,lsl#8
	ldr	$i1,[$key,#0]		@ last round key, read in place
	ldrb	$s3,[$tbl,$s3,lsl#2]	@ Te4[s3>>24]
	eor	$s1,$s1,$i2,lsl#8
	ldr	$t1,[$key,#4]
	eor	$s2,$s2,$i3,lsl#16
	ldr	$t2,[$key,#8]
	eor	$s3,$t3,$s3,lsl#24
	ldr	$t3,[$key,#12]

	eor	$s0,$s0,$i1
	eor	$s1,$s1,$t1
	eor	$s2,$s2,$t2
	eor	$s3,$s3,$t3

	sub	$tbl,$tbl,#2		@ undo the +2 adjustment
	ldr	pc,[sp],#4		@ pop and return
.size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
406
@ private_AES_set_encrypt_key: expands a user key into round keys.
@   in:  r0 = user key bytes, r1 = key length in bits, r2 = AES_KEY
@   out: r0 = 0 on success, -1 if r0 or r2 is zero or the bit count
@        is not 128, 192 or 256; on success r2 points at the AES_KEY.
@ The round count (10, 12 or 14) is stored at offset 240 of the AES_KEY.
@ _armv4_AES_set_encrypt_key is a second label for the same entry; it
@ is the one private_AES_set_decrypt_key branches to.
.global private_AES_set_encrypt_key
.type   private_AES_set_encrypt_key,%function
.align	5
private_AES_set_encrypt_key:
_armv4_AES_set_encrypt_key:
	sub	r3,pc,#8		@ AES_set_encrypt_key
	teq	r0,#0
	moveq	r0,#-1
	beq	.Labrt
	teq	r2,#0
	moveq	r0,#-1
	beq	.Labrt

	teq	r1,#128
	beq	.Lok
	teq	r1,#192
	beq	.Lok
	teq	r1,#256
	movne	r0,#-1
	bne	.Labrt

.Lok:	stmdb   sp!,{r4-r12,lr}
	sub	$tbl,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024	@ Te4

	mov	$rounds,r0		@ inp
	mov	lr,r1			@ bits
	mov	$key,r2			@ key

	@ first 128 bits of the user key become rk[0..3]
#if __ARM_ARCH__<7
	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
	ldrb	$t1,[$rounds,#2]	@ manner...
	ldrb	$t2,[$rounds,#1]
	ldrb	$t3,[$rounds,#0]
	orr	$s0,$s0,$t1,lsl#8
	ldrb	$s1,[$rounds,#7]
	orr	$s0,$s0,$t2,lsl#16
	ldrb	$t1,[$rounds,#6]
	orr	$s0,$s0,$t3,lsl#24
	ldrb	$t2,[$rounds,#5]
	ldrb	$t3,[$rounds,#4]
	orr	$s1,$s1,$t1,lsl#8
	ldrb	$s2,[$rounds,#11]
	orr	$s1,$s1,$t2,lsl#16
	ldrb	$t1,[$rounds,#10]
	orr	$s1,$s1,$t3,lsl#24
	ldrb	$t2,[$rounds,#9]
	ldrb	$t3,[$rounds,#8]
	orr	$s2,$s2,$t1,lsl#8
	ldrb	$s3,[$rounds,#15]
	orr	$s2,$s2,$t2,lsl#16
	ldrb	$t1,[$rounds,#14]
	orr	$s2,$s2,$t3,lsl#24
	ldrb	$t2,[$rounds,#13]
	ldrb	$t3,[$rounds,#12]
	orr	$s3,$s3,$t1,lsl#8
	str	$s0,[$key],#16
	orr	$s3,$s3,$t2,lsl#16
	str	$s1,[$key,#-12]
	orr	$s3,$s3,$t3,lsl#24
	str	$s2,[$key,#-8]
	str	$s3,[$key,#-4]
#else
	ldr	$s0,[$rounds,#0]
	ldr	$s1,[$rounds,#4]
	ldr	$s2,[$rounds,#8]
	ldr	$s3,[$rounds,#12]
#ifdef __ARMEL__
	rev	$s0,$s0
	rev	$s1,$s1
	rev	$s2,$s2
	rev	$s3,$s3
#endif
	str	$s0,[$key],#16
	str	$s1,[$key,#-12]
	str	$s2,[$key,#-8]
	str	$s3,[$key,#-4]
#endif

	teq	lr,#128
	bne	.Lnot128
	mov	$rounds,#10		@ round count, doubles as loop counter
	str	$rounds,[$key,#240-16]
	add	$t3,$tbl,#256			@ rcon
	mov	lr,#255

	@ each pass derives four new words:
	@ rk[4] = rk[0] ^ SubWord(RotWord(rk[3])) ^ rcon, then a xor chain
.L128_loop:
	and	$t2,lr,$s3,lsr#24
	and	$i1,lr,$s3,lsr#16
	ldrb	$t2,[$tbl,$t2]
	and	$i2,lr,$s3,lsr#8
	ldrb	$i1,[$tbl,$i1]
	and	$i3,lr,$s3
	ldrb	$i2,[$tbl,$i2]
	orr	$t2,$t2,$i1,lsl#24
	ldrb	$i3,[$tbl,$i3]
	orr	$t2,$t2,$i2,lsl#16
	ldr	$t1,[$t3],#4			@ rcon[i++]
	orr	$t2,$t2,$i3,lsl#8
	eor	$t2,$t2,$t1
	eor	$s0,$s0,$t2			@ rk[4]=rk[0]^...
	eor	$s1,$s1,$s0			@ rk[5]=rk[1]^rk[4]
	str	$s0,[$key],#16
	eor	$s2,$s2,$s1			@ rk[6]=rk[2]^rk[5]
	str	$s1,[$key,#-12]
	eor	$s3,$s3,$s2			@ rk[7]=rk[3]^rk[6]
	str	$s2,[$key,#-8]
	subs	$rounds,$rounds,#1
	str	$s3,[$key,#-4]
	bne	.L128_loop
	sub	r2,$key,#176		@ 11 round keys of 16 bytes were written
	b	.Ldone

.Lnot128:
	@ bits 128..191 of the user key become rk[4..5]
#if __ARM_ARCH__<7
	ldrb	$i2,[$rounds,#19]
	ldrb	$t1,[$rounds,#18]
	ldrb	$t2,[$rounds,#17]
	ldrb	$t3,[$rounds,#16]
	orr	$i2,$i2,$t1,lsl#8
	ldrb	$i3,[$rounds,#23]
	orr	$i2,$i2,$t2,lsl#16
	ldrb	$t1,[$rounds,#22]
	orr	$i2,$i2,$t3,lsl#24
	ldrb	$t2,[$rounds,#21]
	ldrb	$t3,[$rounds,#20]
	orr	$i3,$i3,$t1,lsl#8
	orr	$i3,$i3,$t2,lsl#16
	str	$i2,[$key],#8
	orr	$i3,$i3,$t3,lsl#24
	str	$i3,[$key,#-4]
#else
	ldr	$i2,[$rounds,#16]
	ldr	$i3,[$rounds,#20]
#ifdef __ARMEL__
	rev	$i2,$i2
	rev	$i3,$i3
#endif
	str	$i2,[$key],#8
	str	$i3,[$key,#-4]
#endif

	teq	lr,#192
	bne	.Lnot192
	mov	$rounds,#12
	str	$rounds,[$key,#240-24]
	add	$t3,$tbl,#256			@ rcon
	mov	lr,#255
	mov	$rounds,#8		@ loop count: 8 passes of 24 bytes

.L192_loop:
	and	$t2,lr,$i3,lsr#24
	and	$i1,lr,$i3,lsr#16
	ldrb	$t2,[$tbl,$t2]
	and	$i2,lr,$i3,lsr#8
	ldrb	$i1,[$tbl,$i1]
	and	$i3,lr,$i3
	ldrb	$i2,[$tbl,$i2]
	orr	$t2,$t2,$i1,lsl#24
	ldrb	$i3,[$tbl,$i3]
	orr	$t2,$t2,$i2,lsl#16
	ldr	$t1,[$t3],#4			@ rcon[i++]
	orr	$t2,$t2,$i3,lsl#8
	eor	$i3,$t2,$t1
	eor	$s0,$s0,$i3			@ rk[6]=rk[0]^...
	eor	$s1,$s1,$s0			@ rk[7]=rk[1]^rk[6]
	str	$s0,[$key],#24
	eor	$s2,$s2,$s1			@ rk[8]=rk[2]^rk[7]
	str	$s1,[$key,#-20]
	eor	$s3,$s3,$s2			@ rk[9]=rk[3]^rk[8]
	str	$s2,[$key,#-16]
	subs	$rounds,$rounds,#1
	str	$s3,[$key,#-12]
	subeq	r2,$key,#216		@ last pass stops after four words
	beq	.Ldone

	ldr	$i1,[$key,#-32]
	ldr	$i2,[$key,#-28]
	eor	$i1,$i1,$s3			@ rk[10]=rk[4]^rk[9]
	eor	$i3,$i2,$i1			@ rk[11]=rk[5]^rk[10]
	str	$i1,[$key,#-8]
	str	$i3,[$key,#-4]
	b	.L192_loop

.Lnot192:
	@ bits 192..255 of the user key become rk[6..7]
#if __ARM_ARCH__<7
	ldrb	$i2,[$rounds,#27]
	ldrb	$t1,[$rounds,#26]
	ldrb	$t2,[$rounds,#25]
	ldrb	$t3,[$rounds,#24]
	orr	$i2,$i2,$t1,lsl#8
	ldrb	$i3,[$rounds,#31]
	orr	$i2,$i2,$t2,lsl#16
	ldrb	$t1,[$rounds,#30]
	orr	$i2,$i2,$t3,lsl#24
	ldrb	$t2,[$rounds,#29]
	ldrb	$t3,[$rounds,#28]
	orr	$i3,$i3,$t1,lsl#8
	orr	$i3,$i3,$t2,lsl#16
	str	$i2,[$key],#8
	orr	$i3,$i3,$t3,lsl#24
	str	$i3,[$key,#-4]
#else
	ldr	$i2,[$rounds,#24]
	ldr	$i3,[$rounds,#28]
#ifdef __ARMEL__
	rev	$i2,$i2
	rev	$i3,$i3
#endif
	str	$i2,[$key],#8
	str	$i3,[$key,#-4]
#endif

	mov	$rounds,#14
	str	$rounds,[$key,#240-32]
	add	$t3,$tbl,#256			@ rcon
	mov	lr,#255
	mov	$rounds,#7		@ loop count: 7 passes of 32 bytes

.L256_loop:
	and	$t2,lr,$i3,lsr#24
	and	$i1,lr,$i3,lsr#16
	ldrb	$t2,[$tbl,$t2]
	and	$i2,lr,$i3,lsr#8
	ldrb	$i1,[$tbl,$i1]
	and	$i3,lr,$i3
	ldrb	$i2,[$tbl,$i2]
	orr	$t2,$t2,$i1,lsl#24
	ldrb	$i3,[$tbl,$i3]
	orr	$t2,$t2,$i2,lsl#16
	ldr	$t1,[$t3],#4			@ rcon[i++]
	orr	$t2,$t2,$i3,lsl#8
	eor	$i3,$t2,$t1
	eor	$s0,$s0,$i3			@ rk[8]=rk[0]^...
	eor	$s1,$s1,$s0			@ rk[9]=rk[1]^rk[8]
	str	$s0,[$key],#32
	eor	$s2,$s2,$s1			@ rk[10]=rk[2]^rk[9]
	str	$s1,[$key,#-28]
	eor	$s3,$s3,$s2			@ rk[11]=rk[3]^rk[10]
	str	$s2,[$key,#-24]
	subs	$rounds,$rounds,#1
	str	$s3,[$key,#-20]
	subeq	r2,$key,#256		@ last pass stops after four words
	beq	.Ldone

	@ second half of the pass: S-box substitution of rk[11] with no
	@ byte rotation and no rcon
	and	$t2,lr,$s3
	and	$i1,lr,$s3,lsr#8
	ldrb	$t2,[$tbl,$t2]
	and	$i2,lr,$s3,lsr#16
	ldrb	$i1,[$tbl,$i1]
	and	$i3,lr,$s3,lsr#24
	ldrb	$i2,[$tbl,$i2]
	orr	$t2,$t2,$i1,lsl#8
	ldrb	$i3,[$tbl,$i3]
	orr	$t2,$t2,$i2,lsl#16
	ldr	$t1,[$key,#-48]
	orr	$t2,$t2,$i3,lsl#24

	ldr	$i1,[$key,#-44]
	ldr	$i2,[$key,#-40]
	eor	$t1,$t1,$t2			@ rk[12]=rk[4]^...
	ldr	$i3,[$key,#-36]
	eor	$i1,$i1,$t1			@ rk[13]=rk[5]^rk[12]
	str	$t1,[$key,#-16]
	eor	$i2,$i2,$i1			@ rk[14]=rk[6]^rk[13]
	str	$i1,[$key,#-12]
	eor	$i3,$i3,$i2			@ rk[15]=rk[7]^rk[14]
	str	$i2,[$key,#-8]
	str	$i3,[$key,#-4]
	b	.L256_loop

.Ldone:	mov	r0,#0
	ldmia   sp!,{r4-r12,lr}
	@ .Labrt is also reached straight from the argument checks, before
	@ anything was pushed, with lr still holding the return address
.Labrt:	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
.size	private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
683
@ private_AES_set_decrypt_key: builds the decryption key schedule.
@ Takes the same r0, r1, r2 arguments as the encrypt key setup, which it
@ calls first; a non-zero result from that call is returned through
@ .Labrt.  Otherwise the round keys are put in reverse order here and
@ then transformed by the .Lmix loop that follows.
.global private_AES_set_decrypt_key
.type   private_AES_set_decrypt_key,%function
.align	5
private_AES_set_decrypt_key:
	str	lr,[sp,#-4]!            @ push lr
	bl	_armv4_AES_set_encrypt_key
	teq	r0,#0
	ldrne	lr,[sp],#4              @ pop lr
	bne	.Labrt

	stmdb   sp!,{r4-r12}

	ldr	$rounds,[r2,#240]	@ AES_set_encrypt_key preserves r2,
	mov	$key,r2			@ which is AES_KEY *key
	mov	$i1,r2			@ walks up from the first round key
	add	$i2,r2,$rounds,lsl#4	@ walks down from the last round key

	@ swap 16-byte round keys from both ends until the pointers meet
.Linv:	ldr	$s0,[$i1]
	ldr	$s1,[$i1,#4]
	ldr	$s2,[$i1,#8]
	ldr	$s3,[$i1,#12]
	ldr	$t1,[$i2]
	ldr	$t2,[$i2,#4]
	ldr	$t3,[$i2,#8]
	ldr	$i3,[$i2,#12]
	str	$s0,[$i2],#-16
	str	$s1,[$i2,#16+4]
	str	$s2,[$i2,#16+8]
	str	$s3,[$i2,#16+12]
	str	$t1,[$i1],#16
	str	$t2,[$i1,#-12]
	str	$t3,[$i1,#-8]
	str	$i3,[$i1,#-4]
	teq	$i1,$i2
	bne	.Linv
719___
# Register aliases for the .Lmix loop of the next template.  They reuse the
# index registers; .Lmix loads them with 0x80808080, 0x1b1b1b1b and the
# complement of 0x80808080 respectively.
($mask80,$mask1b,$mask7f)=($i1,$i2,$i3);
723$code.=<<___;
	@ Second half of private_AES_set_decrypt_key: every word of round
	@ keys 1 .. rounds-1 is replaced in place by its inverse MixColumns
	@ image.  Four bytes are processed at once: tp2, tp4 and tp8 are
	@ successive packed doublings in GF(2^8), with 0x1b folded into the
	@ bytes whose top bit was set.
	ldr	$s0,[$key,#16]!		@ prefetch tp1
	mov	$mask80,#0x80
	mov	$mask1b,#0x1b
	orr	$mask80,$mask80,#0x8000
	orr	$mask1b,$mask1b,#0x1b00
	orr	$mask80,$mask80,$mask80,lsl#16	@ 0x80808080
	orr	$mask1b,$mask1b,$mask1b,lsl#16	@ 0x1b1b1b1b
	sub	$rounds,$rounds,#1
	mvn	$mask7f,$mask80			@ 0x7f7f7f7f
	mov	$rounds,$rounds,lsl#2	@ (rounds-1)*4

.Lmix:	and	$t1,$s0,$mask80
	and	$s1,$s0,$mask7f
	sub	$t1,$t1,$t1,lsr#7	@ 0x80 becomes 0x7f in each flagged byte
	and	$t1,$t1,$mask1b
	eor	$s1,$t1,$s1,lsl#1	@ tp2

	and	$t1,$s1,$mask80
	and	$s2,$s1,$mask7f
	sub	$t1,$t1,$t1,lsr#7
	and	$t1,$t1,$mask1b
	eor	$s2,$t1,$s2,lsl#1	@ tp4

	and	$t1,$s2,$mask80
	and	$s3,$s2,$mask7f
	sub	$t1,$t1,$t1,lsr#7
	and	$t1,$t1,$mask1b
	eor	$s3,$t1,$s3,lsl#1	@ tp8

	eor	$t1,$s1,$s2
	eor	$t2,$s0,$s3		@ tp9
	eor	$t1,$t1,$s3		@ tpe
	eor	$t1,$t1,$s1,ror#24
	eor	$t1,$t1,$t2,ror#24	@ ^= ROTATE(tpb=tp9^tp2,8)
	eor	$t1,$t1,$s2,ror#16
	eor	$t1,$t1,$t2,ror#16	@ ^= ROTATE(tpd=tp9^tp4,16)
	eor	$t1,$t1,$t2,ror#8	@ ^= ROTATE(tp9,24)

	ldr	$s0,[$key,#4]		@ prefetch tp1
	str	$t1,[$key],#4
	subs	$rounds,$rounds,#1
	bne	.Lmix

	mov	r0,#0			@ success
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r12,pc}
#else
	ldmia   sp!,{r4-r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
#endif
.size	private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
777
778.type	AES_Td,%object
779.align	5
780AES_Td:
781.word	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
782.word	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
783.word	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
784.word	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
785.word	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
786.word	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
787.word	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
788.word	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
789.word	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
790.word	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
791.word	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
792.word	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
793.word	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
794.word	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
795.word	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
796.word	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
797.word	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
798.word	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
799.word	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
800.word	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
801.word	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
802.word	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
803.word	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
804.word	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
805.word	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
806.word	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
807.word	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
808.word	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
809.word	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
810.word	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
811.word	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
812.word	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
813.word	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
814.word	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
815.word	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
816.word	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
817.word	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
818.word	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
819.word	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
820.word	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
821.word	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
822.word	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
823.word	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
824.word	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
825.word	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
826.word	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
827.word	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
828.word	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
829.word	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
830.word	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
831.word	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
832.word	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
833.word	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
834.word	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
835.word	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
836.word	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
837.word	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
838.word	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
839.word	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
840.word	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
841.word	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
842.word	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
843.word	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
844.word	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
845@ Td4[256]
846.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
847.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
848.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
849.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
850.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
851.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
852.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
853.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
854.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
855.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
856.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
857.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
858.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
859.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
860.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
861.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
862.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
863.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
864.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
865.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
866.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
867.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
868.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
869.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
870.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
871.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
872.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
873.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
874.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
875.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
876.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
877.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
878.size	AES_Td,.-AES_Td
879
@ void AES_decrypt(const unsigned char *in, unsigned char *out,
@ 		 const AES_KEY *key) {
@
@ Decrypts one 16-byte block: r0 = in, r1 = out, r2 = key on entry.
@ The input is gathered into four big-endian words in r0-r3, handed to
@ _armv4_AES_decrypt, and the four result words are written to out in
@ the same byte order.  r4-r12 are saved and restored around the call.
.global AES_decrypt
.type   AES_decrypt,%function
.align	5
AES_decrypt:
	sub	r3,pc,#8		@ AES_decrypt
	stmdb   sp!,{r1,r4-r12,lr}	@ r1 (out) is parked on the stack too
	mov	$rounds,r0		@ inp
	mov	$key,r2
	sub	$tbl,r3,#AES_decrypt-AES_Td		@ Td
#if __ARM_ARCH__<7
	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
	ldrb	$t1,[$rounds,#2]	@ manner...
	ldrb	$t2,[$rounds,#1]
	ldrb	$t3,[$rounds,#0]
	orr	$s0,$s0,$t1,lsl#8
	ldrb	$s1,[$rounds,#7]
	orr	$s0,$s0,$t2,lsl#16
	ldrb	$t1,[$rounds,#6]
	orr	$s0,$s0,$t3,lsl#24
	ldrb	$t2,[$rounds,#5]
	ldrb	$t3,[$rounds,#4]
	orr	$s1,$s1,$t1,lsl#8
	ldrb	$s2,[$rounds,#11]
	orr	$s1,$s1,$t2,lsl#16
	ldrb	$t1,[$rounds,#10]
	orr	$s1,$s1,$t3,lsl#24
	ldrb	$t2,[$rounds,#9]
	ldrb	$t3,[$rounds,#8]
	orr	$s2,$s2,$t1,lsl#8
	ldrb	$s3,[$rounds,#15]
	orr	$s2,$s2,$t2,lsl#16
	ldrb	$t1,[$rounds,#14]
	orr	$s2,$s2,$t3,lsl#24
	ldrb	$t2,[$rounds,#13]
	ldrb	$t3,[$rounds,#12]
	orr	$s3,$s3,$t1,lsl#8
	orr	$s3,$s3,$t2,lsl#16
	orr	$s3,$s3,$t3,lsl#24
#else
	@ whole-word loads; byte-reversed on little-endian builds so that
	@ the words have the same value as in the byte-wise path above
	ldr	$s0,[$rounds,#0]
	ldr	$s1,[$rounds,#4]
	ldr	$s2,[$rounds,#8]
	ldr	$s3,[$rounds,#12]
#ifdef __ARMEL__
	rev	$s0,$s0
	rev	$s1,$s1
	rev	$s2,$s2
	rev	$s3,$s3
#endif
#endif
	bl	_armv4_AES_decrypt

	ldr	$rounds,[sp],#4		@ pop out
#if __ARM_ARCH__>=7
#ifdef __ARMEL__
	rev	$s0,$s0
	rev	$s1,$s1
	rev	$s2,$s2
	rev	$s3,$s3
#endif
	str	$s0,[$rounds,#0]
	str	$s1,[$rounds,#4]
	str	$s2,[$rounds,#8]
	str	$s3,[$rounds,#12]
#else
	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
	mov	$t2,$s0,lsr#16		@ manner...
	mov	$t3,$s0,lsr#8
	strb	$t1,[$rounds,#0]
	strb	$t2,[$rounds,#1]
	mov	$t1,$s1,lsr#24
	strb	$t3,[$rounds,#2]
	mov	$t2,$s1,lsr#16
	strb	$s0,[$rounds,#3]
	mov	$t3,$s1,lsr#8
	strb	$t1,[$rounds,#4]
	strb	$t2,[$rounds,#5]
	mov	$t1,$s2,lsr#24
	strb	$t3,[$rounds,#6]
	mov	$t2,$s2,lsr#16
	strb	$s1,[$rounds,#7]
	mov	$t3,$s2,lsr#8
	strb	$t1,[$rounds,#8]
	strb	$t2,[$rounds,#9]
	mov	$t1,$s3,lsr#24
	strb	$t3,[$rounds,#10]
	mov	$t2,$s3,lsr#16
	strb	$s2,[$rounds,#11]
	mov	$t3,$s3,lsr#8
	strb	$t1,[$rounds,#12]
	strb	$t2,[$rounds,#13]
	strb	$t3,[$rounds,#14]
	strb	$s3,[$rounds,#15]
#endif
	@ restore the saved registers and return to the caller
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r12,pc}
#else
	ldmia   sp!,{r4-r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
#endif
.size	AES_decrypt,.-AES_decrypt
985
@ _armv4_AES_decrypt: decryption core, called from AES_decrypt.
@   in:  r0-r3 = state words, r10 = AES_Td, r11 = key schedule
@   out: r0-r3 = result words; r10 is unchanged on return, r11 points
@        at the last round key; r4-r9, r12 and lr are used as scratch.
@ The single 1KB table serves as Td0..Td3: every look-up reads the same
@ table and the ror operands of the eor instructions supply the
@ rotation that distinguishes Td1, Td2 and Td3.
.type   _armv4_AES_decrypt,%function
.align	2
_armv4_AES_decrypt:
	str	lr,[sp,#-4]!		@ push lr
	ldmia	$key!,{$t1-$i1}		@ round key 0, key pointer advances by 16
	eor	$s0,$s0,$t1
	ldr	$rounds,[$key,#240-16]	@ round count kept at offset 240 of the schedule
	eor	$s1,$s1,$t2
	eor	$s2,$s2,$t3
	eor	$s3,$s3,$i1
	sub	$rounds,$rounds,#1	@ the last round is done separately after the loop
	mov	lr,#255			@ byte mask used by all index extractions

	and	$i1,lr,$s0,lsr#16
	and	$i2,lr,$s0,lsr#8
	and	$i3,lr,$s0
	mov	$s0,$s0,lsr#24
.Ldec_loop:
	ldr	$t1,[$tbl,$i1,lsl#2]	@ Td1[s0>>16]
	and	$i1,lr,$s1		@ i0
	ldr	$t2,[$tbl,$i2,lsl#2]	@ Td2[s0>>8]
	and	$i2,lr,$s1,lsr#16
	ldr	$t3,[$tbl,$i3,lsl#2]	@ Td3[s0>>0]
	and	$i3,lr,$s1,lsr#8
	ldr	$s0,[$tbl,$s0,lsl#2]	@ Td0[s0>>24]
	mov	$s1,$s1,lsr#24

	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td3[s1>>0]
	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td1[s1>>16]
	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td2[s1>>8]
	eor	$s0,$s0,$i1,ror#24
	ldr	$s1,[$tbl,$s1,lsl#2]	@ Td0[s1>>24]
	and	$i1,lr,$s2,lsr#8	@ i0
	eor	$t2,$i2,$t2,ror#8
	and	$i2,lr,$s2		@ i1
	eor	$t3,$i3,$t3,ror#8
	and	$i3,lr,$s2,lsr#16
	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td2[s2>>8]
	eor	$s1,$s1,$t1,ror#8
	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td3[s2>>0]
	mov	$s2,$s2,lsr#24

	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td1[s2>>16]
	eor	$s0,$s0,$i1,ror#16
	ldr	$s2,[$tbl,$s2,lsl#2]	@ Td0[s2>>24]
	and	$i1,lr,$s3,lsr#16	@ i0
	eor	$s1,$s1,$i2,ror#24
	and	$i2,lr,$s3,lsr#8	@ i1
	eor	$t3,$i3,$t3,ror#8
	and	$i3,lr,$s3		@ i2
	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td1[s3>>16]
	eor	$s2,$s2,$t2,ror#8
	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td2[s3>>8]
	mov	$s3,$s3,lsr#24

	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td3[s3>>0]
	eor	$s0,$s0,$i1,ror#8
	ldr	$i1,[$key],#16		@ next round key, word 0
	eor	$s1,$s1,$i2,ror#16
	ldr	$s3,[$tbl,$s3,lsl#2]	@ Td0[s3>>24]
	eor	$s2,$s2,$i3,ror#24

	ldr	$t1,[$key,#-12]
	eor	$s0,$s0,$i1
	ldr	$t2,[$key,#-8]
	eor	$s3,$s3,$t3,ror#8
	ldr	$t3,[$key,#-4]
	and	$i1,lr,$s0,lsr#16
	eor	$s1,$s1,$t1
	and	$i2,lr,$s0,lsr#8
	eor	$s2,$s2,$t2
	and	$i3,lr,$s0
	eor	$s3,$s3,$t3
	mov	$s0,$s0,lsr#24

	subs	$rounds,$rounds,#1
	bne	.Ldec_loop

	@ Last round uses the 256-byte Td4 table stored right after the
	@ 1KB word table.
	add	$tbl,$tbl,#1024

	@ The eight loads below touch Td4 at 32-byte steps and their results
	@ are overwritten before use; presumably this pulls the whole table
	@ into cache ahead of the data-dependent byte look-ups.
	ldr	$t2,[$tbl,#0]		@ prefetch Td4
	ldr	$t3,[$tbl,#32]
	ldr	$t1,[$tbl,#64]
	ldr	$t2,[$tbl,#96]
	ldr	$t3,[$tbl,#128]
	ldr	$t1,[$tbl,#160]
	ldr	$t2,[$tbl,#192]
	ldr	$t3,[$tbl,#224]

	ldrb	$s0,[$tbl,$s0]		@ Td4[s0>>24]
	ldrb	$t1,[$tbl,$i1]		@ Td4[s0>>16]
	and	$i1,lr,$s1		@ i0
	ldrb	$t2,[$tbl,$i2]		@ Td4[s0>>8]
	and	$i2,lr,$s1,lsr#16
	ldrb	$t3,[$tbl,$i3]		@ Td4[s0>>0]
	and	$i3,lr,$s1,lsr#8

	ldrb	$i1,[$tbl,$i1]		@ Td4[s1>>0]
	ldrb	$s1,[$tbl,$s1,lsr#24]	@ Td4[s1>>24]
	ldrb	$i2,[$tbl,$i2]		@ Td4[s1>>16]
	eor	$s0,$i1,$s0,lsl#24
	ldrb	$i3,[$tbl,$i3]		@ Td4[s1>>8]
	eor	$s1,$t1,$s1,lsl#8
	and	$i1,lr,$s2,lsr#8	@ i0
	eor	$t2,$t2,$i2,lsl#8
	and	$i2,lr,$s2		@ i1
	ldrb	$i1,[$tbl,$i1]		@ Td4[s2>>8]
	eor	$t3,$t3,$i3,lsl#8
	ldrb	$i2,[$tbl,$i2]		@ Td4[s2>>0]
	and	$i3,lr,$s2,lsr#16

	ldrb	$s2,[$tbl,$s2,lsr#24]	@ Td4[s2>>24]
	eor	$s0,$s0,$i1,lsl#8
	ldrb	$i3,[$tbl,$i3]		@ Td4[s2>>16]
	eor	$s1,$i2,$s1,lsl#16
	and	$i1,lr,$s3,lsr#16	@ i0
	eor	$s2,$t2,$s2,lsl#16
	and	$i2,lr,$s3,lsr#8	@ i1
	ldrb	$i1,[$tbl,$i1]		@ Td4[s3>>16]
	eor	$t3,$t3,$i3,lsl#16
	ldrb	$i2,[$tbl,$i2]		@ Td4[s3>>8]
	and	$i3,lr,$s3		@ i2

	ldrb	$i3,[$tbl,$i3]		@ Td4[s3>>0]
	ldrb	$s3,[$tbl,$s3,lsr#24]	@ Td4[s3>>24]
	eor	$s0,$s0,$i1,lsl#16
	ldr	$i1,[$key,#0]		@ last round key, read in place
	eor	$s1,$s1,$i2,lsl#8
	ldr	$t1,[$key,#4]
	eor	$s2,$i3,$s2,lsl#8
	ldr	$t2,[$key,#8]
	eor	$s3,$t3,$s3,lsl#24
	ldr	$t3,[$key,#12]

	eor	$s0,$s0,$i1
	eor	$s1,$s1,$t1
	eor	$s2,$s2,$t2
	eor	$s3,$s3,$t3

	sub	$tbl,$tbl,#1024		@ back to the start of the word table
	ldr	pc,[sp],#4		@ pop and return
.size	_armv4_AES_decrypt,.-_armv4_AES_decrypt
1128.asciz	"AES for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
1129.align	2
1130___
1131
# Replace every "bx lr" with its raw instruction encoding to make it
# possible to compile with -march=armv4.
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;
print $code;
# Closing STDOUT enforces the flush.  Buffered write errors (full disk,
# closed pipe) only surface here, so a failure must abort the script rather
# than leave a truncated assembly file behind with a zero exit status.
close STDOUT or die "error closing STDOUT: $!";
1135