1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# AES for ARMv4
11
12# January 2007.
13#
14# Code uses single 1K S-box and is >2 times faster than code generated
15# by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which
16# allows to merge logical or arithmetic operation with shift or rotate
17# in one instruction and emit combined result every cycle. The module
18# is endian-neutral. The performance is ~42 cycles/byte for 128-bit
19# key [on single-issue Xscale PXA250 core].
20
21# May 2007.
22#
23# AES_set_[en|de]crypt_key is added.
24
25# July 2010.
26#
27# Rescheduling for dual-issue pipeline resulted in 12% improvement on
28# Cortex A8 core and ~25 cycles per byte processed with 128-bit key.
29
30# February 2011.
31#
32# Profiler-assisted and platform-specific optimization resulted in 16%
33# improvement on Cortex A8 core and ~21.5 cycles per byte.
34
35while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
36open STDOUT,">$output";
37
38$s0="r0";
39$s1="r1";
40$s2="r2";
41$s3="r3";
42$t1="r4";
43$t2="r5";
44$t3="r6";
45$i1="r7";
46$i2="r8";
47$i3="r9";
48
49$tbl="r10";
50$key="r11";
51$rounds="r12";
52
53$code=<<___;
54#ifndef __KERNEL__
55# include "arm_arch.h"
56#else
57# define __ARM_ARCH__ __LINUX_ARM_ARCH__
58#endif
59
60.text
61#if __ARM_ARCH__<7
62.code	32
63#else
64.syntax	unified
65# ifdef __thumb2__
66.thumb
67# else
68.code	32
69# endif
70#endif
71
72.type	AES_Te,%object
73.align	5
74AES_Te:
75.word	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
76.word	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
77.word	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
78.word	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
79.word	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
80.word	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
81.word	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
82.word	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
83.word	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
84.word	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
85.word	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
86.word	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
87.word	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
88.word	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
89.word	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
90.word	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
91.word	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
92.word	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
93.word	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
94.word	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
95.word	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
96.word	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
97.word	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
98.word	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
99.word	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
100.word	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
101.word	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
102.word	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
103.word	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
104.word	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
105.word	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
106.word	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
107.word	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
108.word	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
109.word	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
110.word	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
111.word	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
112.word	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
113.word	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
114.word	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
115.word	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
116.word	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
117.word	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
118.word	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
119.word	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
120.word	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
121.word	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
122.word	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
123.word	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
124.word	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
125.word	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
126.word	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
127.word	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
128.word	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
129.word	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
130.word	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
131.word	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
132.word	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
133.word	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
134.word	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
135.word	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
136.word	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
137.word	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
138.word	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
139@ Te4[256]
140.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
141.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
142.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
143.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
144.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
145.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
146.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
147.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
148.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
149.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
150.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
151.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
152.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
153.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
154.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
155.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
156.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
157.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
158.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
159.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
160.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
161.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
162.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
163.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
164.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
165.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
166.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
167.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
168.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
169.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
170.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
171.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
172@ rcon[]
173.word	0x01000000, 0x02000000, 0x04000000, 0x08000000
174.word	0x10000000, 0x20000000, 0x40000000, 0x80000000
175.word	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
176.size	AES_Te,.-AES_Te
177
178@ void AES_encrypt(const unsigned char *in, unsigned char *out,
179@ 		 const AES_KEY *key) {
180.global AES_encrypt
181.type   AES_encrypt,%function
182.align	5
183AES_encrypt:
184#if __ARM_ARCH__<7
185	sub	r3,pc,#8		@ AES_encrypt
186#else
187	adr	r3,AES_encrypt
188#endif
189	stmdb   sp!,{r1,r4-r12,lr}
190	mov	$rounds,r0		@ inp
191	mov	$key,r2
192	sub	$tbl,r3,#AES_encrypt-AES_Te	@ Te
193#if __ARM_ARCH__<7
194	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
195	ldrb	$t1,[$rounds,#2]	@ manner...
196	ldrb	$t2,[$rounds,#1]
197	ldrb	$t3,[$rounds,#0]
198	orr	$s0,$s0,$t1,lsl#8
199	ldrb	$s1,[$rounds,#7]
200	orr	$s0,$s0,$t2,lsl#16
201	ldrb	$t1,[$rounds,#6]
202	orr	$s0,$s0,$t3,lsl#24
203	ldrb	$t2,[$rounds,#5]
204	ldrb	$t3,[$rounds,#4]
205	orr	$s1,$s1,$t1,lsl#8
206	ldrb	$s2,[$rounds,#11]
207	orr	$s1,$s1,$t2,lsl#16
208	ldrb	$t1,[$rounds,#10]
209	orr	$s1,$s1,$t3,lsl#24
210	ldrb	$t2,[$rounds,#9]
211	ldrb	$t3,[$rounds,#8]
212	orr	$s2,$s2,$t1,lsl#8
213	ldrb	$s3,[$rounds,#15]
214	orr	$s2,$s2,$t2,lsl#16
215	ldrb	$t1,[$rounds,#14]
216	orr	$s2,$s2,$t3,lsl#24
217	ldrb	$t2,[$rounds,#13]
218	ldrb	$t3,[$rounds,#12]
219	orr	$s3,$s3,$t1,lsl#8
220	orr	$s3,$s3,$t2,lsl#16
221	orr	$s3,$s3,$t3,lsl#24
222#else
223	ldr	$s0,[$rounds,#0]
224	ldr	$s1,[$rounds,#4]
225	ldr	$s2,[$rounds,#8]
226	ldr	$s3,[$rounds,#12]
227#ifdef __ARMEL__
228	rev	$s0,$s0
229	rev	$s1,$s1
230	rev	$s2,$s2
231	rev	$s3,$s3
232#endif
233#endif
234	bl	_armv4_AES_encrypt
235
236	ldr	$rounds,[sp],#4		@ pop out
237#if __ARM_ARCH__>=7
238#ifdef __ARMEL__
239	rev	$s0,$s0
240	rev	$s1,$s1
241	rev	$s2,$s2
242	rev	$s3,$s3
243#endif
244	str	$s0,[$rounds,#0]
245	str	$s1,[$rounds,#4]
246	str	$s2,[$rounds,#8]
247	str	$s3,[$rounds,#12]
248#else
249	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
250	mov	$t2,$s0,lsr#16		@ manner...
251	mov	$t3,$s0,lsr#8
252	strb	$t1,[$rounds,#0]
253	strb	$t2,[$rounds,#1]
254	mov	$t1,$s1,lsr#24
255	strb	$t3,[$rounds,#2]
256	mov	$t2,$s1,lsr#16
257	strb	$s0,[$rounds,#3]
258	mov	$t3,$s1,lsr#8
259	strb	$t1,[$rounds,#4]
260	strb	$t2,[$rounds,#5]
261	mov	$t1,$s2,lsr#24
262	strb	$t3,[$rounds,#6]
263	mov	$t2,$s2,lsr#16
264	strb	$s1,[$rounds,#7]
265	mov	$t3,$s2,lsr#8
266	strb	$t1,[$rounds,#8]
267	strb	$t2,[$rounds,#9]
268	mov	$t1,$s3,lsr#24
269	strb	$t3,[$rounds,#10]
270	mov	$t2,$s3,lsr#16
271	strb	$s2,[$rounds,#11]
272	mov	$t3,$s3,lsr#8
273	strb	$t1,[$rounds,#12]
274	strb	$t2,[$rounds,#13]
275	strb	$t3,[$rounds,#14]
276	strb	$s3,[$rounds,#15]
277#endif
278#if __ARM_ARCH__>=5
279	ldmia	sp!,{r4-r12,pc}
280#else
281	ldmia   sp!,{r4-r12,lr}
282	tst	lr,#1
283	moveq	pc,lr			@ be binary compatible with V4, yet
284	bx	lr			@ interoperable with Thumb ISA:-)
285#endif
286.size	AES_encrypt,.-AES_encrypt
287
288.type   _armv4_AES_encrypt,%function
289.align	2
290_armv4_AES_encrypt:
291	str	lr,[sp,#-4]!		@ push lr
292	ldmia	$key!,{$t1-$i1}
293	eor	$s0,$s0,$t1
294	ldr	$rounds,[$key,#240-16]
295	eor	$s1,$s1,$t2
296	eor	$s2,$s2,$t3
297	eor	$s3,$s3,$i1
298	sub	$rounds,$rounds,#1
299	mov	lr,#255
300
301	and	$i1,lr,$s0
302	and	$i2,lr,$s0,lsr#8
303	and	$i3,lr,$s0,lsr#16
304	mov	$s0,$s0,lsr#24
305.Lenc_loop:
306	ldr	$t1,[$tbl,$i1,lsl#2]	@ Te3[s0>>0]
307	and	$i1,lr,$s1,lsr#16	@ i0
308	ldr	$t2,[$tbl,$i2,lsl#2]	@ Te2[s0>>8]
309	and	$i2,lr,$s1
310	ldr	$t3,[$tbl,$i3,lsl#2]	@ Te1[s0>>16]
311	and	$i3,lr,$s1,lsr#8
312	ldr	$s0,[$tbl,$s0,lsl#2]	@ Te0[s0>>24]
313	mov	$s1,$s1,lsr#24
314
315	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te1[s1>>16]
316	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te3[s1>>0]
317	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te2[s1>>8]
318	eor	$s0,$s0,$i1,ror#8
319	ldr	$s1,[$tbl,$s1,lsl#2]	@ Te0[s1>>24]
320	and	$i1,lr,$s2,lsr#8	@ i0
321	eor	$t2,$t2,$i2,ror#8
322	and	$i2,lr,$s2,lsr#16	@ i1
323	eor	$t3,$t3,$i3,ror#8
324	and	$i3,lr,$s2
325	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te2[s2>>8]
326	eor	$s1,$s1,$t1,ror#24
327	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te1[s2>>16]
328	mov	$s2,$s2,lsr#24
329
330	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te3[s2>>0]
331	eor	$s0,$s0,$i1,ror#16
332	ldr	$s2,[$tbl,$s2,lsl#2]	@ Te0[s2>>24]
333	and	$i1,lr,$s3		@ i0
334	eor	$s1,$s1,$i2,ror#8
335	and	$i2,lr,$s3,lsr#8	@ i1
336	eor	$t3,$t3,$i3,ror#16
337	and	$i3,lr,$s3,lsr#16	@ i2
338	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te3[s3>>0]
339	eor	$s2,$s2,$t2,ror#16
340	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te2[s3>>8]
341	mov	$s3,$s3,lsr#24
342
343	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te1[s3>>16]
344	eor	$s0,$s0,$i1,ror#24
345	ldr	$i1,[$key],#16
346	eor	$s1,$s1,$i2,ror#16
347	ldr	$s3,[$tbl,$s3,lsl#2]	@ Te0[s3>>24]
348	eor	$s2,$s2,$i3,ror#8
349	ldr	$t1,[$key,#-12]
350	eor	$s3,$s3,$t3,ror#8
351
352	ldr	$t2,[$key,#-8]
353	eor	$s0,$s0,$i1
354	ldr	$t3,[$key,#-4]
355	and	$i1,lr,$s0
356	eor	$s1,$s1,$t1
357	and	$i2,lr,$s0,lsr#8
358	eor	$s2,$s2,$t2
359	and	$i3,lr,$s0,lsr#16
360	eor	$s3,$s3,$t3
361	mov	$s0,$s0,lsr#24
362
363	subs	$rounds,$rounds,#1
364	bne	.Lenc_loop
365
366	add	$tbl,$tbl,#2
367
368	ldrb	$t1,[$tbl,$i1,lsl#2]	@ Te4[s0>>0]
369	and	$i1,lr,$s1,lsr#16	@ i0
370	ldrb	$t2,[$tbl,$i2,lsl#2]	@ Te4[s0>>8]
371	and	$i2,lr,$s1
372	ldrb	$t3,[$tbl,$i3,lsl#2]	@ Te4[s0>>16]
373	and	$i3,lr,$s1,lsr#8
374	ldrb	$s0,[$tbl,$s0,lsl#2]	@ Te4[s0>>24]
375	mov	$s1,$s1,lsr#24
376
377	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s1>>16]
378	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s1>>0]
379	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s1>>8]
380	eor	$s0,$i1,$s0,lsl#8
381	ldrb	$s1,[$tbl,$s1,lsl#2]	@ Te4[s1>>24]
382	and	$i1,lr,$s2,lsr#8	@ i0
383	eor	$t2,$i2,$t2,lsl#8
384	and	$i2,lr,$s2,lsr#16	@ i1
385	eor	$t3,$i3,$t3,lsl#8
386	and	$i3,lr,$s2
387	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s2>>8]
388	eor	$s1,$t1,$s1,lsl#24
389	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s2>>16]
390	mov	$s2,$s2,lsr#24
391
392	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s2>>0]
393	eor	$s0,$i1,$s0,lsl#8
394	ldrb	$s2,[$tbl,$s2,lsl#2]	@ Te4[s2>>24]
395	and	$i1,lr,$s3		@ i0
396	eor	$s1,$s1,$i2,lsl#16
397	and	$i2,lr,$s3,lsr#8	@ i1
398	eor	$t3,$i3,$t3,lsl#8
399	and	$i3,lr,$s3,lsr#16	@ i2
400	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s3>>0]
401	eor	$s2,$t2,$s2,lsl#24
402	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s3>>8]
403	mov	$s3,$s3,lsr#24
404
405	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s3>>16]
406	eor	$s0,$i1,$s0,lsl#8
407	ldr	$i1,[$key,#0]
408	ldrb	$s3,[$tbl,$s3,lsl#2]	@ Te4[s3>>24]
409	eor	$s1,$s1,$i2,lsl#8
410	ldr	$t1,[$key,#4]
411	eor	$s2,$s2,$i3,lsl#16
412	ldr	$t2,[$key,#8]
413	eor	$s3,$t3,$s3,lsl#24
414	ldr	$t3,[$key,#12]
415
416	eor	$s0,$s0,$i1
417	eor	$s1,$s1,$t1
418	eor	$s2,$s2,$t2
419	eor	$s3,$s3,$t3
420
421	sub	$tbl,$tbl,#2
422	ldr	pc,[sp],#4		@ pop and return
423.size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
424
425.global private_AES_set_encrypt_key
426.type   private_AES_set_encrypt_key,%function
427.align	5
428private_AES_set_encrypt_key:
429_armv4_AES_set_encrypt_key:
430#if __ARM_ARCH__<7
431	sub	r3,pc,#8		@ AES_set_encrypt_key
432#else
433	adr	r3,private_AES_set_encrypt_key
434#endif
435	teq	r0,#0
436#if __ARM_ARCH__>=7
437	itt	eq			@ Thumb2 thing, sanity check in ARM
438#endif
439	moveq	r0,#-1
440	beq	.Labrt
441	teq	r2,#0
442#if __ARM_ARCH__>=7
443	itt	eq			@ Thumb2 thing, sanity check in ARM
444#endif
445	moveq	r0,#-1
446	beq	.Labrt
447
448	teq	r1,#128
449	beq	.Lok
450	teq	r1,#192
451	beq	.Lok
452	teq	r1,#256
453#if __ARM_ARCH__>=7
454	itt	ne			@ Thumb2 thing, sanity check in ARM
455#endif
456	movne	r0,#-1
457	bne	.Labrt
458
459.Lok:	stmdb   sp!,{r4-r12,lr}
460	sub	$tbl,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024	@ Te4
461
462	mov	$rounds,r0		@ inp
463	mov	lr,r1			@ bits
464	mov	$key,r2			@ key
465
466#if __ARM_ARCH__<7
467	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
468	ldrb	$t1,[$rounds,#2]	@ manner...
469	ldrb	$t2,[$rounds,#1]
470	ldrb	$t3,[$rounds,#0]
471	orr	$s0,$s0,$t1,lsl#8
472	ldrb	$s1,[$rounds,#7]
473	orr	$s0,$s0,$t2,lsl#16
474	ldrb	$t1,[$rounds,#6]
475	orr	$s0,$s0,$t3,lsl#24
476	ldrb	$t2,[$rounds,#5]
477	ldrb	$t3,[$rounds,#4]
478	orr	$s1,$s1,$t1,lsl#8
479	ldrb	$s2,[$rounds,#11]
480	orr	$s1,$s1,$t2,lsl#16
481	ldrb	$t1,[$rounds,#10]
482	orr	$s1,$s1,$t3,lsl#24
483	ldrb	$t2,[$rounds,#9]
484	ldrb	$t3,[$rounds,#8]
485	orr	$s2,$s2,$t1,lsl#8
486	ldrb	$s3,[$rounds,#15]
487	orr	$s2,$s2,$t2,lsl#16
488	ldrb	$t1,[$rounds,#14]
489	orr	$s2,$s2,$t3,lsl#24
490	ldrb	$t2,[$rounds,#13]
491	ldrb	$t3,[$rounds,#12]
492	orr	$s3,$s3,$t1,lsl#8
493	str	$s0,[$key],#16
494	orr	$s3,$s3,$t2,lsl#16
495	str	$s1,[$key,#-12]
496	orr	$s3,$s3,$t3,lsl#24
497	str	$s2,[$key,#-8]
498	str	$s3,[$key,#-4]
499#else
500	ldr	$s0,[$rounds,#0]
501	ldr	$s1,[$rounds,#4]
502	ldr	$s2,[$rounds,#8]
503	ldr	$s3,[$rounds,#12]
504#ifdef __ARMEL__
505	rev	$s0,$s0
506	rev	$s1,$s1
507	rev	$s2,$s2
508	rev	$s3,$s3
509#endif
510	str	$s0,[$key],#16
511	str	$s1,[$key,#-12]
512	str	$s2,[$key,#-8]
513	str	$s3,[$key,#-4]
514#endif
515
516	teq	lr,#128
517	bne	.Lnot128
518	mov	$rounds,#10
519	str	$rounds,[$key,#240-16]
520	add	$t3,$tbl,#256			@ rcon
521	mov	lr,#255
522
523.L128_loop:
524	and	$t2,lr,$s3,lsr#24
525	and	$i1,lr,$s3,lsr#16
526	ldrb	$t2,[$tbl,$t2]
527	and	$i2,lr,$s3,lsr#8
528	ldrb	$i1,[$tbl,$i1]
529	and	$i3,lr,$s3
530	ldrb	$i2,[$tbl,$i2]
531	orr	$t2,$t2,$i1,lsl#24
532	ldrb	$i3,[$tbl,$i3]
533	orr	$t2,$t2,$i2,lsl#16
534	ldr	$t1,[$t3],#4			@ rcon[i++]
535	orr	$t2,$t2,$i3,lsl#8
536	eor	$t2,$t2,$t1
537	eor	$s0,$s0,$t2			@ rk[4]=rk[0]^...
538	eor	$s1,$s1,$s0			@ rk[5]=rk[1]^rk[4]
539	str	$s0,[$key],#16
540	eor	$s2,$s2,$s1			@ rk[6]=rk[2]^rk[5]
541	str	$s1,[$key,#-12]
542	eor	$s3,$s3,$s2			@ rk[7]=rk[3]^rk[6]
543	str	$s2,[$key,#-8]
544	subs	$rounds,$rounds,#1
545	str	$s3,[$key,#-4]
546	bne	.L128_loop
547	sub	r2,$key,#176
548	b	.Ldone
549
550.Lnot128:
551#if __ARM_ARCH__<7
552	ldrb	$i2,[$rounds,#19]
553	ldrb	$t1,[$rounds,#18]
554	ldrb	$t2,[$rounds,#17]
555	ldrb	$t3,[$rounds,#16]
556	orr	$i2,$i2,$t1,lsl#8
557	ldrb	$i3,[$rounds,#23]
558	orr	$i2,$i2,$t2,lsl#16
559	ldrb	$t1,[$rounds,#22]
560	orr	$i2,$i2,$t3,lsl#24
561	ldrb	$t2,[$rounds,#21]
562	ldrb	$t3,[$rounds,#20]
563	orr	$i3,$i3,$t1,lsl#8
564	orr	$i3,$i3,$t2,lsl#16
565	str	$i2,[$key],#8
566	orr	$i3,$i3,$t3,lsl#24
567	str	$i3,[$key,#-4]
568#else
569	ldr	$i2,[$rounds,#16]
570	ldr	$i3,[$rounds,#20]
571#ifdef __ARMEL__
572	rev	$i2,$i2
573	rev	$i3,$i3
574#endif
575	str	$i2,[$key],#8
576	str	$i3,[$key,#-4]
577#endif
578
579	teq	lr,#192
580	bne	.Lnot192
581	mov	$rounds,#12
582	str	$rounds,[$key,#240-24]
583	add	$t3,$tbl,#256			@ rcon
584	mov	lr,#255
585	mov	$rounds,#8
586
587.L192_loop:
588	and	$t2,lr,$i3,lsr#24
589	and	$i1,lr,$i3,lsr#16
590	ldrb	$t2,[$tbl,$t2]
591	and	$i2,lr,$i3,lsr#8
592	ldrb	$i1,[$tbl,$i1]
593	and	$i3,lr,$i3
594	ldrb	$i2,[$tbl,$i2]
595	orr	$t2,$t2,$i1,lsl#24
596	ldrb	$i3,[$tbl,$i3]
597	orr	$t2,$t2,$i2,lsl#16
598	ldr	$t1,[$t3],#4			@ rcon[i++]
599	orr	$t2,$t2,$i3,lsl#8
600	eor	$i3,$t2,$t1
601	eor	$s0,$s0,$i3			@ rk[6]=rk[0]^...
602	eor	$s1,$s1,$s0			@ rk[7]=rk[1]^rk[6]
603	str	$s0,[$key],#24
604	eor	$s2,$s2,$s1			@ rk[8]=rk[2]^rk[7]
605	str	$s1,[$key,#-20]
606	eor	$s3,$s3,$s2			@ rk[9]=rk[3]^rk[8]
607	str	$s2,[$key,#-16]
608	subs	$rounds,$rounds,#1
609	str	$s3,[$key,#-12]
610#if __ARM_ARCH__>=7
611	itt	eq				@ Thumb2 thing, sanity check in ARM
612#endif
613	subeq	r2,$key,#216
614	beq	.Ldone
615
616	ldr	$i1,[$key,#-32]
617	ldr	$i2,[$key,#-28]
618	eor	$i1,$i1,$s3			@ rk[10]=rk[4]^rk[9]
619	eor	$i3,$i2,$i1			@ rk[11]=rk[5]^rk[10]
620	str	$i1,[$key,#-8]
621	str	$i3,[$key,#-4]
622	b	.L192_loop
623
624.Lnot192:
625#if __ARM_ARCH__<7
626	ldrb	$i2,[$rounds,#27]
627	ldrb	$t1,[$rounds,#26]
628	ldrb	$t2,[$rounds,#25]
629	ldrb	$t3,[$rounds,#24]
630	orr	$i2,$i2,$t1,lsl#8
631	ldrb	$i3,[$rounds,#31]
632	orr	$i2,$i2,$t2,lsl#16
633	ldrb	$t1,[$rounds,#30]
634	orr	$i2,$i2,$t3,lsl#24
635	ldrb	$t2,[$rounds,#29]
636	ldrb	$t3,[$rounds,#28]
637	orr	$i3,$i3,$t1,lsl#8
638	orr	$i3,$i3,$t2,lsl#16
639	str	$i2,[$key],#8
640	orr	$i3,$i3,$t3,lsl#24
641	str	$i3,[$key,#-4]
642#else
643	ldr	$i2,[$rounds,#24]
644	ldr	$i3,[$rounds,#28]
645#ifdef __ARMEL__
646	rev	$i2,$i2
647	rev	$i3,$i3
648#endif
649	str	$i2,[$key],#8
650	str	$i3,[$key,#-4]
651#endif
652
653	mov	$rounds,#14
654	str	$rounds,[$key,#240-32]
655	add	$t3,$tbl,#256			@ rcon
656	mov	lr,#255
657	mov	$rounds,#7
658
659.L256_loop:
660	and	$t2,lr,$i3,lsr#24
661	and	$i1,lr,$i3,lsr#16
662	ldrb	$t2,[$tbl,$t2]
663	and	$i2,lr,$i3,lsr#8
664	ldrb	$i1,[$tbl,$i1]
665	and	$i3,lr,$i3
666	ldrb	$i2,[$tbl,$i2]
667	orr	$t2,$t2,$i1,lsl#24
668	ldrb	$i3,[$tbl,$i3]
669	orr	$t2,$t2,$i2,lsl#16
670	ldr	$t1,[$t3],#4			@ rcon[i++]
671	orr	$t2,$t2,$i3,lsl#8
672	eor	$i3,$t2,$t1
673	eor	$s0,$s0,$i3			@ rk[8]=rk[0]^...
674	eor	$s1,$s1,$s0			@ rk[9]=rk[1]^rk[8]
675	str	$s0,[$key],#32
676	eor	$s2,$s2,$s1			@ rk[10]=rk[2]^rk[9]
677	str	$s1,[$key,#-28]
678	eor	$s3,$s3,$s2			@ rk[11]=rk[3]^rk[10]
679	str	$s2,[$key,#-24]
680	subs	$rounds,$rounds,#1
681	str	$s3,[$key,#-20]
682#if __ARM_ARCH__>=7
683	itt	eq				@ Thumb2 thing, sanity check in ARM
684#endif
685	subeq	r2,$key,#256
686	beq	.Ldone
687
688	and	$t2,lr,$s3
689	and	$i1,lr,$s3,lsr#8
690	ldrb	$t2,[$tbl,$t2]
691	and	$i2,lr,$s3,lsr#16
692	ldrb	$i1,[$tbl,$i1]
693	and	$i3,lr,$s3,lsr#24
694	ldrb	$i2,[$tbl,$i2]
695	orr	$t2,$t2,$i1,lsl#8
696	ldrb	$i3,[$tbl,$i3]
697	orr	$t2,$t2,$i2,lsl#16
698	ldr	$t1,[$key,#-48]
699	orr	$t2,$t2,$i3,lsl#24
700
701	ldr	$i1,[$key,#-44]
702	ldr	$i2,[$key,#-40]
703	eor	$t1,$t1,$t2			@ rk[12]=rk[4]^...
704	ldr	$i3,[$key,#-36]
705	eor	$i1,$i1,$t1			@ rk[13]=rk[5]^rk[12]
706	str	$t1,[$key,#-16]
707	eor	$i2,$i2,$i1			@ rk[14]=rk[6]^rk[13]
708	str	$i1,[$key,#-12]
709	eor	$i3,$i3,$i2			@ rk[15]=rk[7]^rk[14]
710	str	$i2,[$key,#-8]
711	str	$i3,[$key,#-4]
712	b	.L256_loop
713
714.align	2
715.Ldone:	mov	r0,#0
716	ldmia   sp!,{r4-r12,lr}
717.Labrt:
718#if __ARM_ARCH__>=5
719	ret				@ bx lr
720#else
721	tst	lr,#1
722	moveq	pc,lr			@ be binary compatible with V4, yet
723	bx	lr			@ interoperable with Thumb ISA:-)
724#endif
725.size	private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
726
727.global private_AES_set_decrypt_key
728.type   private_AES_set_decrypt_key,%function
729.align	5
730private_AES_set_decrypt_key:
731	str	lr,[sp,#-4]!            @ push lr
732	bl	_armv4_AES_set_encrypt_key
733	teq	r0,#0
734	ldr	lr,[sp],#4              @ pop lr
735	bne	.Labrt
736
737	mov	r0,r2			@ AES_set_encrypt_key preserves r2,
738	mov	r1,r2			@ which is AES_KEY *key
739	b	_armv4_AES_set_enc2dec_key
740.size	private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
741
742@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out)
743.global	AES_set_enc2dec_key
744.type	AES_set_enc2dec_key,%function
745.align	5
746AES_set_enc2dec_key:
747_armv4_AES_set_enc2dec_key:
748	stmdb   sp!,{r4-r12,lr}
749
750	ldr	$rounds,[r0,#240]
751	mov	$i1,r0			@ input
752	add	$i2,r0,$rounds,lsl#4
753	mov	$key,r1			@ ouput
754	add	$tbl,r1,$rounds,lsl#4
755	str	$rounds,[r1,#240]
756
757.Linv:	ldr	$s0,[$i1],#16
758	ldr	$s1,[$i1,#-12]
759	ldr	$s2,[$i1,#-8]
760	ldr	$s3,[$i1,#-4]
761	ldr	$t1,[$i2],#-16
762	ldr	$t2,[$i2,#16+4]
763	ldr	$t3,[$i2,#16+8]
764	ldr	$i3,[$i2,#16+12]
765	str	$s0,[$tbl],#-16
766	str	$s1,[$tbl,#16+4]
767	str	$s2,[$tbl,#16+8]
768	str	$s3,[$tbl,#16+12]
769	str	$t1,[$key],#16
770	str	$t2,[$key,#-12]
771	str	$t3,[$key,#-8]
772	str	$i3,[$key,#-4]
773	teq	$i1,$i2
774	bne	.Linv
775
776	ldr	$s0,[$i1]
777	ldr	$s1,[$i1,#4]
778	ldr	$s2,[$i1,#8]
779	ldr	$s3,[$i1,#12]
780	str	$s0,[$key]
781	str	$s1,[$key,#4]
782	str	$s2,[$key,#8]
783	str	$s3,[$key,#12]
784	sub	$key,$key,$rounds,lsl#3
785___
786$mask80=$i1;
787$mask1b=$i2;
788$mask7f=$i3;
789$code.=<<___;
790	ldr	$s0,[$key,#16]!		@ prefetch tp1
791	mov	$mask80,#0x80
792	mov	$mask1b,#0x1b
793	orr	$mask80,$mask80,#0x8000
794	orr	$mask1b,$mask1b,#0x1b00
795	orr	$mask80,$mask80,$mask80,lsl#16
796	orr	$mask1b,$mask1b,$mask1b,lsl#16
797	sub	$rounds,$rounds,#1
798	mvn	$mask7f,$mask80
799	mov	$rounds,$rounds,lsl#2	@ (rounds-1)*4
800
801.Lmix:	and	$t1,$s0,$mask80
802	and	$s1,$s0,$mask7f
803	sub	$t1,$t1,$t1,lsr#7
804	and	$t1,$t1,$mask1b
805	eor	$s1,$t1,$s1,lsl#1	@ tp2
806
807	and	$t1,$s1,$mask80
808	and	$s2,$s1,$mask7f
809	sub	$t1,$t1,$t1,lsr#7
810	and	$t1,$t1,$mask1b
811	eor	$s2,$t1,$s2,lsl#1	@ tp4
812
813	and	$t1,$s2,$mask80
814	and	$s3,$s2,$mask7f
815	sub	$t1,$t1,$t1,lsr#7
816	and	$t1,$t1,$mask1b
817	eor	$s3,$t1,$s3,lsl#1	@ tp8
818
819	eor	$t1,$s1,$s2
820	eor	$t2,$s0,$s3		@ tp9
821	eor	$t1,$t1,$s3		@ tpe
822	eor	$t1,$t1,$s1,ror#24
823	eor	$t1,$t1,$t2,ror#24	@ ^= ROTATE(tpb=tp9^tp2,8)
824	eor	$t1,$t1,$s2,ror#16
825	eor	$t1,$t1,$t2,ror#16	@ ^= ROTATE(tpd=tp9^tp4,16)
826	eor	$t1,$t1,$t2,ror#8	@ ^= ROTATE(tp9,24)
827
828	ldr	$s0,[$key,#4]		@ prefetch tp1
829	str	$t1,[$key],#4
830	subs	$rounds,$rounds,#1
831	bne	.Lmix
832
833	mov	r0,#0
834#if __ARM_ARCH__>=5
835	ldmia	sp!,{r4-r12,pc}
836#else
837	ldmia   sp!,{r4-r12,lr}
838	tst	lr,#1
839	moveq	pc,lr			@ be binary compatible with V4, yet
840	bx	lr			@ interoperable with Thumb ISA:-)
841#endif
842.size	AES_set_enc2dec_key,.-AES_set_enc2dec_key
843
844.type	AES_Td,%object
845.align	5
846AES_Td:
847.word	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
848.word	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
849.word	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
850.word	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
851.word	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
852.word	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
853.word	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
854.word	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
855.word	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
856.word	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
857.word	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
858.word	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
859.word	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
860.word	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
861.word	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
862.word	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
863.word	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
864.word	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
865.word	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
866.word	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
867.word	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
868.word	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
869.word	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
870.word	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
871.word	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
872.word	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
873.word	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
874.word	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
875.word	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
876.word	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
877.word	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
878.word	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
879.word	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
880.word	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
881.word	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
882.word	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
883.word	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
884.word	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
885.word	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
886.word	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
887.word	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
888.word	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
889.word	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
890.word	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
891.word	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
892.word	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
893.word	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
894.word	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
895.word	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
896.word	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
897.word	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
898.word	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
899.word	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
900.word	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
901.word	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
902.word	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
903.word	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
904.word	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
905.word	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
906.word	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
907.word	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
908.word	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
909.word	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
910.word	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
911@ Td4[256]
912.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
913.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
914.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
915.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
916.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
917.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
918.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
919.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
920.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
921.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
922.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
923.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
924.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
925.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
926.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
927.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
928.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
929.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
930.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
931.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
932.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
933.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
934.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
935.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
936.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
937.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
938.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
939.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
940.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
941.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
942.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
943.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
944.size	AES_Td,.-AES_Td
945
946@ void AES_decrypt(const unsigned char *in, unsigned char *out,
947@ 		 const AES_KEY *key) {
948.global AES_decrypt
949.type   AES_decrypt,%function
950.align	5
951AES_decrypt:
952#if __ARM_ARCH__<7
953	sub	r3,pc,#8		@ AES_decrypt
954#else
955	adr	r3,AES_decrypt
956#endif
957	stmdb   sp!,{r1,r4-r12,lr}
958	mov	$rounds,r0		@ inp
959	mov	$key,r2
960	sub	$tbl,r3,#AES_decrypt-AES_Td		@ Td
961#if __ARM_ARCH__<7
962	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
963	ldrb	$t1,[$rounds,#2]	@ manner...
964	ldrb	$t2,[$rounds,#1]
965	ldrb	$t3,[$rounds,#0]
966	orr	$s0,$s0,$t1,lsl#8
967	ldrb	$s1,[$rounds,#7]
968	orr	$s0,$s0,$t2,lsl#16
969	ldrb	$t1,[$rounds,#6]
970	orr	$s0,$s0,$t3,lsl#24
971	ldrb	$t2,[$rounds,#5]
972	ldrb	$t3,[$rounds,#4]
973	orr	$s1,$s1,$t1,lsl#8
974	ldrb	$s2,[$rounds,#11]
975	orr	$s1,$s1,$t2,lsl#16
976	ldrb	$t1,[$rounds,#10]
977	orr	$s1,$s1,$t3,lsl#24
978	ldrb	$t2,[$rounds,#9]
979	ldrb	$t3,[$rounds,#8]
980	orr	$s2,$s2,$t1,lsl#8
981	ldrb	$s3,[$rounds,#15]
982	orr	$s2,$s2,$t2,lsl#16
983	ldrb	$t1,[$rounds,#14]
984	orr	$s2,$s2,$t3,lsl#24
985	ldrb	$t2,[$rounds,#13]
986	ldrb	$t3,[$rounds,#12]
987	orr	$s3,$s3,$t1,lsl#8
988	orr	$s3,$s3,$t2,lsl#16
989	orr	$s3,$s3,$t3,lsl#24
990#else
991	ldr	$s0,[$rounds,#0]
992	ldr	$s1,[$rounds,#4]
993	ldr	$s2,[$rounds,#8]
994	ldr	$s3,[$rounds,#12]
995#ifdef __ARMEL__
996	rev	$s0,$s0
997	rev	$s1,$s1
998	rev	$s2,$s2
999	rev	$s3,$s3
1000#endif
1001#endif
1002	bl	_armv4_AES_decrypt
1003
1004	ldr	$rounds,[sp],#4		@ pop out
1005#if __ARM_ARCH__>=7
1006#ifdef __ARMEL__
1007	rev	$s0,$s0
1008	rev	$s1,$s1
1009	rev	$s2,$s2
1010	rev	$s3,$s3
1011#endif
1012	str	$s0,[$rounds,#0]
1013	str	$s1,[$rounds,#4]
1014	str	$s2,[$rounds,#8]
1015	str	$s3,[$rounds,#12]
1016#else
1017	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
1018	mov	$t2,$s0,lsr#16		@ manner...
1019	mov	$t3,$s0,lsr#8
1020	strb	$t1,[$rounds,#0]
1021	strb	$t2,[$rounds,#1]
1022	mov	$t1,$s1,lsr#24
1023	strb	$t3,[$rounds,#2]
1024	mov	$t2,$s1,lsr#16
1025	strb	$s0,[$rounds,#3]
1026	mov	$t3,$s1,lsr#8
1027	strb	$t1,[$rounds,#4]
1028	strb	$t2,[$rounds,#5]
1029	mov	$t1,$s2,lsr#24
1030	strb	$t3,[$rounds,#6]
1031	mov	$t2,$s2,lsr#16
1032	strb	$s1,[$rounds,#7]
1033	mov	$t3,$s2,lsr#8
1034	strb	$t1,[$rounds,#8]
1035	strb	$t2,[$rounds,#9]
1036	mov	$t1,$s3,lsr#24
1037	strb	$t3,[$rounds,#10]
1038	mov	$t2,$s3,lsr#16
1039	strb	$s2,[$rounds,#11]
1040	mov	$t3,$s3,lsr#8
1041	strb	$t1,[$rounds,#12]
1042	strb	$t2,[$rounds,#13]
1043	strb	$t3,[$rounds,#14]
1044	strb	$s3,[$rounds,#15]
1045#endif
1046#if __ARM_ARCH__>=5
1047	ldmia	sp!,{r4-r12,pc}
1048#else
1049	ldmia   sp!,{r4-r12,lr}
1050	tst	lr,#1
1051	moveq	pc,lr			@ be binary compatible with V4, yet
1052	bx	lr			@ interoperable with Thumb ISA:-)
1053#endif
1054.size	AES_decrypt,.-AES_decrypt
1055
1056.type   _armv4_AES_decrypt,%function
1057.align	2
1058_armv4_AES_decrypt:
1059	str	lr,[sp,#-4]!		@ push lr
1060	ldmia	$key!,{$t1-$i1}
1061	eor	$s0,$s0,$t1
1062	ldr	$rounds,[$key,#240-16]
1063	eor	$s1,$s1,$t2
1064	eor	$s2,$s2,$t3
1065	eor	$s3,$s3,$i1
1066	sub	$rounds,$rounds,#1
1067	mov	lr,#255
1068
1069	and	$i1,lr,$s0,lsr#16
1070	and	$i2,lr,$s0,lsr#8
1071	and	$i3,lr,$s0
1072	mov	$s0,$s0,lsr#24
1073.Ldec_loop:
1074	ldr	$t1,[$tbl,$i1,lsl#2]	@ Td1[s0>>16]
1075	and	$i1,lr,$s1		@ i0
1076	ldr	$t2,[$tbl,$i2,lsl#2]	@ Td2[s0>>8]
1077	and	$i2,lr,$s1,lsr#16
1078	ldr	$t3,[$tbl,$i3,lsl#2]	@ Td3[s0>>0]
1079	and	$i3,lr,$s1,lsr#8
1080	ldr	$s0,[$tbl,$s0,lsl#2]	@ Td0[s0>>24]
1081	mov	$s1,$s1,lsr#24
1082
1083	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td3[s1>>0]
1084	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td1[s1>>16]
1085	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td2[s1>>8]
1086	eor	$s0,$s0,$i1,ror#24
1087	ldr	$s1,[$tbl,$s1,lsl#2]	@ Td0[s1>>24]
1088	and	$i1,lr,$s2,lsr#8	@ i0
1089	eor	$t2,$i2,$t2,ror#8
1090	and	$i2,lr,$s2		@ i1
1091	eor	$t3,$i3,$t3,ror#8
1092	and	$i3,lr,$s2,lsr#16
1093	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td2[s2>>8]
1094	eor	$s1,$s1,$t1,ror#8
1095	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td3[s2>>0]
1096	mov	$s2,$s2,lsr#24
1097
1098	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td1[s2>>16]
1099	eor	$s0,$s0,$i1,ror#16
1100	ldr	$s2,[$tbl,$s2,lsl#2]	@ Td0[s2>>24]
1101	and	$i1,lr,$s3,lsr#16	@ i0
1102	eor	$s1,$s1,$i2,ror#24
1103	and	$i2,lr,$s3,lsr#8	@ i1
1104	eor	$t3,$i3,$t3,ror#8
1105	and	$i3,lr,$s3		@ i2
1106	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td1[s3>>16]
1107	eor	$s2,$s2,$t2,ror#8
1108	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td2[s3>>8]
1109	mov	$s3,$s3,lsr#24
1110
1111	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td3[s3>>0]
1112	eor	$s0,$s0,$i1,ror#8
1113	ldr	$i1,[$key],#16
1114	eor	$s1,$s1,$i2,ror#16
1115	ldr	$s3,[$tbl,$s3,lsl#2]	@ Td0[s3>>24]
1116	eor	$s2,$s2,$i3,ror#24
1117
1118	ldr	$t1,[$key,#-12]
1119	eor	$s0,$s0,$i1
1120	ldr	$t2,[$key,#-8]
1121	eor	$s3,$s3,$t3,ror#8
1122	ldr	$t3,[$key,#-4]
1123	and	$i1,lr,$s0,lsr#16
1124	eor	$s1,$s1,$t1
1125	and	$i2,lr,$s0,lsr#8
1126	eor	$s2,$s2,$t2
1127	and	$i3,lr,$s0
1128	eor	$s3,$s3,$t3
1129	mov	$s0,$s0,lsr#24
1130
1131	subs	$rounds,$rounds,#1
1132	bne	.Ldec_loop
1133
1134	add	$tbl,$tbl,#1024
1135
1136	ldr	$t2,[$tbl,#0]		@ prefetch Td4
1137	ldr	$t3,[$tbl,#32]
1138	ldr	$t1,[$tbl,#64]
1139	ldr	$t2,[$tbl,#96]
1140	ldr	$t3,[$tbl,#128]
1141	ldr	$t1,[$tbl,#160]
1142	ldr	$t2,[$tbl,#192]
1143	ldr	$t3,[$tbl,#224]
1144
1145	ldrb	$s0,[$tbl,$s0]		@ Td4[s0>>24]
1146	ldrb	$t1,[$tbl,$i1]		@ Td4[s0>>16]
1147	and	$i1,lr,$s1		@ i0
1148	ldrb	$t2,[$tbl,$i2]		@ Td4[s0>>8]
1149	and	$i2,lr,$s1,lsr#16
1150	ldrb	$t3,[$tbl,$i3]		@ Td4[s0>>0]
1151	and	$i3,lr,$s1,lsr#8
1152
1153	add	$s1,$tbl,$s1,lsr#24
1154	ldrb	$i1,[$tbl,$i1]		@ Td4[s1>>0]
1155	ldrb	$s1,[$s1]		@ Td4[s1>>24]
1156	ldrb	$i2,[$tbl,$i2]		@ Td4[s1>>16]
1157	eor	$s0,$i1,$s0,lsl#24
1158	ldrb	$i3,[$tbl,$i3]		@ Td4[s1>>8]
1159	eor	$s1,$t1,$s1,lsl#8
1160	and	$i1,lr,$s2,lsr#8	@ i0
1161	eor	$t2,$t2,$i2,lsl#8
1162	and	$i2,lr,$s2		@ i1
1163	ldrb	$i1,[$tbl,$i1]		@ Td4[s2>>8]
1164	eor	$t3,$t3,$i3,lsl#8
1165	ldrb	$i2,[$tbl,$i2]		@ Td4[s2>>0]
1166	and	$i3,lr,$s2,lsr#16
1167
1168	add	$s2,$tbl,$s2,lsr#24
1169	ldrb	$s2,[$s2]		@ Td4[s2>>24]
1170	eor	$s0,$s0,$i1,lsl#8
1171	ldrb	$i3,[$tbl,$i3]		@ Td4[s2>>16]
1172	eor	$s1,$i2,$s1,lsl#16
1173	and	$i1,lr,$s3,lsr#16	@ i0
1174	eor	$s2,$t2,$s2,lsl#16
1175	and	$i2,lr,$s3,lsr#8	@ i1
1176	ldrb	$i1,[$tbl,$i1]		@ Td4[s3>>16]
1177	eor	$t3,$t3,$i3,lsl#16
1178	ldrb	$i2,[$tbl,$i2]		@ Td4[s3>>8]
1179	and	$i3,lr,$s3		@ i2
1180
1181	add	$s3,$tbl,$s3,lsr#24
1182	ldrb	$i3,[$tbl,$i3]		@ Td4[s3>>0]
1183	ldrb	$s3,[$s3]		@ Td4[s3>>24]
1184	eor	$s0,$s0,$i1,lsl#16
1185	ldr	$i1,[$key,#0]
1186	eor	$s1,$s1,$i2,lsl#8
1187	ldr	$t1,[$key,#4]
1188	eor	$s2,$i3,$s2,lsl#8
1189	ldr	$t2,[$key,#8]
1190	eor	$s3,$t3,$s3,lsl#24
1191	ldr	$t3,[$key,#12]
1192
1193	eor	$s0,$s0,$i1
1194	eor	$s1,$s1,$t1
1195	eor	$s2,$s2,$t2
1196	eor	$s3,$s3,$t3
1197
1198	sub	$tbl,$tbl,#1024
1199	ldr	pc,[sp],#4		@ pop and return
1200.size	_armv4_AES_decrypt,.-_armv4_AES_decrypt
1201.asciz	"AES for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
1202.align	2
1203___
1204
1205$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4
1206$code =~ s/\bret\b/bx\tlr/gm;
1207
1208open SELF,$0;
1209while(<SELF>) {
1210	next if (/^#!/);
1211	last if (!s/^#/@/ and !/^$/);
1212	print;
1213}
1214close SELF;
1215
1216print $code;
1217close STDOUT;	# enforce flush
1218