1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# AES for ARMv4
11
12# January 2007.
13#
14# Code uses single 1K S-box and is >2 times faster than code generated
15# by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which
16# allows to merge logical or arithmetic operation with shift or rotate
17# in one instruction and emit combined result every cycle. The module
18# is endian-neutral. The performance is ~42 cycles/byte for 128-bit
19# key [on single-issue Xscale PXA250 core].
20
21# May 2007.
22#
23# AES_set_[en|de]crypt_key is added.
24
25# July 2010.
26#
27# Rescheduling for dual-issue pipeline resulted in 12% improvement on
28# Cortex A8 core and ~25 cycles per byte processed with 128-bit key.
29
30# February 2011.
31#
32# Profiler-assisted and platform-specific optimization resulted in 16%
33# improvement on Cortex A8 core and ~21.5 cycles per byte.
34
# Locate the output file on the command line: arguments are consumed until
# one looks like "name.ext" (word characters and dashes, a dot, then an
# extension).  Anything in front of it is discarded.
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}

# Redirect STDOUT only when an output name was actually supplied; otherwise
# the generated code goes to the inherited STDOUT.  Three-argument open keeps
# the file name from being parsed as a mode, and a failed open is fatal
# instead of silently writing the assembler text to the wrong place.
if (defined($output) && length($output)) {
	open STDOUT,'>',$output or die "can't open $output: $!";
}
37
# Symbolic names for the ARM registers; they are interpolated into the
# assembler text generated below.
($s0,$s1,$s2,$s3)=map { "r$_" } (0..3);
($t1,$t2,$t3)=map { "r$_" } (4..6);
($i1,$i2,$i3)=map { "r$_" } (7..9);

($tbl,$key,$rounds)=map { "r$_" } (10..12);
52
# Start of the generated assembler text.  The heredoc is interpolating, so
# the register aliases defined above are substituted into every instruction.
53$code=<<___;
54#if defined(__arm__)
55#ifndef __KERNEL__
56# include "arm_arch.h"
57#else
58# define __ARM_ARCH__ __LINUX_ARM_ARCH__
59#endif
60
61.text
@ Below ARMv7 the code is always assembled as 32-bit ARM. From ARMv7 on,
@ unified syntax is used and Thumb is selected only when __thumb2__ is
@ defined.
62#if __ARM_ARCH__<7
63.code	32
64#else
65.syntax	unified
66# ifdef __thumb2__
67.thumb
68# else
69.code	32
70# endif
71#endif
72
@ AES_Te layout: 256 32-bit words (1 KB) used by the encryption rounds,
@ then the 256-byte Te4 table at byte offset 1024 and the rcon round
@ constants at byte offset 1024+256 (both used by the key setup code).
73.type	AES_Te,%object
74.align	5
75AES_Te:
76.word	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
77.word	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
78.word	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
79.word	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
80.word	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
81.word	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
82.word	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
83.word	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
84.word	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
85.word	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
86.word	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
87.word	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
88.word	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
89.word	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
90.word	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
91.word	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
92.word	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
93.word	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
94.word	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
95.word	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
96.word	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
97.word	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
98.word	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
99.word	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
100.word	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
101.word	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
102.word	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
103.word	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
104.word	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
105.word	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
106.word	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
107.word	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
108.word	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
109.word	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
110.word	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
111.word	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
112.word	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
113.word	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
114.word	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
115.word	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
116.word	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
117.word	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
118.word	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
119.word	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
120.word	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
121.word	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
122.word	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
123.word	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
124.word	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
125.word	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
126.word	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
127.word	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
128.word	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
129.word	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
130.word	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
131.word	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
132.word	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
133.word	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
134.word	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
135.word	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
136.word	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
137.word	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
138.word	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
139.word	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
140@ Te4[256]
141.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
142.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
143.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
144.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
145.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
146.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
147.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
148.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
149.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
150.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
151.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
152.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
153.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
154.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
155.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
156.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
157.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
158.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
159.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
160.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
161.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
162.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
163.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
164.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
165.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
166.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
167.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
168.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
169.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
170.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
171.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
172.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
173@ rcon[]
174.word	0x01000000, 0x02000000, 0x04000000, 0x08000000
175.word	0x10000000, 0x20000000, 0x40000000, 0x80000000
176.word	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
177.size	AES_Te,.-AES_Te
178
@ Public single-block encryption entry point. Per the prototype below,
@ r0 = in, r1 = out, r2 = key. The 16 input bytes are assembled into the
@ four state words s0-s3 (r0-r3) with the first byte of each group in the
@ most significant position, handed to _armv4_AES_encrypt, and the result
@ is written to out in the same byte order.
179@ void asm_AES_encrypt(const unsigned char *in, unsigned char *out,
180@ 		       const AES_KEY *key) {
181.global asm_AES_encrypt
182.hidden asm_AES_encrypt
183.type   asm_AES_encrypt,%function
184.align	5
185asm_AES_encrypt:
@ r3 = address of asm_AES_encrypt itself; AES_Te is located relative to it
@ further down.
186#if __ARM_ARCH__<7
187	sub	r3,pc,#8		@ asm_AES_encrypt
188#else
189	adr	r3,asm_AES_encrypt
190#endif
@ r1 (out) is pushed together with the saved registers because r1 is
@ reused as state word s1; it is popped again after the call.
191	stmdb   sp!,{r1,r4-r12,lr}
192	mov	$rounds,r0		@ inp
193	mov	$key,r2
194	sub	$tbl,r3,#asm_AES_encrypt-AES_Te	@ Te
@ Before ARMv7 the input is read one byte at a time.
195#if __ARM_ARCH__<7
196	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
197	ldrb	$t1,[$rounds,#2]	@ manner...
198	ldrb	$t2,[$rounds,#1]
199	ldrb	$t3,[$rounds,#0]
200	orr	$s0,$s0,$t1,lsl#8
201	ldrb	$s1,[$rounds,#7]
202	orr	$s0,$s0,$t2,lsl#16
203	ldrb	$t1,[$rounds,#6]
204	orr	$s0,$s0,$t3,lsl#24
205	ldrb	$t2,[$rounds,#5]
206	ldrb	$t3,[$rounds,#4]
207	orr	$s1,$s1,$t1,lsl#8
208	ldrb	$s2,[$rounds,#11]
209	orr	$s1,$s1,$t2,lsl#16
210	ldrb	$t1,[$rounds,#10]
211	orr	$s1,$s1,$t3,lsl#24
212	ldrb	$t2,[$rounds,#9]
213	ldrb	$t3,[$rounds,#8]
214	orr	$s2,$s2,$t1,lsl#8
215	ldrb	$s3,[$rounds,#15]
216	orr	$s2,$s2,$t2,lsl#16
217	ldrb	$t1,[$rounds,#14]
218	orr	$s2,$s2,$t3,lsl#24
219	ldrb	$t2,[$rounds,#13]
220	ldrb	$t3,[$rounds,#12]
221	orr	$s3,$s3,$t1,lsl#8
222	orr	$s3,$s3,$t2,lsl#16
223	orr	$s3,$s3,$t3,lsl#24
224#else
@ ARMv7 and later: whole-word loads, byte-reversed on little-endian
@ (__ARMEL__) builds so the word layout matches the byte-wise path.
225	ldr	$s0,[$rounds,#0]
226	ldr	$s1,[$rounds,#4]
227	ldr	$s2,[$rounds,#8]
228	ldr	$s3,[$rounds,#12]
229#ifdef __ARMEL__
230	rev	$s0,$s0
231	rev	$s1,$s1
232	rev	$s2,$s2
233	rev	$s3,$s3
234#endif
235#endif
236	bl	_armv4_AES_encrypt
237
238	ldr	$rounds,[sp],#4		@ pop out
239#if __ARM_ARCH__>=7
240#ifdef __ARMEL__
241	rev	$s0,$s0
242	rev	$s1,$s1
243	rev	$s2,$s2
244	rev	$s3,$s3
245#endif
246	str	$s0,[$rounds,#0]
247	str	$s1,[$rounds,#4]
248	str	$s2,[$rounds,#8]
249	str	$s3,[$rounds,#12]
250#else
251	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
252	mov	$t2,$s0,lsr#16		@ manner...
253	mov	$t3,$s0,lsr#8
254	strb	$t1,[$rounds,#0]
255	strb	$t2,[$rounds,#1]
256	mov	$t1,$s1,lsr#24
257	strb	$t3,[$rounds,#2]
258	mov	$t2,$s1,lsr#16
259	strb	$s0,[$rounds,#3]
260	mov	$t3,$s1,lsr#8
261	strb	$t1,[$rounds,#4]
262	strb	$t2,[$rounds,#5]
263	mov	$t1,$s2,lsr#24
264	strb	$t3,[$rounds,#6]
265	mov	$t2,$s2,lsr#16
266	strb	$s1,[$rounds,#7]
267	mov	$t3,$s2,lsr#8
268	strb	$t1,[$rounds,#8]
269	strb	$t2,[$rounds,#9]
270	mov	$t1,$s3,lsr#24
271	strb	$t3,[$rounds,#10]
272	mov	$t2,$s3,lsr#16
273	strb	$s2,[$rounds,#11]
274	mov	$t3,$s3,lsr#8
275	strb	$t1,[$rounds,#12]
276	strb	$t2,[$rounds,#13]
277	strb	$t3,[$rounds,#14]
278	strb	$s3,[$rounds,#15]
279#endif
@ Return. ARMv5 and later pop the saved lr straight into pc; the ARMv4
@ path restores lr and uses bx only when bit 0 of lr is set.
280#if __ARM_ARCH__>=5
281	ldmia	sp!,{r4-r12,pc}
282#else
283	ldmia   sp!,{r4-r12,lr}
284	tst	lr,#1
285	moveq	pc,lr			@ be binary compatible with V4, yet
286	bx	lr			@ interoperable with Thumb ISA:-)
287#endif
288.size	asm_AES_encrypt,.-asm_AES_encrypt
289
@ Internal encryption core, called with bl from asm_AES_encrypt.
@ On entry: r0-r3 (s0-s3) hold the state words, r11 points at the key
@ schedule and r10 at AES_Te. On exit r0-r3 hold the encrypted words.
@ r4-r9 and r12 are overwritten, r11 is advanced through the schedule, and
@ lr is used as the 0xff byte mask (the return address is kept on the
@ stack instead).
290.type   _armv4_AES_encrypt,%function
291.align	2
292_armv4_AES_encrypt:
293	str	lr,[sp,#-4]!		@ push lr
@ Initial AddRoundKey with the first four key words; the round count is
@ read from byte offset 240 of the schedule (the key pointer has already
@ moved on by 16 at this point).
294	ldmia	$key!,{$t1-$i1}
295	eor	$s0,$s0,$t1
296	ldr	$rounds,[$key,#240-16]
297	eor	$s1,$s1,$t2
298	eor	$s2,$s2,$t3
299	eor	$s3,$s3,$i1
@ The loop below runs rounds-1 times; the last round is handled after it.
300	sub	$rounds,$rounds,#1
301	mov	lr,#255
302
303	and	$i1,lr,$s0
304	and	$i2,lr,$s0,lsr#8
305	and	$i3,lr,$s0,lsr#16
306	mov	$s0,$s0,lsr#24
@ One full round per iteration. Only the single word table is used: the
@ Te1/Te2/Te3 values named in the comments are the same entries applied
@ with a ror on the eor operand.
307.Lenc_loop:
308	ldr	$t1,[$tbl,$i1,lsl#2]	@ Te3[s0>>0]
309	and	$i1,lr,$s1,lsr#16	@ i0
310	ldr	$t2,[$tbl,$i2,lsl#2]	@ Te2[s0>>8]
311	and	$i2,lr,$s1
312	ldr	$t3,[$tbl,$i3,lsl#2]	@ Te1[s0>>16]
313	and	$i3,lr,$s1,lsr#8
314	ldr	$s0,[$tbl,$s0,lsl#2]	@ Te0[s0>>24]
315	mov	$s1,$s1,lsr#24
316
317	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te1[s1>>16]
318	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te3[s1>>0]
319	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te2[s1>>8]
320	eor	$s0,$s0,$i1,ror#8
321	ldr	$s1,[$tbl,$s1,lsl#2]	@ Te0[s1>>24]
322	and	$i1,lr,$s2,lsr#8	@ i0
323	eor	$t2,$t2,$i2,ror#8
324	and	$i2,lr,$s2,lsr#16	@ i1
325	eor	$t3,$t3,$i3,ror#8
326	and	$i3,lr,$s2
327	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te2[s2>>8]
328	eor	$s1,$s1,$t1,ror#24
329	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te1[s2>>16]
330	mov	$s2,$s2,lsr#24
331
332	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te3[s2>>0]
333	eor	$s0,$s0,$i1,ror#16
334	ldr	$s2,[$tbl,$s2,lsl#2]	@ Te0[s2>>24]
335	and	$i1,lr,$s3		@ i0
336	eor	$s1,$s1,$i2,ror#8
337	and	$i2,lr,$s3,lsr#8	@ i1
338	eor	$t3,$t3,$i3,ror#16
339	and	$i3,lr,$s3,lsr#16	@ i2
340	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te3[s3>>0]
341	eor	$s2,$s2,$t2,ror#16
342	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te2[s3>>8]
343	mov	$s3,$s3,lsr#24
344
345	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te1[s3>>16]
346	eor	$s0,$s0,$i1,ror#24
347	ldr	$i1,[$key],#16
348	eor	$s1,$s1,$i2,ror#16
349	ldr	$s3,[$tbl,$s3,lsl#2]	@ Te0[s3>>24]
350	eor	$s2,$s2,$i3,ror#8
351	ldr	$t1,[$key,#-12]
352	eor	$s3,$s3,$t3,ror#8
353
354	ldr	$t2,[$key,#-8]
355	eor	$s0,$s0,$i1
356	ldr	$t3,[$key,#-4]
357	and	$i1,lr,$s0
358	eor	$s1,$s1,$t1
359	and	$i2,lr,$s0,lsr#8
360	eor	$s2,$s2,$t2
361	and	$i3,lr,$s0,lsr#16
362	eor	$s3,$s3,$t3
363	mov	$s0,$s0,lsr#24
364
365	subs	$rounds,$rounds,#1
366	bne	.Lenc_loop
367
@ Last round: the table base is bumped by 2 so that the byte loads below
@ fetch the byte at offset 2 of each 4-byte entry (the values the comments
@ call Te4). The bump is undone before returning.
368	add	$tbl,$tbl,#2
369
370	ldrb	$t1,[$tbl,$i1,lsl#2]	@ Te4[s0>>0]
371	and	$i1,lr,$s1,lsr#16	@ i0
372	ldrb	$t2,[$tbl,$i2,lsl#2]	@ Te4[s0>>8]
373	and	$i2,lr,$s1
374	ldrb	$t3,[$tbl,$i3,lsl#2]	@ Te4[s0>>16]
375	and	$i3,lr,$s1,lsr#8
376	ldrb	$s0,[$tbl,$s0,lsl#2]	@ Te4[s0>>24]
377	mov	$s1,$s1,lsr#24
378
379	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s1>>16]
380	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s1>>0]
381	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s1>>8]
382	eor	$s0,$i1,$s0,lsl#8
383	ldrb	$s1,[$tbl,$s1,lsl#2]	@ Te4[s1>>24]
384	and	$i1,lr,$s2,lsr#8	@ i0
385	eor	$t2,$i2,$t2,lsl#8
386	and	$i2,lr,$s2,lsr#16	@ i1
387	eor	$t3,$i3,$t3,lsl#8
388	and	$i3,lr,$s2
389	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s2>>8]
390	eor	$s1,$t1,$s1,lsl#24
391	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s2>>16]
392	mov	$s2,$s2,lsr#24
393
394	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s2>>0]
395	eor	$s0,$i1,$s0,lsl#8
396	ldrb	$s2,[$tbl,$s2,lsl#2]	@ Te4[s2>>24]
397	and	$i1,lr,$s3		@ i0
398	eor	$s1,$s1,$i2,lsl#16
399	and	$i2,lr,$s3,lsr#8	@ i1
400	eor	$t3,$i3,$t3,lsl#8
401	and	$i3,lr,$s3,lsr#16	@ i2
402	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s3>>0]
403	eor	$s2,$t2,$s2,lsl#24
404	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s3>>8]
405	mov	$s3,$s3,lsr#24
406
407	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s3>>16]
408	eor	$s0,$i1,$s0,lsl#8
409	ldr	$i1,[$key,#0]
410	ldrb	$s3,[$tbl,$s3,lsl#2]	@ Te4[s3>>24]
411	eor	$s1,$s1,$i2,lsl#8
412	ldr	$t1,[$key,#4]
413	eor	$s2,$s2,$i3,lsl#16
414	ldr	$t2,[$key,#8]
415	eor	$s3,$t3,$s3,lsl#24
416	ldr	$t3,[$key,#12]
417
@ Final AddRoundKey with the last four key words.
418	eor	$s0,$s0,$i1
419	eor	$s1,$s1,$t1
420	eor	$s2,$s2,$t2
421	eor	$s3,$s3,$t3
422
423	sub	$tbl,$tbl,#2
424	ldr	pc,[sp],#4		@ pop and return
425.size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
426
@ Expands a user key into the encryption key schedule. As the register
@ moves at .Lok show: r0 = user key bytes, r1 = key length in bits,
@ r2 = AES_KEY to fill. Returns r0 = 0 on success, or r0 = -1 when r0 or r2
@ is zero or bits is not 128, 192 or 256. On success r2 points at the start
@ of the schedule again (asm_AES_set_decrypt_key relies on that), and the
@ round count (10, 12 or 14) is stored at byte offset 240 of the schedule.
427.global asm_AES_set_encrypt_key
428.hidden asm_AES_set_encrypt_key
429.type   asm_AES_set_encrypt_key,%function
430.align	5
431asm_AES_set_encrypt_key:
432_armv4_AES_set_encrypt_key:
433#if __ARM_ARCH__<7
434	sub	r3,pc,#8		@ asm_AES_set_encrypt_key
435#else
436	adr	r3,asm_AES_set_encrypt_key
437#endif
@ Argument checks. These run before anything is pushed, so .Labrt can
@ return without touching the stack.
438	teq	r0,#0
439#if __ARM_ARCH__>=7
440	itt	eq			@ Thumb2 thing, sanity check in ARM
441#endif
442	moveq	r0,#-1
443	beq	.Labrt
444	teq	r2,#0
445#if __ARM_ARCH__>=7
446	itt	eq			@ Thumb2 thing, sanity check in ARM
447#endif
448	moveq	r0,#-1
449	beq	.Labrt
450
451	teq	r1,#128
452	beq	.Lok
453	teq	r1,#192
454	beq	.Lok
455	teq	r1,#256
456#if __ARM_ARCH__>=7
457	itt	ne			@ Thumb2 thing, sanity check in ARM
458#endif
459	movne	r0,#-1
460	bne	.Labrt
461
462.Lok:	stmdb   sp!,{r4-r12,lr}
@ The table register is pointed at the byte table that follows the 1 KB
@ word table, so the key setup indexes it with plain byte offsets.
463	sub	$tbl,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024	@ Te4
464
465	mov	$rounds,r0		@ inp
466	mov	lr,r1			@ bits
467	mov	$key,r2			@ key
468
@ The first 16 key bytes are loaded as four words (first byte most
@ significant) and stored as the first four words of the schedule.
469#if __ARM_ARCH__<7
470	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
471	ldrb	$t1,[$rounds,#2]	@ manner...
472	ldrb	$t2,[$rounds,#1]
473	ldrb	$t3,[$rounds,#0]
474	orr	$s0,$s0,$t1,lsl#8
475	ldrb	$s1,[$rounds,#7]
476	orr	$s0,$s0,$t2,lsl#16
477	ldrb	$t1,[$rounds,#6]
478	orr	$s0,$s0,$t3,lsl#24
479	ldrb	$t2,[$rounds,#5]
480	ldrb	$t3,[$rounds,#4]
481	orr	$s1,$s1,$t1,lsl#8
482	ldrb	$s2,[$rounds,#11]
483	orr	$s1,$s1,$t2,lsl#16
484	ldrb	$t1,[$rounds,#10]
485	orr	$s1,$s1,$t3,lsl#24
486	ldrb	$t2,[$rounds,#9]
487	ldrb	$t3,[$rounds,#8]
488	orr	$s2,$s2,$t1,lsl#8
489	ldrb	$s3,[$rounds,#15]
490	orr	$s2,$s2,$t2,lsl#16
491	ldrb	$t1,[$rounds,#14]
492	orr	$s2,$s2,$t3,lsl#24
493	ldrb	$t2,[$rounds,#13]
494	ldrb	$t3,[$rounds,#12]
495	orr	$s3,$s3,$t1,lsl#8
496	str	$s0,[$key],#16
497	orr	$s3,$s3,$t2,lsl#16
498	str	$s1,[$key,#-12]
499	orr	$s3,$s3,$t3,lsl#24
500	str	$s2,[$key,#-8]
501	str	$s3,[$key,#-4]
502#else
503	ldr	$s0,[$rounds,#0]
504	ldr	$s1,[$rounds,#4]
505	ldr	$s2,[$rounds,#8]
506	ldr	$s3,[$rounds,#12]
507#ifdef __ARMEL__
508	rev	$s0,$s0
509	rev	$s1,$s1
510	rev	$s2,$s2
511	rev	$s3,$s3
512#endif
513	str	$s0,[$key],#16
514	str	$s1,[$key,#-12]
515	str	$s2,[$key,#-8]
516	str	$s3,[$key,#-4]
517#endif
518
@ 128-bit keys: 10 rounds, and the loop counter doubles as the number of
@ 4-word groups still to generate.
519	teq	lr,#128
520	bne	.Lnot128
521	mov	$rounds,#10
522	str	$rounds,[$key,#240-16]
523	add	$t3,$tbl,#256			@ rcon
524	mov	lr,#255
525
@ Each iteration substitutes every byte of the previous word through the
@ byte table, rotates the result left by 8 bits, xors in the next rcon
@ value, and chains the xor through the four words of the new group.
526.L128_loop:
527	and	$t2,lr,$s3,lsr#24
528	and	$i1,lr,$s3,lsr#16
529	ldrb	$t2,[$tbl,$t2]
530	and	$i2,lr,$s3,lsr#8
531	ldrb	$i1,[$tbl,$i1]
532	and	$i3,lr,$s3
533	ldrb	$i2,[$tbl,$i2]
534	orr	$t2,$t2,$i1,lsl#24
535	ldrb	$i3,[$tbl,$i3]
536	orr	$t2,$t2,$i2,lsl#16
537	ldr	$t1,[$t3],#4			@ rcon[i++]
538	orr	$t2,$t2,$i3,lsl#8
539	eor	$t2,$t2,$t1
540	eor	$s0,$s0,$t2			@ rk[4]=rk[0]^...
541	eor	$s1,$s1,$s0			@ rk[5]=rk[1]^rk[4]
542	str	$s0,[$key],#16
543	eor	$s2,$s2,$s1			@ rk[6]=rk[2]^rk[5]
544	str	$s1,[$key,#-12]
545	eor	$s3,$s3,$s2			@ rk[7]=rk[3]^rk[6]
546	str	$s2,[$key,#-8]
547	subs	$rounds,$rounds,#1
548	str	$s3,[$key,#-4]
549	bne	.L128_loop
@ The key pointer has moved 16 + 10*16 = 176 bytes; rewind r2 to the start.
550	sub	r2,$key,#176
551	b	.Ldone
552
@ 192- and 256-bit keys: load user key bytes 16..23 as two more words.
553.Lnot128:
554#if __ARM_ARCH__<7
555	ldrb	$i2,[$rounds,#19]
556	ldrb	$t1,[$rounds,#18]
557	ldrb	$t2,[$rounds,#17]
558	ldrb	$t3,[$rounds,#16]
559	orr	$i2,$i2,$t1,lsl#8
560	ldrb	$i3,[$rounds,#23]
561	orr	$i2,$i2,$t2,lsl#16
562	ldrb	$t1,[$rounds,#22]
563	orr	$i2,$i2,$t3,lsl#24
564	ldrb	$t2,[$rounds,#21]
565	ldrb	$t3,[$rounds,#20]
566	orr	$i3,$i3,$t1,lsl#8
567	orr	$i3,$i3,$t2,lsl#16
568	str	$i2,[$key],#8
569	orr	$i3,$i3,$t3,lsl#24
570	str	$i3,[$key,#-4]
571#else
572	ldr	$i2,[$rounds,#16]
573	ldr	$i3,[$rounds,#20]
574#ifdef __ARMEL__
575	rev	$i2,$i2
576	rev	$i3,$i3
577#endif
578	str	$i2,[$key],#8
579	str	$i3,[$key,#-4]
580#endif
581
@ 192-bit keys: 12 rounds are recorded in the schedule, and the loop
@ counter is then reloaded with 8, the number of 6-word groups.
582	teq	lr,#192
583	bne	.Lnot192
584	mov	$rounds,#12
585	str	$rounds,[$key,#240-24]
586	add	$t3,$tbl,#256			@ rcon
587	mov	lr,#255
588	mov	$rounds,#8
589
590.L192_loop:
591	and	$t2,lr,$i3,lsr#24
592	and	$i1,lr,$i3,lsr#16
593	ldrb	$t2,[$tbl,$t2]
594	and	$i2,lr,$i3,lsr#8
595	ldrb	$i1,[$tbl,$i1]
596	and	$i3,lr,$i3
597	ldrb	$i2,[$tbl,$i2]
598	orr	$t2,$t2,$i1,lsl#24
599	ldrb	$i3,[$tbl,$i3]
600	orr	$t2,$t2,$i2,lsl#16
601	ldr	$t1,[$t3],#4			@ rcon[i++]
602	orr	$t2,$t2,$i3,lsl#8
603	eor	$i3,$t2,$t1
604	eor	$s0,$s0,$i3			@ rk[6]=rk[0]^...
605	eor	$s1,$s1,$s0			@ rk[7]=rk[1]^rk[6]
606	str	$s0,[$key],#24
607	eor	$s2,$s2,$s1			@ rk[8]=rk[2]^rk[7]
608	str	$s1,[$key,#-20]
609	eor	$s3,$s3,$s2			@ rk[9]=rk[3]^rk[8]
610	str	$s2,[$key,#-16]
611	subs	$rounds,$rounds,#1
612	str	$s3,[$key,#-12]
@ The last pass stops after four words: rewind r2 by 24 + 8*24 = 216 bytes
@ and finish.
613#if __ARM_ARCH__>=7
614	itt	eq				@ Thumb2 thing, sanity check in ARM
615#endif
616	subeq	r2,$key,#216
617	beq	.Ldone
618
619	ldr	$i1,[$key,#-32]
620	ldr	$i2,[$key,#-28]
621	eor	$i1,$i1,$s3			@ rk[10]=rk[4]^rk[9]
622	eor	$i3,$i2,$i1			@ rk[11]=rk[5]^rk[10]
623	str	$i1,[$key,#-8]
624	str	$i3,[$key,#-4]
625	b	.L192_loop
626
@ 256-bit keys (the only length left after the checks at the top): load
@ user key bytes 24..31 as two more words.
627.Lnot192:
628#if __ARM_ARCH__<7
629	ldrb	$i2,[$rounds,#27]
630	ldrb	$t1,[$rounds,#26]
631	ldrb	$t2,[$rounds,#25]
632	ldrb	$t3,[$rounds,#24]
633	orr	$i2,$i2,$t1,lsl#8
634	ldrb	$i3,[$rounds,#31]
635	orr	$i2,$i2,$t2,lsl#16
636	ldrb	$t1,[$rounds,#30]
637	orr	$i2,$i2,$t3,lsl#24
638	ldrb	$t2,[$rounds,#29]
639	ldrb	$t3,[$rounds,#28]
640	orr	$i3,$i3,$t1,lsl#8
641	orr	$i3,$i3,$t2,lsl#16
642	str	$i2,[$key],#8
643	orr	$i3,$i3,$t3,lsl#24
644	str	$i3,[$key,#-4]
645#else
646	ldr	$i2,[$rounds,#24]
647	ldr	$i3,[$rounds,#28]
648#ifdef __ARMEL__
649	rev	$i2,$i2
650	rev	$i3,$i3
651#endif
652	str	$i2,[$key],#8
653	str	$i3,[$key,#-4]
654#endif
655
@ 14 rounds are recorded in the schedule; the loop counter is then
@ reloaded with 7, the number of 8-word groups.
656	mov	$rounds,#14
657	str	$rounds,[$key,#240-32]
658	add	$t3,$tbl,#256			@ rcon
659	mov	lr,#255
660	mov	$rounds,#7
661
662.L256_loop:
663	and	$t2,lr,$i3,lsr#24
664	and	$i1,lr,$i3,lsr#16
665	ldrb	$t2,[$tbl,$t2]
666	and	$i2,lr,$i3,lsr#8
667	ldrb	$i1,[$tbl,$i1]
668	and	$i3,lr,$i3
669	ldrb	$i2,[$tbl,$i2]
670	orr	$t2,$t2,$i1,lsl#24
671	ldrb	$i3,[$tbl,$i3]
672	orr	$t2,$t2,$i2,lsl#16
673	ldr	$t1,[$t3],#4			@ rcon[i++]
674	orr	$t2,$t2,$i3,lsl#8
675	eor	$i3,$t2,$t1
676	eor	$s0,$s0,$i3			@ rk[8]=rk[0]^...
677	eor	$s1,$s1,$s0			@ rk[9]=rk[1]^rk[8]
678	str	$s0,[$key],#32
679	eor	$s2,$s2,$s1			@ rk[10]=rk[2]^rk[9]
680	str	$s1,[$key,#-28]
681	eor	$s3,$s3,$s2			@ rk[11]=rk[3]^rk[10]
682	str	$s2,[$key,#-24]
683	subs	$rounds,$rounds,#1
684	str	$s3,[$key,#-20]
@ The last pass stops after four words: rewind r2 by 32 + 7*32 = 256 bytes
@ and finish.
685#if __ARM_ARCH__>=7
686	itt	eq				@ Thumb2 thing, sanity check in ARM
687#endif
688	subeq	r2,$key,#256
689	beq	.Ldone
690
@ Second half of the group: the bytes of rk[11] are substituted through
@ the byte table in place (no rotation, no rcon) before the xor chain.
691	and	$t2,lr,$s3
692	and	$i1,lr,$s3,lsr#8
693	ldrb	$t2,[$tbl,$t2]
694	and	$i2,lr,$s3,lsr#16
695	ldrb	$i1,[$tbl,$i1]
696	and	$i3,lr,$s3,lsr#24
697	ldrb	$i2,[$tbl,$i2]
698	orr	$t2,$t2,$i1,lsl#8
699	ldrb	$i3,[$tbl,$i3]
700	orr	$t2,$t2,$i2,lsl#16
701	ldr	$t1,[$key,#-48]
702	orr	$t2,$t2,$i3,lsl#24
703
704	ldr	$i1,[$key,#-44]
705	ldr	$i2,[$key,#-40]
706	eor	$t1,$t1,$t2			@ rk[12]=rk[4]^...
707	ldr	$i3,[$key,#-36]
708	eor	$i1,$i1,$t1			@ rk[13]=rk[5]^rk[12]
709	str	$t1,[$key,#-16]
710	eor	$i2,$i2,$i1			@ rk[14]=rk[6]^rk[13]
711	str	$i1,[$key,#-12]
712	eor	$i3,$i3,$i2			@ rk[15]=rk[7]^rk[14]
713	str	$i2,[$key,#-8]
714	str	$i3,[$key,#-4]
715	b	.L256_loop
716
@ Success path: r0 = 0 and the saved registers are restored, then control
@ falls into the shared return sequence at .Labrt. The error paths above
@ and asm_AES_set_decrypt_key also branch to .Labrt.
717.align	2
718.Ldone:	mov	r0,#0
719	ldmia   sp!,{r4-r12,lr}
720.Labrt:
721#if defined(__thumb2__) && __ARM_ARCH__>=7
722	.short	0x4770			@ bx lr in Thumb2 encoding
723#else
724	tst	lr,#1
725	moveq	pc,lr			@ be binary compatible with V4, yet
726	bx	lr			@ interoperable with Thumb ISA:-)
727#endif
728.size	asm_AES_set_encrypt_key,.-asm_AES_set_encrypt_key
729
@ Builds a decryption key schedule. It first runs the encryption key setup
@ with the caller's arguments untouched; if that reports an error (r0 not
@ zero) it returns through .Labrt with that value. Otherwise it branches to
@ _armv4_AES_set_enc2dec_key with input and output both set to the key
@ structure, so the conversion happens in place and that routine performs
@ the final return.
730.global asm_AES_set_decrypt_key
731.hidden asm_AES_set_decrypt_key
732.type   asm_AES_set_decrypt_key,%function
733.align	5
734asm_AES_set_decrypt_key:
735	str	lr,[sp,#-4]!            @ push lr
736	bl	_armv4_AES_set_encrypt_key
737	teq	r0,#0
738	ldr	lr,[sp],#4              @ pop lr
739	bne	.Labrt
740
741	mov	r0,r2			@ asm_AES_set_encrypt_key preserves r2,
742	mov	r1,r2			@ which is AES_KEY *key
743	b	_armv4_AES_set_enc2dec_key
744.size	asm_AES_set_decrypt_key,.-asm_AES_set_decrypt_key
745
@ Converts an encryption key schedule into a decryption key schedule.
@ r0 = input schedule, r1 = output schedule; asm_AES_set_decrypt_key calls
@ it with both pointing at the same structure. Two steps:
@   1. copy the 16-byte round keys in reverse order (.Linv), together with
@      the round count stored at byte offset 240;
@   2. transform every word of the round keys between the first and the
@      last one in place (.Lmix).
@ Always returns r0 = 0.
746@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out)
747.global	AES_set_enc2dec_key
748.hidden	AES_set_enc2dec_key
749.type	AES_set_enc2dec_key,%function
750.align	5
751AES_set_enc2dec_key:
752_armv4_AES_set_enc2dec_key:
753	stmdb   sp!,{r4-r12,lr}
754
@ Two input cursors (first and last round key) and two output cursors
@ (first and last slot) walk towards each other.
755	ldr	$rounds,[r0,#240]
756	mov	$i1,r0			@ input
757	add	$i2,r0,$rounds,lsl#4
758	mov	$key,r1			@ output
759	add	$tbl,r1,$rounds,lsl#4
760	str	$rounds,[r1,#240]
761
@ Both round keys of a pair are loaded before either is stored, which is
@ what makes the in-place case (input == output) work.
762.Linv:	ldr	$s0,[$i1],#16
763	ldr	$s1,[$i1,#-12]
764	ldr	$s2,[$i1,#-8]
765	ldr	$s3,[$i1,#-4]
766	ldr	$t1,[$i2],#-16
767	ldr	$t2,[$i2,#16+4]
768	ldr	$t3,[$i2,#16+8]
769	ldr	$i3,[$i2,#16+12]
770	str	$s0,[$tbl],#-16
771	str	$s1,[$tbl,#16+4]
772	str	$s2,[$tbl,#16+8]
773	str	$s3,[$tbl,#16+12]
774	str	$t1,[$key],#16
775	str	$t2,[$key,#-12]
776	str	$t3,[$key,#-8]
777	str	$i3,[$key,#-4]
778	teq	$i1,$i2
779	bne	.Linv
780
@ The cursors have met on the middle round key, which is copied straight
@ across; the output cursor is then rewound to the start of the schedule.
781	ldr	$s0,[$i1]
782	ldr	$s1,[$i1,#4]
783	ldr	$s2,[$i1,#8]
784	ldr	$s3,[$i1,#12]
785	str	$s0,[$key]
786	str	$s1,[$key,#4]
787	str	$s2,[$key,#8]
788	str	$s3,[$key,#12]
789	sub	$key,$key,$rounds,lsl#3
790___
# The three index registers are reused as constant masks by the .Lmix loop
# generated below.
791$mask80=$i1;
792$mask1b=$i2;
793$mask7f=$i3;
794$code.=<<___;
@ Start at the second round key and build the masks 0x80808080,
@ 0x1b1b1b1b and 0x7f7f7f7f; the loop count is (rounds-1)*4 words.
795	ldr	$s0,[$key,#16]!		@ prefetch tp1
796	mov	$mask80,#0x80
797	mov	$mask1b,#0x1b
798	orr	$mask80,$mask80,#0x8000
799	orr	$mask1b,$mask1b,#0x1b00
800	orr	$mask80,$mask80,$mask80,lsl#16
801	orr	$mask1b,$mask1b,$mask1b,lsl#16
802	sub	$rounds,$rounds,#1
803	mvn	$mask7f,$mask80
804	mov	$rounds,$rounds,lsl#2	@ (rounds-1)*4
805
@ One schedule word per iteration. tp2, tp4 and tp8 are successive
@ doublings of the four packed bytes: each byte is shifted left by one and
@ 0x1b is xored into the bytes whose top bit was set. The multiples named
@ in the comments (9, 0xb, 0xd, 0xe) are the inverse MixColumns
@ coefficients.
806.Lmix:	and	$t1,$s0,$mask80
807	and	$s1,$s0,$mask7f
808	sub	$t1,$t1,$t1,lsr#7
809	and	$t1,$t1,$mask1b
810	eor	$s1,$t1,$s1,lsl#1	@ tp2
811
812	and	$t1,$s1,$mask80
813	and	$s2,$s1,$mask7f
814	sub	$t1,$t1,$t1,lsr#7
815	and	$t1,$t1,$mask1b
816	eor	$s2,$t1,$s2,lsl#1	@ tp4
817
818	and	$t1,$s2,$mask80
819	and	$s3,$s2,$mask7f
820	sub	$t1,$t1,$t1,lsr#7
821	and	$t1,$t1,$mask1b
822	eor	$s3,$t1,$s3,lsl#1	@ tp8
823
824	eor	$t1,$s1,$s2
825	eor	$t2,$s0,$s3		@ tp9
826	eor	$t1,$t1,$s3		@ tpe
827	eor	$t1,$t1,$s1,ror#24
828	eor	$t1,$t1,$t2,ror#24	@ ^= ROTATE(tpb=tp9^tp2,8)
829	eor	$t1,$t1,$s2,ror#16
830	eor	$t1,$t1,$t2,ror#16	@ ^= ROTATE(tpd=tp9^tp4,16)
831	eor	$t1,$t1,$t2,ror#8	@ ^= ROTATE(tp9,24)
832
833	ldr	$s0,[$key,#4]		@ prefetch tp1
834	str	$t1,[$key],#4
835	subs	$rounds,$rounds,#1
836	bne	.Lmix
837
838	mov	r0,#0
839#if __ARM_ARCH__>=5
840	ldmia	sp!,{r4-r12,pc}
841#else
842	ldmia   sp!,{r4-r12,lr}
843	tst	lr,#1
844	moveq	pc,lr			@ be binary compatible with V4, yet
845	bx	lr			@ interoperable with Thumb ISA:-)
846#endif
847.size	AES_set_enc2dec_key,.-AES_set_enc2dec_key
848
@ AES_Td layout: 256 32-bit words (1 KB) for the decryption rounds,
@ followed by the 256-byte Td4 table at byte offset 1024.
849.type	AES_Td,%object
850.align	5
851AES_Td:
852.word	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
853.word	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
854.word	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
855.word	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
856.word	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
857.word	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
858.word	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
859.word	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
860.word	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
861.word	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
862.word	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
863.word	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
864.word	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
865.word	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
866.word	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
867.word	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
868.word	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
869.word	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
870.word	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
871.word	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
872.word	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
873.word	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
874.word	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
875.word	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
876.word	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
877.word	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
878.word	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
879.word	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
880.word	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
881.word	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
882.word	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
883.word	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
884.word	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
885.word	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
886.word	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
887.word	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
888.word	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
889.word	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
890.word	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
891.word	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
892.word	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
893.word	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
894.word	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
895.word	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
896.word	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
897.word	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
898.word	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
899.word	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
900.word	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
901.word	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
902.word	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
903.word	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
904.word	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
905.word	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
906.word	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
907.word	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
908.word	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
909.word	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
910.word	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
911.word	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
912.word	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
913.word	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
914.word	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
915.word	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
916@ Td4[256]
917.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
918.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
919.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
920.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
921.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
922.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
923.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
924.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
925.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
926.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
927.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
928.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
929.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
930.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
931.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
932.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
933.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
934.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
935.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
936.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
937.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
938.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
939.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
940.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
941.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
942.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
943.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
944.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
945.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
946.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
947.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
948.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
949.size	AES_Td,.-AES_Td
950
@ Public single-block decryption entry point. Per the prototype below,
@ r0 = in, r1 = out, r2 = key. The 16 input bytes are assembled into the
@ four state words s0-s3 (r0-r3) with the first byte of each group in the
@ most significant position, handed to _armv4_AES_decrypt, and the result
@ is written to out in the same byte order.
951@ void asm_AES_decrypt(const unsigned char *in, unsigned char *out,
952@ 		       const AES_KEY *key) {
953.global asm_AES_decrypt
954.hidden asm_AES_decrypt
955.type   asm_AES_decrypt,%function
956.align	5
957asm_AES_decrypt:
@ r3 = address of asm_AES_decrypt itself; AES_Td is located relative to it
@ further down.
958#if __ARM_ARCH__<7
959	sub	r3,pc,#8		@ asm_AES_decrypt
960#else
961	adr	r3,asm_AES_decrypt
962#endif
@ r1 (out) is pushed together with the saved registers because r1 is
@ reused as state word s1; it is popped again after the call.
963	stmdb   sp!,{r1,r4-r12,lr}
964	mov	$rounds,r0		@ inp
965	mov	$key,r2
966	sub	$tbl,r3,#asm_AES_decrypt-AES_Td		@ Td
@ Before ARMv7 the input is read one byte at a time.
967#if __ARM_ARCH__<7
968	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
969	ldrb	$t1,[$rounds,#2]	@ manner...
970	ldrb	$t2,[$rounds,#1]
971	ldrb	$t3,[$rounds,#0]
972	orr	$s0,$s0,$t1,lsl#8
973	ldrb	$s1,[$rounds,#7]
974	orr	$s0,$s0,$t2,lsl#16
975	ldrb	$t1,[$rounds,#6]
976	orr	$s0,$s0,$t3,lsl#24
977	ldrb	$t2,[$rounds,#5]
978	ldrb	$t3,[$rounds,#4]
979	orr	$s1,$s1,$t1,lsl#8
980	ldrb	$s2,[$rounds,#11]
981	orr	$s1,$s1,$t2,lsl#16
982	ldrb	$t1,[$rounds,#10]
983	orr	$s1,$s1,$t3,lsl#24
984	ldrb	$t2,[$rounds,#9]
985	ldrb	$t3,[$rounds,#8]
986	orr	$s2,$s2,$t1,lsl#8
987	ldrb	$s3,[$rounds,#15]
988	orr	$s2,$s2,$t2,lsl#16
989	ldrb	$t1,[$rounds,#14]
990	orr	$s2,$s2,$t3,lsl#24
991	ldrb	$t2,[$rounds,#13]
992	ldrb	$t3,[$rounds,#12]
993	orr	$s3,$s3,$t1,lsl#8
994	orr	$s3,$s3,$t2,lsl#16
995	orr	$s3,$s3,$t3,lsl#24
996#else
@ ARMv7 and later: whole-word loads, byte-reversed on little-endian
@ (__ARMEL__) builds so the word layout matches the byte-wise path.
997	ldr	$s0,[$rounds,#0]
998	ldr	$s1,[$rounds,#4]
999	ldr	$s2,[$rounds,#8]
1000	ldr	$s3,[$rounds,#12]
1001#ifdef __ARMEL__
1002	rev	$s0,$s0
1003	rev	$s1,$s1
1004	rev	$s2,$s2
1005	rev	$s3,$s3
1006#endif
1007#endif
1008	bl	_armv4_AES_decrypt
1009
1010	ldr	$rounds,[sp],#4		@ pop out
1011#if __ARM_ARCH__>=7
1012#ifdef __ARMEL__
1013	rev	$s0,$s0
1014	rev	$s1,$s1
1015	rev	$s2,$s2
1016	rev	$s3,$s3
1017#endif
1018	str	$s0,[$rounds,#0]
1019	str	$s1,[$rounds,#4]
1020	str	$s2,[$rounds,#8]
1021	str	$s3,[$rounds,#12]
1022#else
1023	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
1024	mov	$t2,$s0,lsr#16		@ manner...
1025	mov	$t3,$s0,lsr#8
1026	strb	$t1,[$rounds,#0]
1027	strb	$t2,[$rounds,#1]
1028	mov	$t1,$s1,lsr#24
1029	strb	$t3,[$rounds,#2]
1030	mov	$t2,$s1,lsr#16
1031	strb	$s0,[$rounds,#3]
1032	mov	$t3,$s1,lsr#8
1033	strb	$t1,[$rounds,#4]
1034	strb	$t2,[$rounds,#5]
1035	mov	$t1,$s2,lsr#24
1036	strb	$t3,[$rounds,#6]
1037	mov	$t2,$s2,lsr#16
1038	strb	$s1,[$rounds,#7]
1039	mov	$t3,$s2,lsr#8
1040	strb	$t1,[$rounds,#8]
1041	strb	$t2,[$rounds,#9]
1042	mov	$t1,$s3,lsr#24
1043	strb	$t3,[$rounds,#10]
1044	mov	$t2,$s3,lsr#16
1045	strb	$s2,[$rounds,#11]
1046	mov	$t3,$s3,lsr#8
1047	strb	$t1,[$rounds,#12]
1048	strb	$t2,[$rounds,#13]
1049	strb	$t3,[$rounds,#14]
1050	strb	$s3,[$rounds,#15]
1051#endif
@ Return. ARMv5 and later pop the saved lr straight into pc; the ARMv4
@ path restores lr and uses bx only when bit 0 of lr is set.
1052#if __ARM_ARCH__>=5
1053	ldmia	sp!,{r4-r12,pc}
1054#else
1055	ldmia   sp!,{r4-r12,lr}
1056	tst	lr,#1
1057	moveq	pc,lr			@ be binary compatible with V4, yet
1058	bx	lr			@ interoperable with Thumb ISA:-)
1059#endif
1060.size	asm_AES_decrypt,.-asm_AES_decrypt
1061
@ _armv4_AES_decrypt: internal decryption core, entered with bl from
@ asm_AES_decrypt.
@
@ In:	s0-s3 (r0-r3)	four 32-bit state words
@	key (r11)	start of the round-key schedule; the round count is
@			read from byte offset 240 of the same structure
@	tbl (r10)	base of the word-wide Td table; the byte-wide Td4
@			table is addressed 1024 bytes above it
@ Out:	s0-s3		transformed state words
@
@ Clobbers r4-r9, r12 and the condition flags. key is left pointing at the
@ final round key, tbl is restored to its entry value. lr is saved on the
@ stack because the body reuses it as the 0xff byte mask.
1062.type   _armv4_AES_decrypt,%function
1063.align	2
1064_armv4_AES_decrypt:
1065	str	lr,[sp,#-4]!		@ push lr
@ Load the first round key (four words, key advanced by 16) and xor it
@ into the state. The interleaved ldr fetches the round count; key has
@ already been advanced, hence the 240-16 displacement.
1066	ldmia	$key!,{$t1-$i1}
1067	eor	$s0,$s0,$t1
1068	ldr	$rounds,[$key,#240-16]
1069	eor	$s1,$s1,$t2
1070	eor	$s2,$s2,$t3
1071	eor	$s3,$s3,$i1
@ The last round is handled separately after the loop, so the loop runs
@ one time fewer than the round count.
1072	sub	$rounds,$rounds,#1
@ lr = byte mask used by every index extraction below
1073	mov	lr,#255
1074
@ Extract the byte indices of s0 for the first pass; the loop tail does
@ the same for every following pass.
1075	and	$i1,lr,$s0,lsr#16
1076	and	$i2,lr,$s0,lsr#8
1077	and	$i3,lr,$s0
1078	mov	$s0,$s0,lsr#24
@ One full round per iteration. Every lookup is a word load from the
@ single table at tbl (index scaled by 4). The Td1/Td2/Td3 values named
@ in the comments are that same table word rotated right by 8/16/24
@ bits; the rotation is applied in the eor instructions, in places
@ accumulated through chained ror#8 steps via t2 and t3. Loads, index
@ extraction and xors are deliberately interleaved, so do not reorder.
1079.Ldec_loop:
1080	ldr	$t1,[$tbl,$i1,lsl#2]	@ Td1[s0>>16]
1081	and	$i1,lr,$s1		@ i0
1082	ldr	$t2,[$tbl,$i2,lsl#2]	@ Td2[s0>>8]
1083	and	$i2,lr,$s1,lsr#16
1084	ldr	$t3,[$tbl,$i3,lsl#2]	@ Td3[s0>>0]
1085	and	$i3,lr,$s1,lsr#8
1086	ldr	$s0,[$tbl,$s0,lsl#2]	@ Td0[s0>>24]
1087	mov	$s1,$s1,lsr#24
1088
1089	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td3[s1>>0]
1090	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td1[s1>>16]
1091	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td2[s1>>8]
1092	eor	$s0,$s0,$i1,ror#24
1093	ldr	$s1,[$tbl,$s1,lsl#2]	@ Td0[s1>>24]
1094	and	$i1,lr,$s2,lsr#8	@ i0
1095	eor	$t2,$i2,$t2,ror#8
1096	and	$i2,lr,$s2		@ i1
1097	eor	$t3,$i3,$t3,ror#8
1098	and	$i3,lr,$s2,lsr#16
1099	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td2[s2>>8]
1100	eor	$s1,$s1,$t1,ror#8
1101	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td3[s2>>0]
1102	mov	$s2,$s2,lsr#24
1103
1104	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td1[s2>>16]
1105	eor	$s0,$s0,$i1,ror#16
1106	ldr	$s2,[$tbl,$s2,lsl#2]	@ Td0[s2>>24]
1107	and	$i1,lr,$s3,lsr#16	@ i0
1108	eor	$s1,$s1,$i2,ror#24
1109	and	$i2,lr,$s3,lsr#8	@ i1
1110	eor	$t3,$i3,$t3,ror#8
1111	and	$i3,lr,$s3		@ i2
1112	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td1[s3>>16]
1113	eor	$s2,$s2,$t2,ror#8
1114	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td2[s3>>8]
1115	mov	$s3,$s3,lsr#24
1116
@ The first word of the next round key is fetched here with key
@ post-incremented by 16; the other three words are then read at
@ negative offsets from the advanced pointer.
1117	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td3[s3>>0]
1118	eor	$s0,$s0,$i1,ror#8
1119	ldr	$i1,[$key],#16
1120	eor	$s1,$s1,$i2,ror#16
1121	ldr	$s3,[$tbl,$s3,lsl#2]	@ Td0[s3>>24]
1122	eor	$s2,$s2,$i3,ror#24
1123
@ Add the round key and, interleaved with it, extract the s0 byte
@ indices needed at the top of the next pass (or by the final round).
1124	ldr	$t1,[$key,#-12]
1125	eor	$s0,$s0,$i1
1126	ldr	$t2,[$key,#-8]
1127	eor	$s3,$s3,$t3,ror#8
1128	ldr	$t3,[$key,#-4]
1129	and	$i1,lr,$s0,lsr#16
1130	eor	$s1,$s1,$t1
1131	and	$i2,lr,$s0,lsr#8
1132	eor	$s2,$s2,$t2
1133	and	$i3,lr,$s0
1134	eor	$s3,$s3,$t3
1135	mov	$s0,$s0,lsr#24
1136
1137	subs	$rounds,$rounds,#1
1138	bne	.Ldec_loop
1139
@ Final round: move tbl on to the byte-wide Td4 table.
1140	add	$tbl,$tbl,#1024
1141
@ Touch one word every 32 bytes across the first 256 bytes of Td4. The
@ loaded values are discarded (t1-t3 are overwritten below).
@ NOTE(review): presumably this pulls the whole table into cache before
@ the data-dependent byte lookups; the 32-byte stride looks like an
@ assumed cache line size -- confirm.
1142	ldr	$t2,[$tbl,#0]		@ prefetch Td4
1143	ldr	$t3,[$tbl,#32]
1144	ldr	$t1,[$tbl,#64]
1145	ldr	$t2,[$tbl,#96]
1146	ldr	$t3,[$tbl,#128]
1147	ldr	$t1,[$tbl,#160]
1148	ldr	$t2,[$tbl,#192]
1149	ldr	$t3,[$tbl,#224]
1150
@ The last round uses byte loads from Td4 (unscaled index) and rebuilds
@ each output word with plain left shifts instead of rotations.
1151	ldrb	$s0,[$tbl,$s0]		@ Td4[s0>>24]
1152	ldrb	$t1,[$tbl,$i1]		@ Td4[s0>>16]
1153	and	$i1,lr,$s1		@ i0
1154	ldrb	$t2,[$tbl,$i2]		@ Td4[s0>>8]
1155	and	$i2,lr,$s1,lsr#16
1156	ldrb	$t3,[$tbl,$i3]		@ Td4[s0>>0]
1157	and	$i3,lr,$s1,lsr#8
1158
1159	add	$s1,$tbl,$s1,lsr#24
1160	ldrb	$i1,[$tbl,$i1]		@ Td4[s1>>0]
1161	ldrb	$s1,[$s1]		@ Td4[s1>>24]
1162	ldrb	$i2,[$tbl,$i2]		@ Td4[s1>>16]
1163	eor	$s0,$i1,$s0,lsl#24
1164	ldrb	$i3,[$tbl,$i3]		@ Td4[s1>>8]
1165	eor	$s1,$t1,$s1,lsl#8
1166	and	$i1,lr,$s2,lsr#8	@ i0
1167	eor	$t2,$t2,$i2,lsl#8
1168	and	$i2,lr,$s2		@ i1
1169	ldrb	$i1,[$tbl,$i1]		@ Td4[s2>>8]
1170	eor	$t3,$t3,$i3,lsl#8
1171	ldrb	$i2,[$tbl,$i2]		@ Td4[s2>>0]
1172	and	$i3,lr,$s2,lsr#16
1173
1174	add	$s2,$tbl,$s2,lsr#24
1175	ldrb	$s2,[$s2]		@ Td4[s2>>24]
1176	eor	$s0,$s0,$i1,lsl#8
1177	ldrb	$i3,[$tbl,$i3]		@ Td4[s2>>16]
1178	eor	$s1,$i2,$s1,lsl#16
1179	and	$i1,lr,$s3,lsr#16	@ i0
1180	eor	$s2,$t2,$s2,lsl#16
1181	and	$i2,lr,$s3,lsr#8	@ i1
1182	ldrb	$i1,[$tbl,$i1]		@ Td4[s3>>16]
1183	eor	$t3,$t3,$i3,lsl#16
1184	ldrb	$i2,[$tbl,$i2]		@ Td4[s3>>8]
1185	and	$i3,lr,$s3		@ i2
1186
@ The final round key is read in place (no writeback), interleaved with
@ the last shifts that assemble s0-s3.
1187	add	$s3,$tbl,$s3,lsr#24
1188	ldrb	$i3,[$tbl,$i3]		@ Td4[s3>>0]
1189	ldrb	$s3,[$s3]		@ Td4[s3>>24]
1190	eor	$s0,$s0,$i1,lsl#16
1191	ldr	$i1,[$key,#0]
1192	eor	$s1,$s1,$i2,lsl#8
1193	ldr	$t1,[$key,#4]
1194	eor	$s2,$i3,$s2,lsl#8
1195	ldr	$t2,[$key,#8]
1196	eor	$s3,$t3,$s3,lsl#24
1197	ldr	$t3,[$key,#12]
1198
1199	eor	$s0,$s0,$i1
1200	eor	$s1,$s1,$t1
1201	eor	$s2,$s2,$t2
1202	eor	$s3,$s3,$t3
1203
@ Undo the earlier +1024 so tbl holds its entry value again on return.
1204	sub	$tbl,$tbl,#1024
1205	ldr	pc,[sp],#4		@ pop and return
1206.size	_armv4_AES_decrypt,.-_armv4_AES_decrypt
1207.asciz	"AES for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
1208.align	2
1209
1210#endif
1211___
1212
# Replace every "bx lr" with the raw instruction word for that opcode, so
# the generated file still assembles when the assembler is restricted to
# -march=armv4.
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4

# Copy this script's leading comment block into the output, rewriting the
# leading '#' of each line to the assembler comment character '@'.  The
# shebang line is dropped; copying stops at the first line that is neither
# a comment nor blank.  The header is informational only, so failing to
# re-read the script is reported but does not abort code generation.
# Three-argument open keeps a script path containing mode characters from
# being interpreted as anything other than a file to read.
if (open(my $self, '<', $0)) {
	while (my $line = <$self>) {
		next if ($line =~ /^#!/);
		last unless ($line =~ s/^#/@/ or $line =~ /^$/);
		print $line;
	}
	close($self);
}
else {
	warn "cannot open $0 to copy its header comment: $!\n";
}

print $code;

# STDOUT was reopened onto the output file at the top of the script.
# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so a failed close must abort the run instead of silently leaving
# a truncated assembly file behind.
close STDOUT or die "error closing STDOUT: $!";	# enforce flush
1225