1#define __ARM_ARCH__ __LINUX_ARM_ARCH__
2@ ====================================================================
3@ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
4@ project. The module is, however, dual licensed under OpenSSL and
5@ CRYPTOGAMS licenses depending on where you obtain it. For further
6@ details see http://www.openssl.org/~appro/cryptogams/.
7@ ====================================================================
8
9@ AES for ARMv4
10
11@ January 2007.
12@
13@ Code uses a single 1K S-box and is >2 times faster than code generated
14@ by gcc-3.4.1. This is thanks to a unique feature of the ARMv4 ISA, which
15@ allows merging a logical or arithmetic operation with a shift or rotate
16@ in one instruction and emitting the combined result every cycle. The module
17@ is endian-neutral. The performance is ~42 cycles/byte for a 128-bit
18@ key [on a single-issue Xscale PXA250 core].
19
20@ May 2007.
21@
22@ AES_set_[en|de]crypt_key is added.
23
24@ July 2010.
25@
26@ Rescheduling for dual-issue pipeline resulted in 12% improvement on
27@ Cortex A8 core and ~25 cycles per byte processed with 128-bit key.
28
29@ February 2011.
30@
31@ Profiler-assisted and platform-specific optimization resulted in 16%
32@ improvement on Cortex A8 core and ~21.5 cycles per byte.
33
34@ A little glue here to select the correct code below for the ARM CPU
35@ that is being targeted.
36
37#include <linux/linkage.h>
38#include <asm/assembler.h>
39
40.text
41
@ AES_Te: lookup data for encryption and key expansion. The code in this
@ file addresses it PC-relative (adr of the routine minus a link-time
@ distance), so it must stay in the same section as that code.
@ Layout as used by the routines in this file:
@   +0     256 32-bit words, indexed by one state byte (index lsl 2).
@          The round loop derives the Te1..Te3 contributions by rotating
@          the loaded word (ror 8/16/24); the last encryption round reads
@          single bytes out of these same words (ldrb at base+2).
@   +1024  Te4[256], one byte per entry, read by the key expansion.
@   +1280  rcon[], round constant words read by the key expansion,
@          followed by zero padding.
42.type	AES_Te,%object
43.align	5
44AES_Te:
45.word	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
46.word	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
47.word	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
48.word	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
49.word	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
50.word	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
51.word	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
52.word	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
53.word	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
54.word	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
55.word	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
56.word	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
57.word	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
58.word	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
59.word	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
60.word	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
61.word	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
62.word	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
63.word	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
64.word	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
65.word	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
66.word	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
67.word	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
68.word	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
69.word	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
70.word	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
71.word	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
72.word	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
73.word	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
74.word	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
75.word	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
76.word	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
77.word	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
78.word	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
79.word	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
80.word	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
81.word	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
82.word	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
83.word	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
84.word	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
85.word	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
86.word	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
87.word	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
88.word	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
89.word	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
90.word	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
91.word	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
92.word	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
93.word	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
94.word	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
95.word	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
96.word	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
97.word	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
98.word	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
99.word	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
100.word	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
101.word	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
102.word	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
103.word	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
104.word	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
105.word	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
106.word	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
107.word	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
108.word	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
109@ Te4[256]
110.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
111.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
112.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
113.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
114.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
115.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
116.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
117.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
118.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
119.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
120.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
121.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
122.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
123.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
124.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
125.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
126.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
127.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
128.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
129.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
130.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
131.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
132.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
133.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
134.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
135.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
136.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
137.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
138.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
139.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
140.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
141.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
142@ rcon[]
143.word	0x01000000, 0x02000000, 0x04000000, 0x08000000
144.word	0x10000000, 0x20000000, 0x40000000, 0x80000000
145.word	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
146.size	AES_Te,.-AES_Te
147
148@ void AES_encrypt(const unsigned char *in, unsigned char *out,
149@ 		 const AES_KEY *key) {
@
@ Encrypts one 16-byte block.
@ In:   r0 = in, r1 = out, r2 = key
@ Out:  16 bytes stored at out; r4-r12 are restored before returning
@ Flow: load the block into r0-r3 as four words with the first input byte
@       in the most significant position, call _armv4_AES_encrypt with
@       r10 = AES_Te and r11 = key, then store r0-r3 back in the same
@       byte order.
150.align	5
151ENTRY(AES_encrypt)
	@ r3 = run-time address of this routine; the table address is derived
	@ from it below using the link-time distance to AES_Te.
152	adr	r3,AES_encrypt
	@ r1 (out) is pushed lowest so it can be popped on its own after the
	@ core call.
153	stmdb   sp!,{r1,r4-r12,lr}
154	mov	r12,r0		@ inp
155	mov	r11,r2
156	sub	r10,r3,#AES_encrypt-AES_Te	@ Te
157#if __ARM_ARCH__<7
	@ Pre-ARMv7 builds: assemble each word from individual bytes, so the
	@ result does not depend on CPU endianness or input alignment.
158	ldrb	r0,[r12,#3]	@ load input data in endian-neutral
159	ldrb	r4,[r12,#2]	@ manner...
160	ldrb	r5,[r12,#1]
161	ldrb	r6,[r12,#0]
162	orr	r0,r0,r4,lsl#8
163	ldrb	r1,[r12,#7]
164	orr	r0,r0,r5,lsl#16
165	ldrb	r4,[r12,#6]
166	orr	r0,r0,r6,lsl#24
167	ldrb	r5,[r12,#5]
168	ldrb	r6,[r12,#4]
169	orr	r1,r1,r4,lsl#8
170	ldrb	r2,[r12,#11]
171	orr	r1,r1,r5,lsl#16
172	ldrb	r4,[r12,#10]
173	orr	r1,r1,r6,lsl#24
174	ldrb	r5,[r12,#9]
175	ldrb	r6,[r12,#8]
176	orr	r2,r2,r4,lsl#8
177	ldrb	r3,[r12,#15]
178	orr	r2,r2,r5,lsl#16
179	ldrb	r4,[r12,#14]
180	orr	r2,r2,r6,lsl#24
181	ldrb	r5,[r12,#13]
182	ldrb	r6,[r12,#12]
183	orr	r3,r3,r4,lsl#8
184	orr	r3,r3,r5,lsl#16
185	orr	r3,r3,r6,lsl#24
186#else
	@ ARMv7 and later builds: word loads, byte-reversed on little-endian
	@ so the register layout matches the byte-wise path above.
187	ldr	r0,[r12,#0]
188	ldr	r1,[r12,#4]
189	ldr	r2,[r12,#8]
190	ldr	r3,[r12,#12]
191#ifdef __ARMEL__
192	rev	r0,r0
193	rev	r1,r1
194	rev	r2,r2
195	rev	r3,r3
196#endif
197#endif
	@ Core rounds: state in r0-r3, r10 = table, r11 = key schedule.
198	bl	_armv4_AES_encrypt
199
200	ldr	r12,[sp],#4		@ pop out
201#if __ARM_ARCH__>=7
202#ifdef __ARMEL__
203	rev	r0,r0
204	rev	r1,r1
205	rev	r2,r2
206	rev	r3,r3
207#endif
208	str	r0,[r12,#0]
209	str	r1,[r12,#4]
210	str	r2,[r12,#8]
211	str	r3,[r12,#12]
212#else
213	mov	r4,r0,lsr#24		@ write output in endian-neutral
214	mov	r5,r0,lsr#16		@ manner...
215	mov	r6,r0,lsr#8
216	strb	r4,[r12,#0]
217	strb	r5,[r12,#1]
218	mov	r4,r1,lsr#24
219	strb	r6,[r12,#2]
220	mov	r5,r1,lsr#16
221	strb	r0,[r12,#3]
222	mov	r6,r1,lsr#8
223	strb	r4,[r12,#4]
224	strb	r5,[r12,#5]
225	mov	r4,r2,lsr#24
226	strb	r6,[r12,#6]
227	mov	r5,r2,lsr#16
228	strb	r1,[r12,#7]
229	mov	r6,r2,lsr#8
230	strb	r4,[r12,#8]
231	strb	r5,[r12,#9]
232	mov	r4,r3,lsr#24
233	strb	r6,[r12,#10]
234	mov	r5,r3,lsr#16
235	strb	r2,[r12,#11]
236	mov	r6,r3,lsr#8
237	strb	r4,[r12,#12]
238	strb	r5,[r12,#13]
239	strb	r6,[r12,#14]
240	strb	r3,[r12,#15]
241#endif
	@ Restore r4-r12 and return by loading the saved lr into pc.
242	ldmia	sp!,{r4-r12,pc}
243ENDPROC(AES_encrypt)
244
@ _armv4_AES_encrypt: core encryption rounds (file-local helper).
@ In:   r0-r3 = state words s0-s3
@       r10   = AES_Te
@       r11   = key schedule; the round count is read from offset 240
@ Out:  r0-r3 = resulting state
@       r10   = unchanged on exit (adjusted by 2 and restored internally)
@       r11   = points at the last round key
@ Uses: r4-r9 as scratch, r12 as loop counter, lr as the 0xff byte mask.
@       The return address is kept on the stack because lr is reused.
@ The loop below runs rounds-1 times; the last round is done separately
@ with byte loads. Table loads and the xor/rotate combining of earlier
@ results are interleaved, so statement order matters.
245.type   _armv4_AES_encrypt,%function
246.align	2
247_armv4_AES_encrypt:
248	str	lr,[sp,#-4]!		@ push lr
	@ xor the first four key words into the state and fetch the round
	@ count (r11 has already advanced by 16, hence 240-16).
249	ldmia	r11!,{r4-r7}
250	eor	r0,r0,r4
251	ldr	r12,[r11,#240-16]
252	eor	r1,r1,r5
253	eor	r2,r2,r6
254	eor	r3,r3,r7
255	sub	r12,r12,#1
256	mov	lr,#255
257
	@ Pre-compute the four table indexes for s0; at the bottom of the
	@ loop the same is done for the next iteration.
258	and	r7,lr,r0
259	and	r8,lr,r0,lsr#8
260	and	r9,lr,r0,lsr#16
261	mov	r0,r0,lsr#24
262.Lenc_loop:
263	ldr	r4,[r10,r7,lsl#2]	@ Te3[s0>>0]
264	and	r7,lr,r1,lsr#16	@ i0
265	ldr	r5,[r10,r8,lsl#2]	@ Te2[s0>>8]
266	and	r8,lr,r1
267	ldr	r6,[r10,r9,lsl#2]	@ Te1[s0>>16]
268	and	r9,lr,r1,lsr#8
269	ldr	r0,[r10,r0,lsl#2]	@ Te0[s0>>24]
270	mov	r1,r1,lsr#24
271
272	ldr	r7,[r10,r7,lsl#2]	@ Te1[s1>>16]
273	ldr	r8,[r10,r8,lsl#2]	@ Te3[s1>>0]
274	ldr	r9,[r10,r9,lsl#2]	@ Te2[s1>>8]
275	eor	r0,r0,r7,ror#8
276	ldr	r1,[r10,r1,lsl#2]	@ Te0[s1>>24]
277	and	r7,lr,r2,lsr#8	@ i0
278	eor	r5,r5,r8,ror#8
279	and	r8,lr,r2,lsr#16	@ i1
280	eor	r6,r6,r9,ror#8
281	and	r9,lr,r2
282	ldr	r7,[r10,r7,lsl#2]	@ Te2[s2>>8]
283	eor	r1,r1,r4,ror#24
284	ldr	r8,[r10,r8,lsl#2]	@ Te1[s2>>16]
285	mov	r2,r2,lsr#24
286
287	ldr	r9,[r10,r9,lsl#2]	@ Te3[s2>>0]
288	eor	r0,r0,r7,ror#16
289	ldr	r2,[r10,r2,lsl#2]	@ Te0[s2>>24]
290	and	r7,lr,r3		@ i0
291	eor	r1,r1,r8,ror#8
292	and	r8,lr,r3,lsr#8	@ i1
293	eor	r6,r6,r9,ror#16
294	and	r9,lr,r3,lsr#16	@ i2
295	ldr	r7,[r10,r7,lsl#2]	@ Te3[s3>>0]
296	eor	r2,r2,r5,ror#16
297	ldr	r8,[r10,r8,lsl#2]	@ Te2[s3>>8]
298	mov	r3,r3,lsr#24
299
	@ Finish the round: fold in the remaining table words, then load the
	@ next four key words (r11 post-incremented by 16) and xor them in.
300	ldr	r9,[r10,r9,lsl#2]	@ Te1[s3>>16]
301	eor	r0,r0,r7,ror#24
302	ldr	r7,[r11],#16
303	eor	r1,r1,r8,ror#16
304	ldr	r3,[r10,r3,lsl#2]	@ Te0[s3>>24]
305	eor	r2,r2,r9,ror#8
306	ldr	r4,[r11,#-12]
307	eor	r3,r3,r6,ror#8
308
309	ldr	r5,[r11,#-8]
310	eor	r0,r0,r7
311	ldr	r6,[r11,#-4]
312	and	r7,lr,r0
313	eor	r1,r1,r4
314	and	r8,lr,r0,lsr#8
315	eor	r2,r2,r5
316	and	r9,lr,r0,lsr#16
317	eor	r3,r3,r6
318	mov	r0,r0,lsr#24
319
320	subs	r12,r12,#1
321	bne	.Lenc_loop
322
	@ Last round: bias the base by 2 so that ldrb with index lsl 2 picks
	@ a single byte out of each 32-bit table entry; results are merged
	@ with plain shifts instead of rotates.
323	add	r10,r10,#2
324
325	ldrb	r4,[r10,r7,lsl#2]	@ Te4[s0>>0]
326	and	r7,lr,r1,lsr#16	@ i0
327	ldrb	r5,[r10,r8,lsl#2]	@ Te4[s0>>8]
328	and	r8,lr,r1
329	ldrb	r6,[r10,r9,lsl#2]	@ Te4[s0>>16]
330	and	r9,lr,r1,lsr#8
331	ldrb	r0,[r10,r0,lsl#2]	@ Te4[s0>>24]
332	mov	r1,r1,lsr#24
333
334	ldrb	r7,[r10,r7,lsl#2]	@ Te4[s1>>16]
335	ldrb	r8,[r10,r8,lsl#2]	@ Te4[s1>>0]
336	ldrb	r9,[r10,r9,lsl#2]	@ Te4[s1>>8]
337	eor	r0,r7,r0,lsl#8
338	ldrb	r1,[r10,r1,lsl#2]	@ Te4[s1>>24]
339	and	r7,lr,r2,lsr#8	@ i0
340	eor	r5,r8,r5,lsl#8
341	and	r8,lr,r2,lsr#16	@ i1
342	eor	r6,r9,r6,lsl#8
343	and	r9,lr,r2
344	ldrb	r7,[r10,r7,lsl#2]	@ Te4[s2>>8]
345	eor	r1,r4,r1,lsl#24
346	ldrb	r8,[r10,r8,lsl#2]	@ Te4[s2>>16]
347	mov	r2,r2,lsr#24
348
349	ldrb	r9,[r10,r9,lsl#2]	@ Te4[s2>>0]
350	eor	r0,r7,r0,lsl#8
351	ldrb	r2,[r10,r2,lsl#2]	@ Te4[s2>>24]
352	and	r7,lr,r3		@ i0
353	eor	r1,r1,r8,lsl#16
354	and	r8,lr,r3,lsr#8	@ i1
355	eor	r6,r9,r6,lsl#8
356	and	r9,lr,r3,lsr#16	@ i2
357	ldrb	r7,[r10,r7,lsl#2]	@ Te4[s3>>0]
358	eor	r2,r5,r2,lsl#24
359	ldrb	r8,[r10,r8,lsl#2]	@ Te4[s3>>8]
360	mov	r3,r3,lsr#24
361
	@ The last round key is read without write-back, so r11 stays on it.
362	ldrb	r9,[r10,r9,lsl#2]	@ Te4[s3>>16]
363	eor	r0,r7,r0,lsl#8
364	ldr	r7,[r11,#0]
365	ldrb	r3,[r10,r3,lsl#2]	@ Te4[s3>>24]
366	eor	r1,r1,r8,lsl#8
367	ldr	r4,[r11,#4]
368	eor	r2,r2,r9,lsl#16
369	ldr	r5,[r11,#8]
370	eor	r3,r6,r3,lsl#24
371	ldr	r6,[r11,#12]
372
373	eor	r0,r0,r7
374	eor	r1,r1,r4
375	eor	r2,r2,r5
376	eor	r3,r3,r6
377
	@ Undo the +2 bias so r10 again points at AES_Te.
378	sub	r10,r10,#2
379	ldr	pc,[sp],#4		@ pop and return
380.size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
381
@ private_AES_set_encrypt_key: expand a raw key into the round-key schedule.
@ The second label gives the same entry point a file-local name.
@ In:   r0 = raw key bytes, r1 = key size in bits, r2 = key structure
@ Out:  r0 = 0 on success; -1 if r0 or r2 is zero or r1 is not 128, 192
@            or 256 (nothing is pushed or written on the failure path)
@       r2 = key structure pointer again on the success path
@ Writes the round keys starting at offset 0 of the key structure and the
@ round count (10, 12 or 14) at offset 240.
@ Register roles after the argument checks: r12 = input pointer, then loop
@ counter; r11 = write cursor into the schedule; r10 = Te4 byte table;
@ r6 = rcon cursor; lr = key size in bits, then the 0xff byte mask.
382.align	5
383ENTRY(private_AES_set_encrypt_key)
384_armv4_AES_set_encrypt_key:
385	adr	r3,_armv4_AES_set_encrypt_key
386	teq	r0,#0
387	moveq	r0,#-1
388	beq	.Labrt
389	teq	r2,#0
390	moveq	r0,#-1
391	beq	.Labrt
392
393	teq	r1,#128
394	beq	.Lok
395	teq	r1,#192
396	beq	.Lok
397	teq	r1,#256
398	movne	r0,#-1
399	bne	.Labrt
400
401.Lok:	stmdb   sp!,{r4-r12,lr}
402	sub	r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024	@ Te4
403
404	mov	r12,r0		@ inp
405	mov	lr,r1			@ bits
406	mov	r11,r2			@ key
407
	@ First 16 key bytes become rk[0..3]; each word is built with the
	@ first byte in the most significant position. r11 advances by 16.
408#if __ARM_ARCH__<7
409	ldrb	r0,[r12,#3]	@ load input data in endian-neutral
410	ldrb	r4,[r12,#2]	@ manner...
411	ldrb	r5,[r12,#1]
412	ldrb	r6,[r12,#0]
413	orr	r0,r0,r4,lsl#8
414	ldrb	r1,[r12,#7]
415	orr	r0,r0,r5,lsl#16
416	ldrb	r4,[r12,#6]
417	orr	r0,r0,r6,lsl#24
418	ldrb	r5,[r12,#5]
419	ldrb	r6,[r12,#4]
420	orr	r1,r1,r4,lsl#8
421	ldrb	r2,[r12,#11]
422	orr	r1,r1,r5,lsl#16
423	ldrb	r4,[r12,#10]
424	orr	r1,r1,r6,lsl#24
425	ldrb	r5,[r12,#9]
426	ldrb	r6,[r12,#8]
427	orr	r2,r2,r4,lsl#8
428	ldrb	r3,[r12,#15]
429	orr	r2,r2,r5,lsl#16
430	ldrb	r4,[r12,#14]
431	orr	r2,r2,r6,lsl#24
432	ldrb	r5,[r12,#13]
433	ldrb	r6,[r12,#12]
434	orr	r3,r3,r4,lsl#8
435	str	r0,[r11],#16
436	orr	r3,r3,r5,lsl#16
437	str	r1,[r11,#-12]
438	orr	r3,r3,r6,lsl#24
439	str	r2,[r11,#-8]
440	str	r3,[r11,#-4]
441#else
442	ldr	r0,[r12,#0]
443	ldr	r1,[r12,#4]
444	ldr	r2,[r12,#8]
445	ldr	r3,[r12,#12]
446#ifdef __ARMEL__
447	rev	r0,r0
448	rev	r1,r1
449	rev	r2,r2
450	rev	r3,r3
451#endif
452	str	r0,[r11],#16
453	str	r1,[r11,#-12]
454	str	r2,[r11,#-8]
455	str	r3,[r11,#-4]
456#endif
457
	@ ---- 128-bit keys: 10 rounds, 10 loop iterations of 4 words ----
458	teq	lr,#128
459	bne	.Lnot128
460	mov	r12,#10
461	str	r12,[r11,#240-16]
462	add	r6,r10,#256			@ rcon
463	mov	lr,#255
464
	@ Each iteration substitutes the bytes of the previous last word (r3)
	@ through Te4, reassembles them rotated left by one byte, xors in the
	@ next rcon word, and chains the result through r0-r3.
465.L128_loop:
466	and	r5,lr,r3,lsr#24
467	and	r7,lr,r3,lsr#16
468	ldrb	r5,[r10,r5]
469	and	r8,lr,r3,lsr#8
470	ldrb	r7,[r10,r7]
471	and	r9,lr,r3
472	ldrb	r8,[r10,r8]
473	orr	r5,r5,r7,lsl#24
474	ldrb	r9,[r10,r9]
475	orr	r5,r5,r8,lsl#16
476	ldr	r4,[r6],#4			@ rcon[i++]
477	orr	r5,r5,r9,lsl#8
478	eor	r5,r5,r4
479	eor	r0,r0,r5			@ rk[4]=rk[0]^...
480	eor	r1,r1,r0			@ rk[5]=rk[1]^rk[4]
481	str	r0,[r11],#16
482	eor	r2,r2,r1			@ rk[6]=rk[2]^rk[5]
483	str	r1,[r11,#-12]
484	eor	r3,r3,r2			@ rk[7]=rk[3]^rk[6]
485	str	r2,[r11,#-8]
486	subs	r12,r12,#1
487	str	r3,[r11,#-4]
488	bne	.L128_loop
	@ r11 has advanced 16 + 10*16 = 176 bytes; rewind to recover r2.
489	sub	r2,r11,#176
490	b	.Ldone
491
	@ ---- longer keys: load key bytes 16..23 as rk[4..5] (r8, r9) ----
492.Lnot128:
493#if __ARM_ARCH__<7
494	ldrb	r8,[r12,#19]
495	ldrb	r4,[r12,#18]
496	ldrb	r5,[r12,#17]
497	ldrb	r6,[r12,#16]
498	orr	r8,r8,r4,lsl#8
499	ldrb	r9,[r12,#23]
500	orr	r8,r8,r5,lsl#16
501	ldrb	r4,[r12,#22]
502	orr	r8,r8,r6,lsl#24
503	ldrb	r5,[r12,#21]
504	ldrb	r6,[r12,#20]
505	orr	r9,r9,r4,lsl#8
506	orr	r9,r9,r5,lsl#16
507	str	r8,[r11],#8
508	orr	r9,r9,r6,lsl#24
509	str	r9,[r11,#-4]
510#else
511	ldr	r8,[r12,#16]
512	ldr	r9,[r12,#20]
513#ifdef __ARMEL__
514	rev	r8,r8
515	rev	r9,r9
516#endif
517	str	r8,[r11],#8
518	str	r9,[r11,#-4]
519#endif
520
	@ ---- 192-bit keys: 12 rounds, 8 loop iterations of 6 words ----
	@ r12 is first used to store the round count, then reloaded as the
	@ iteration counter. r9 carries the previous last word into the loop.
521	teq	lr,#192
522	bne	.Lnot192
523	mov	r12,#12
524	str	r12,[r11,#240-24]
525	add	r6,r10,#256			@ rcon
526	mov	lr,#255
527	mov	r12,#8
528
529.L192_loop:
530	and	r5,lr,r9,lsr#24
531	and	r7,lr,r9,lsr#16
532	ldrb	r5,[r10,r5]
533	and	r8,lr,r9,lsr#8
534	ldrb	r7,[r10,r7]
535	and	r9,lr,r9
536	ldrb	r8,[r10,r8]
537	orr	r5,r5,r7,lsl#24
538	ldrb	r9,[r10,r9]
539	orr	r5,r5,r8,lsl#16
540	ldr	r4,[r6],#4			@ rcon[i++]
541	orr	r5,r5,r9,lsl#8
542	eor	r9,r5,r4
543	eor	r0,r0,r9			@ rk[6]=rk[0]^...
544	eor	r1,r1,r0			@ rk[7]=rk[1]^rk[6]
545	str	r0,[r11],#24
546	eor	r2,r2,r1			@ rk[8]=rk[2]^rk[7]
547	str	r1,[r11,#-20]
548	eor	r3,r3,r2			@ rk[9]=rk[3]^rk[8]
549	str	r2,[r11,#-16]
550	subs	r12,r12,#1
551	str	r3,[r11,#-12]
	@ The last iteration stops after four words. r11 has then advanced
	@ 24 + 8*24 = 216 bytes; rewind to recover r2.
552	subeq	r2,r11,#216
553	beq	.Ldone
554
555	ldr	r7,[r11,#-32]
556	ldr	r8,[r11,#-28]
557	eor	r7,r7,r3			@ rk[10]=rk[4]^rk[9]
558	eor	r9,r8,r7			@ rk[11]=rk[5]^rk[10]
559	str	r7,[r11,#-8]
560	str	r9,[r11,#-4]
561	b	.L192_loop
562
	@ ---- 256-bit keys: load key bytes 24..31 as rk[6..7] (r8, r9) ----
563.Lnot192:
564#if __ARM_ARCH__<7
565	ldrb	r8,[r12,#27]
566	ldrb	r4,[r12,#26]
567	ldrb	r5,[r12,#25]
568	ldrb	r6,[r12,#24]
569	orr	r8,r8,r4,lsl#8
570	ldrb	r9,[r12,#31]
571	orr	r8,r8,r5,lsl#16
572	ldrb	r4,[r12,#30]
573	orr	r8,r8,r6,lsl#24
574	ldrb	r5,[r12,#29]
575	ldrb	r6,[r12,#28]
576	orr	r9,r9,r4,lsl#8
577	orr	r9,r9,r5,lsl#16
578	str	r8,[r11],#8
579	orr	r9,r9,r6,lsl#24
580	str	r9,[r11,#-4]
581#else
582	ldr	r8,[r12,#24]
583	ldr	r9,[r12,#28]
584#ifdef __ARMEL__
585	rev	r8,r8
586	rev	r9,r9
587#endif
588	str	r8,[r11],#8
589	str	r9,[r11,#-4]
590#endif
591
	@ ---- 256-bit keys: 14 rounds, 7 loop iterations of 8 words ----
592	mov	r12,#14
593	str	r12,[r11,#240-32]
594	add	r6,r10,#256			@ rcon
595	mov	lr,#255
596	mov	r12,#7
597
598.L256_loop:
599	and	r5,lr,r9,lsr#24
600	and	r7,lr,r9,lsr#16
601	ldrb	r5,[r10,r5]
602	and	r8,lr,r9,lsr#8
603	ldrb	r7,[r10,r7]
604	and	r9,lr,r9
605	ldrb	r8,[r10,r8]
606	orr	r5,r5,r7,lsl#24
607	ldrb	r9,[r10,r9]
608	orr	r5,r5,r8,lsl#16
609	ldr	r4,[r6],#4			@ rcon[i++]
610	orr	r5,r5,r9,lsl#8
611	eor	r9,r5,r4
612	eor	r0,r0,r9			@ rk[8]=rk[0]^...
613	eor	r1,r1,r0			@ rk[9]=rk[1]^rk[8]
614	str	r0,[r11],#32
615	eor	r2,r2,r1			@ rk[10]=rk[2]^rk[9]
616	str	r1,[r11,#-28]
617	eor	r3,r3,r2			@ rk[11]=rk[3]^rk[10]
618	str	r2,[r11,#-24]
619	subs	r12,r12,#1
620	str	r3,[r11,#-20]
	@ The last iteration stops after four words. r11 has then advanced
	@ 32 + 7*32 = 256 bytes; rewind to recover r2.
621	subeq	r2,r11,#256
622	beq	.Ldone
623
	@ Second half of the iteration: substitute the bytes of r3 through
	@ Te4 in place (no byte rotation, no rcon) before chaining on.
624	and	r5,lr,r3
625	and	r7,lr,r3,lsr#8
626	ldrb	r5,[r10,r5]
627	and	r8,lr,r3,lsr#16
628	ldrb	r7,[r10,r7]
629	and	r9,lr,r3,lsr#24
630	ldrb	r8,[r10,r8]
631	orr	r5,r5,r7,lsl#8
632	ldrb	r9,[r10,r9]
633	orr	r5,r5,r8,lsl#16
634	ldr	r4,[r11,#-48]
635	orr	r5,r5,r9,lsl#24
636
637	ldr	r7,[r11,#-44]
638	ldr	r8,[r11,#-40]
639	eor	r4,r4,r5			@ rk[12]=rk[4]^...
640	ldr	r9,[r11,#-36]
641	eor	r7,r7,r4			@ rk[13]=rk[5]^rk[12]
642	str	r4,[r11,#-16]
643	eor	r8,r8,r7			@ rk[14]=rk[6]^rk[13]
644	str	r7,[r11,#-12]
645	eor	r9,r9,r8			@ rk[15]=rk[7]^rk[14]
646	str	r8,[r11,#-8]
647	str	r9,[r11,#-4]
648	b	.L256_loop
649
	@ Success exit restores the saved registers. .Labrt is also branched
	@ to directly by the argument checks above, before anything is pushed,
	@ and by private_AES_set_decrypt_key.
650.Ldone:	mov	r0,#0
651	ldmia   sp!,{r4-r12,lr}
652.Labrt:	ret	lr
653ENDPROC(private_AES_set_encrypt_key)
654
@ private_AES_set_decrypt_key: convert an expanded encryption key schedule,
@ in place, into the schedule consumed by _armv4_AES_decrypt.
@ In:   r2 = key structure that already holds the encryption round keys
@            and the round count at offset 240. The call that would build
@            them here is compiled out, so r0 and r1 are not read.
@ Out:  r0 = 0
@ Steps: (1) swap the 16-byte round keys end for end,
@        (2) transform every word of round keys 1 .. rounds-1 with the
@            packed GF(2^8) arithmetic in the .Lmix loop; the tp9/tpb/
@            tpd/tpe combination presumably corresponds to the AES
@            InvMixColumns step - confirm against FIPS-197.
@ NOTE(review): r2 is dereferenced without a zero check in this
@ configuration; callers are expected to pass a valid key structure.
655.align	5
656ENTRY(private_AES_set_decrypt_key)
657	str	lr,[sp,#-4]!            @ push lr
658#if 0
659	@ kernel does both of these in setkey so optimise this bit out by
660	@ expecting the key to already have the enc_key work done (see aes_glue.c)
661	bl	_armv4_AES_set_encrypt_key
662#else
663	mov	r0,#0
664#endif
	@ With r0 forced to 0 above, the error exit below is never taken.
665	teq	r0,#0
666	ldrne	lr,[sp],#4              @ pop lr
667	bne	.Labrt
668
669	stmdb   sp!,{r4-r12}
670
671	ldr	r12,[r2,#240]	@ AES_set_encrypt_key preserves r2,
672	mov	r11,r2			@ which is AES_KEY *key
	@ r7 walks up from the first round key and r8 walks down from the
	@ last one (key + rounds*16), swapping 16 bytes per iteration until
	@ the two cursors meet.
673	mov	r7,r2
674	add	r8,r2,r12,lsl#4
675
676.Linv:	ldr	r0,[r7]
677	ldr	r1,[r7,#4]
678	ldr	r2,[r7,#8]
679	ldr	r3,[r7,#12]
680	ldr	r4,[r8]
681	ldr	r5,[r8,#4]
682	ldr	r6,[r8,#8]
683	ldr	r9,[r8,#12]
684	str	r0,[r8],#-16
685	str	r1,[r8,#16+4]
686	str	r2,[r8,#16+8]
687	str	r3,[r8,#16+12]
688	str	r4,[r7],#16
689	str	r5,[r7,#-12]
690	str	r6,[r7,#-8]
691	str	r9,[r7,#-4]
692	teq	r7,r8
693	bne	.Linv
	@ Set up for the word transform: r11 = second round key (write-back),
	@ r7 = 0x80808080, r8 = 0x1b1b1b1b, r9 = 0x7f7f7f7f, and
	@ r12 = number of words to process.
694	ldr	r0,[r11,#16]!		@ prefetch tp1
695	mov	r7,#0x80
696	mov	r8,#0x1b
697	orr	r7,r7,#0x8000
698	orr	r8,r8,#0x1b00
699	orr	r7,r7,r7,lsl#16
700	orr	r8,r8,r8,lsl#16
701	sub	r12,r12,#1
702	mvn	r9,r7
703	mov	r12,r12,lsl#2	@ (rounds-1)*4
704
	@ Each five-instruction group doubles all four bytes of a word at
	@ once: bytes whose top bit was set get 0x1b xored in after the
	@ shift (the sub/and pair turns 0x80 into 0x1b per byte).
705.Lmix:	and	r4,r0,r7
706	and	r1,r0,r9
707	sub	r4,r4,r4,lsr#7
708	and	r4,r4,r8
709	eor	r1,r4,r1,lsl#1	@ tp2
710
711	and	r4,r1,r7
712	and	r2,r1,r9
713	sub	r4,r4,r4,lsr#7
714	and	r4,r4,r8
715	eor	r2,r4,r2,lsl#1	@ tp4
716
717	and	r4,r2,r7
718	and	r3,r2,r9
719	sub	r4,r4,r4,lsr#7
720	and	r4,r4,r8
721	eor	r3,r4,r3,lsl#1	@ tp8
722
723	eor	r4,r1,r2
724	eor	r5,r0,r3		@ tp9
725	eor	r4,r4,r3		@ tpe
726	eor	r4,r4,r1,ror#24
727	eor	r4,r4,r5,ror#24	@ ^= ROTATE(tpb=tp9^tp2,8)
728	eor	r4,r4,r2,ror#16
729	eor	r4,r4,r5,ror#16	@ ^= ROTATE(tpd=tp9^tp4,16)
730	eor	r4,r4,r5,ror#8	@ ^= ROTATE(tp9,24)
731
	@ Fetch the next input word before overwriting the current one.
732	ldr	r0,[r11,#4]		@ prefetch tp1
733	str	r4,[r11],#4
734	subs	r12,r12,#1
735	bne	.Lmix
736
	@ Pops r4-r12 and then the lr pushed at entry straight into pc.
737	mov	r0,#0
738	ldmia	sp!,{r4-r12,pc}
739ENDPROC(private_AES_set_decrypt_key)
740
@ AES_Td: lookup data for decryption, addressed PC-relative from
@ AES_decrypt, so it must stay in the same section as that code.
@ Layout as used by the routines in this file:
@   +0     256 32-bit words, indexed by one state byte (index lsl 2).
@          The round loop derives the Td1..Td3 contributions by rotating
@          the loaded word (ror 8/16/24).
@   +1024  Td4[256], one byte per entry, used for the last round.
741.type	AES_Td,%object
742.align	5
743AES_Td:
744.word	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
745.word	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
746.word	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
747.word	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
748.word	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
749.word	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
750.word	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
751.word	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
752.word	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
753.word	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
754.word	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
755.word	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
756.word	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
757.word	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
758.word	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
759.word	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
760.word	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
761.word	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
762.word	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
763.word	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
764.word	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
765.word	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
766.word	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
767.word	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
768.word	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
769.word	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
770.word	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
771.word	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
772.word	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
773.word	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
774.word	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
775.word	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
776.word	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
777.word	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
778.word	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
779.word	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
780.word	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
781.word	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
782.word	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
783.word	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
784.word	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
785.word	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
786.word	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
787.word	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
788.word	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
789.word	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
790.word	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
791.word	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
792.word	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
793.word	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
794.word	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
795.word	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
796.word	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
797.word	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
798.word	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
799.word	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
800.word	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
801.word	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
802.word	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
803.word	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
804.word	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
805.word	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
806.word	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
807.word	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
808@ Td4[256]
809.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
810.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
811.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
812.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
813.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
814.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
815.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
816.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
817.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
818.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
819.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
820.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
821.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
822.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
823.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
824.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
825.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
826.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
827.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
828.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
829.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
830.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
831.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
832.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
833.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
834.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
835.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
836.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
837.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
838.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
839.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
840.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
841.size	AES_Td,.-AES_Td
842
843@ void AES_decrypt(const unsigned char *in, unsigned char *out,
844@ 		 const AES_KEY *key) {
@
@ Decrypts one 16-byte block.
@ In:   r0 = in, r1 = out, r2 = key
@ Out:  16 bytes stored at out; r4-r12 are restored before returning
@ Flow: load the block into r0-r3 as four words with the first input byte
@       in the most significant position, call _armv4_AES_decrypt with
@       r10 = AES_Td and r11 = key, then store r0-r3 back in the same
@       byte order.
845.align	5
846ENTRY(AES_decrypt)
	@ r3 = run-time address of this routine; the table address is derived
	@ from it below using the link-time distance to AES_Td.
847	adr	r3,AES_decrypt
	@ r1 (out) is pushed lowest so it can be popped on its own after the
	@ core call.
848	stmdb   sp!,{r1,r4-r12,lr}
849	mov	r12,r0		@ inp
850	mov	r11,r2
851	sub	r10,r3,#AES_decrypt-AES_Td		@ Td
852#if __ARM_ARCH__<7
	@ Pre-ARMv7 builds: assemble each word from individual bytes, so the
	@ result does not depend on CPU endianness or input alignment.
853	ldrb	r0,[r12,#3]	@ load input data in endian-neutral
854	ldrb	r4,[r12,#2]	@ manner...
855	ldrb	r5,[r12,#1]
856	ldrb	r6,[r12,#0]
857	orr	r0,r0,r4,lsl#8
858	ldrb	r1,[r12,#7]
859	orr	r0,r0,r5,lsl#16
860	ldrb	r4,[r12,#6]
861	orr	r0,r0,r6,lsl#24
862	ldrb	r5,[r12,#5]
863	ldrb	r6,[r12,#4]
864	orr	r1,r1,r4,lsl#8
865	ldrb	r2,[r12,#11]
866	orr	r1,r1,r5,lsl#16
867	ldrb	r4,[r12,#10]
868	orr	r1,r1,r6,lsl#24
869	ldrb	r5,[r12,#9]
870	ldrb	r6,[r12,#8]
871	orr	r2,r2,r4,lsl#8
872	ldrb	r3,[r12,#15]
873	orr	r2,r2,r5,lsl#16
874	ldrb	r4,[r12,#14]
875	orr	r2,r2,r6,lsl#24
876	ldrb	r5,[r12,#13]
877	ldrb	r6,[r12,#12]
878	orr	r3,r3,r4,lsl#8
879	orr	r3,r3,r5,lsl#16
880	orr	r3,r3,r6,lsl#24
881#else
	@ ARMv7 and later builds: word loads, byte-reversed on little-endian
	@ so the register layout matches the byte-wise path above.
882	ldr	r0,[r12,#0]
883	ldr	r1,[r12,#4]
884	ldr	r2,[r12,#8]
885	ldr	r3,[r12,#12]
886#ifdef __ARMEL__
887	rev	r0,r0
888	rev	r1,r1
889	rev	r2,r2
890	rev	r3,r3
891#endif
892#endif
	@ Core rounds: state in r0-r3, r10 = table, r11 = key schedule.
893	bl	_armv4_AES_decrypt
894
895	ldr	r12,[sp],#4		@ pop out
896#if __ARM_ARCH__>=7
897#ifdef __ARMEL__
898	rev	r0,r0
899	rev	r1,r1
900	rev	r2,r2
901	rev	r3,r3
902#endif
903	str	r0,[r12,#0]
904	str	r1,[r12,#4]
905	str	r2,[r12,#8]
906	str	r3,[r12,#12]
907#else
908	mov	r4,r0,lsr#24		@ write output in endian-neutral
909	mov	r5,r0,lsr#16		@ manner...
910	mov	r6,r0,lsr#8
911	strb	r4,[r12,#0]
912	strb	r5,[r12,#1]
913	mov	r4,r1,lsr#24
914	strb	r6,[r12,#2]
915	mov	r5,r1,lsr#16
916	strb	r0,[r12,#3]
917	mov	r6,r1,lsr#8
918	strb	r4,[r12,#4]
919	strb	r5,[r12,#5]
920	mov	r4,r2,lsr#24
921	strb	r6,[r12,#6]
922	mov	r5,r2,lsr#16
923	strb	r1,[r12,#7]
924	mov	r6,r2,lsr#8
925	strb	r4,[r12,#8]
926	strb	r5,[r12,#9]
927	mov	r4,r3,lsr#24
928	strb	r6,[r12,#10]
929	mov	r5,r3,lsr#16
930	strb	r2,[r12,#11]
931	mov	r6,r3,lsr#8
932	strb	r4,[r12,#12]
933	strb	r5,[r12,#13]
934	strb	r6,[r12,#14]
935	strb	r3,[r12,#15]
936#endif
	@ Restore r4-r12 and return by loading the saved lr into pc.
937	ldmia	sp!,{r4-r12,pc}
938ENDPROC(AES_decrypt)
939
@ _armv4_AES_decrypt: core decryption rounds (file-local helper).
@ In:   r0-r3 = state words s0-s3
@       r10   = AES_Td
@       r11   = key schedule; the round count is read from offset 240
@ Out:  r0-r3 = resulting state
@       r10   = unchanged on exit (advanced by 1024 and restored internally)
@       r11   = points at the last round key
@ Uses: r4-r9 as scratch, r12 as loop counter, lr as the 0xff byte mask.
@       The return address is kept on the stack because lr is reused.
@ The loop below runs rounds-1 times; the last round uses the Td4 byte
@ table that follows the word table. Table loads and the xor/rotate
@ combining of earlier results are interleaved, so statement order matters.
940.type   _armv4_AES_decrypt,%function
941.align	2
942_armv4_AES_decrypt:
943	str	lr,[sp,#-4]!		@ push lr
	@ xor the first four key words into the state and fetch the round
	@ count (r11 has already advanced by 16, hence 240-16).
944	ldmia	r11!,{r4-r7}
945	eor	r0,r0,r4
946	ldr	r12,[r11,#240-16]
947	eor	r1,r1,r5
948	eor	r2,r2,r6
949	eor	r3,r3,r7
950	sub	r12,r12,#1
951	mov	lr,#255
952
	@ Pre-compute the four table indexes for s0; at the bottom of the
	@ loop the same is done for the next iteration.
953	and	r7,lr,r0,lsr#16
954	and	r8,lr,r0,lsr#8
955	and	r9,lr,r0
956	mov	r0,r0,lsr#24
957.Ldec_loop:
958	ldr	r4,[r10,r7,lsl#2]	@ Td1[s0>>16]
959	and	r7,lr,r1		@ i0
960	ldr	r5,[r10,r8,lsl#2]	@ Td2[s0>>8]
961	and	r8,lr,r1,lsr#16
962	ldr	r6,[r10,r9,lsl#2]	@ Td3[s0>>0]
963	and	r9,lr,r1,lsr#8
964	ldr	r0,[r10,r0,lsl#2]	@ Td0[s0>>24]
965	mov	r1,r1,lsr#24
966
967	ldr	r7,[r10,r7,lsl#2]	@ Td3[s1>>0]
968	ldr	r8,[r10,r8,lsl#2]	@ Td1[s1>>16]
969	ldr	r9,[r10,r9,lsl#2]	@ Td2[s1>>8]
970	eor	r0,r0,r7,ror#24
971	ldr	r1,[r10,r1,lsl#2]	@ Td0[s1>>24]
972	and	r7,lr,r2,lsr#8	@ i0
973	eor	r5,r8,r5,ror#8
974	and	r8,lr,r2		@ i1
975	eor	r6,r9,r6,ror#8
976	and	r9,lr,r2,lsr#16
977	ldr	r7,[r10,r7,lsl#2]	@ Td2[s2>>8]
978	eor	r1,r1,r4,ror#8
979	ldr	r8,[r10,r8,lsl#2]	@ Td3[s2>>0]
980	mov	r2,r2,lsr#24
981
982	ldr	r9,[r10,r9,lsl#2]	@ Td1[s2>>16]
983	eor	r0,r0,r7,ror#16
984	ldr	r2,[r10,r2,lsl#2]	@ Td0[s2>>24]
985	and	r7,lr,r3,lsr#16	@ i0
986	eor	r1,r1,r8,ror#24
987	and	r8,lr,r3,lsr#8	@ i1
988	eor	r6,r9,r6,ror#8
989	and	r9,lr,r3		@ i2
990	ldr	r7,[r10,r7,lsl#2]	@ Td1[s3>>16]
991	eor	r2,r2,r5,ror#8
992	ldr	r8,[r10,r8,lsl#2]	@ Td2[s3>>8]
993	mov	r3,r3,lsr#24
994
	@ Finish the round: fold in the remaining table words, then load the
	@ next four key words (r11 post-incremented by 16) and xor them in.
995	ldr	r9,[r10,r9,lsl#2]	@ Td3[s3>>0]
996	eor	r0,r0,r7,ror#8
997	ldr	r7,[r11],#16
998	eor	r1,r1,r8,ror#16
999	ldr	r3,[r10,r3,lsl#2]	@ Td0[s3>>24]
1000	eor	r2,r2,r9,ror#24
1001
1002	ldr	r4,[r11,#-12]
1003	eor	r0,r0,r7
1004	ldr	r5,[r11,#-8]
1005	eor	r3,r3,r6,ror#8
1006	ldr	r6,[r11,#-4]
1007	and	r7,lr,r0,lsr#16
1008	eor	r1,r1,r4
1009	and	r8,lr,r0,lsr#8
1010	eor	r2,r2,r5
1011	and	r9,lr,r0
1012	eor	r3,r3,r6
1013	mov	r0,r0,lsr#24
1014
1015	subs	r12,r12,#1
1016	bne	.Ldec_loop
1017
	@ Last round: step r10 past the 1024-byte word table to Td4.
1018	add	r10,r10,#1024
1019
	@ The eight loads below touch the 256-byte table at 32-byte intervals;
	@ the loaded values are discarded (r4-r6 are overwritten afterwards).
1020	ldr	r5,[r10,#0]		@ prefetch Td4
1021	ldr	r6,[r10,#32]
1022	ldr	r4,[r10,#64]
1023	ldr	r5,[r10,#96]
1024	ldr	r6,[r10,#128]
1025	ldr	r4,[r10,#160]
1026	ldr	r5,[r10,#192]
1027	ldr	r6,[r10,#224]
1028
1029	ldrb	r0,[r10,r0]		@ Td4[s0>>24]
1030	ldrb	r4,[r10,r7]		@ Td4[s0>>16]
1031	and	r7,lr,r1		@ i0
1032	ldrb	r5,[r10,r8]		@ Td4[s0>>8]
1033	and	r8,lr,r1,lsr#16
1034	ldrb	r6,[r10,r9]		@ Td4[s0>>0]
1035	and	r9,lr,r1,lsr#8
1036
	@ For the top-byte lookups the ARM build folds the lsr 24 into the
	@ load address, while the Thumb build computes the address with a
	@ separate add first. Only one variant is assembled.
1037	ldrb	r7,[r10,r7]		@ Td4[s1>>0]
1038 ARM(	ldrb	r1,[r10,r1,lsr#24]  )	@ Td4[s1>>24]
1039 THUMB(	add	r1,r10,r1,lsr#24    ) 	@ Td4[s1>>24]
1040 THUMB(	ldrb	r1,[r1]		    )
1041	ldrb	r8,[r10,r8]		@ Td4[s1>>16]
1042	eor	r0,r7,r0,lsl#24
1043	ldrb	r9,[r10,r9]		@ Td4[s1>>8]
1044	eor	r1,r4,r1,lsl#8
1045	and	r7,lr,r2,lsr#8	@ i0
1046	eor	r5,r5,r8,lsl#8
1047	and	r8,lr,r2		@ i1
1048	ldrb	r7,[r10,r7]		@ Td4[s2>>8]
1049	eor	r6,r6,r9,lsl#8
1050	ldrb	r8,[r10,r8]		@ Td4[s2>>0]
1051	and	r9,lr,r2,lsr#16
1052
1053 ARM(	ldrb	r2,[r10,r2,lsr#24]  )	@ Td4[s2>>24]
1054 THUMB(	add	r2,r10,r2,lsr#24    )	@ Td4[s2>>24]
1055 THUMB(	ldrb	r2,[r2]		    )
1056	eor	r0,r0,r7,lsl#8
1057	ldrb	r9,[r10,r9]		@ Td4[s2>>16]
1058	eor	r1,r8,r1,lsl#16
1059	and	r7,lr,r3,lsr#16	@ i0
1060	eor	r2,r5,r2,lsl#16
1061	and	r8,lr,r3,lsr#8	@ i1
1062	ldrb	r7,[r10,r7]		@ Td4[s3>>16]
1063	eor	r6,r6,r9,lsl#16
1064	ldrb	r8,[r10,r8]		@ Td4[s3>>8]
1065	and	r9,lr,r3		@ i2
1066
	@ The last round key is read without write-back, so r11 stays on it.
1067	ldrb	r9,[r10,r9]		@ Td4[s3>>0]
1068 ARM(	ldrb	r3,[r10,r3,lsr#24]  )	@ Td4[s3>>24]
1069 THUMB(	add	r3,r10,r3,lsr#24    )	@ Td4[s3>>24]
1070 THUMB(	ldrb	r3,[r3]		    )
1071	eor	r0,r0,r7,lsl#16
1072	ldr	r7,[r11,#0]
1073	eor	r1,r1,r8,lsl#8
1074	ldr	r4,[r11,#4]
1075	eor	r2,r9,r2,lsl#8
1076	ldr	r5,[r11,#8]
1077	eor	r3,r6,r3,lsl#24
1078	ldr	r6,[r11,#12]
1079
1080	eor	r0,r0,r7
1081	eor	r1,r1,r4
1082	eor	r2,r2,r5
1083	eor	r3,r3,r6
1084
	@ Undo the +1024 so r10 again points at AES_Td.
1085	sub	r10,r10,#1024
1086	ldr	pc,[sp],#4		@ pop and return
1087.size	_armv4_AES_decrypt,.-_armv4_AES_decrypt
@ Identification string emitted into the current section right after the
@ code, followed by re-alignment to a 4-byte boundary.
1088.asciz	"AES for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
1089.align	2
1090