1#if defined(__arm__)
2#include <openssl/arm_arch.h>
3
4#if __ARM_MAX_ARCH__>=7
@ Assembler setup: ARMv7-A with NEON, ARM (not Thumb) instruction encoding.
@ The AES instructions used in this file are emitted as raw .byte sequences.
5.text
6.arch	armv7-a	@ don't confuse not-so-latest binutils with armv8 :-)
7.fpu	neon
8.code	32
9#undef	__thumb2__
10.align	5
@ Constant table read by aes_hw_set_encrypt_key through r3:
@   row 0: starting round constant, 0x01 in every 32-bit lane (loaded into q1)
@   row 1: byte-permute mask used with vtbl (loaded into q2)
@   row 2: 0x1b in every lane, reloaded into q1 for the last two AES-128 rounds
11.Lrcon:
12.long	0x01,0x01,0x01,0x01
13.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
14.long	0x1b,0x1b,0x1b,0x1b
15
@-----------------------------------------------------------------------
@ aes_hw_set_encrypt_key: expand a user key into an encryption key schedule.
@ In:    r0 = user key bytes
@        r1 = key length in bits
@        r2 = key schedule output buffer
@ Out:   r0 = 0 on success,
@             -1 if r0 or r2 is zero,
@             -2 if r1 is not one of 128, 192, 256
@ Notes: The round count (10, 12 or 14) is stored at byte offset 240 of the
@        schedule. On success r2 is left pointing at that offset and r12
@        holds the round count; aes_hw_set_decrypt_key enters at .Lenc_key
@        and relies on both.
@ Uses:  r1, r3, r12, q0-q3, q8-q10, flags
@-----------------------------------------------------------------------
16.globl	aes_hw_set_encrypt_key
17.hidden	aes_hw_set_encrypt_key
18.type	aes_hw_set_encrypt_key,%function
19.align	5
20aes_hw_set_encrypt_key:
21.Lenc_key:
@ Argument validation; r3 carries the prospective return code.
22	mov	r3,#-1
23	cmp	r0,#0
24	beq	.Lenc_key_abort
25	cmp	r2,#0
26	beq	.Lenc_key_abort
27	mov	r3,#-2
28	cmp	r1,#128
29	blt	.Lenc_key_abort
30	cmp	r1,#256
31	bgt	.Lenc_key_abort
32	tst	r1,#0x3f
33	bne	.Lenc_key_abort
34
@ r3 is reused as the pointer into .Lrcon. The flags set by the compare
@ against 192 are consumed by the three-way dispatch below; none of the
@ intervening instructions modify them.
35	adr	r3,.Lrcon
36	cmp	r1,#192
37
38	veor	q0,q0,q0
39	vld1.8	{q3},[r0]!
40	mov	r1,#8		@ reuse r1
41	vld1.32	{q1,q2},[r3]!
42
43	blt	.Loop128
44	beq	.L192
45	b	.L256
46
@ 128-bit key: r1 counts 8 loop iterations; q3 holds the current round key,
@ q1 the round constant (shifted left by one each round), q2 the vtbl mask,
@ q0 is all-zero.
47.align	4
48.Loop128:
49	vtbl.8	d20,{q3},d4
50	vtbl.8	d21,{q3},d5
51	vext.8	q9,q0,q3,#12
52	vst1.32	{q3},[r2]!
53.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
54	subs	r1,r1,#1
55
56	veor	q3,q3,q9
57	vext.8	q9,q0,q9,#12
58	veor	q3,q3,q9
59	vext.8	q9,q0,q9,#12
60	veor	q10,q10,q1
61	veor	q3,q3,q9
62	vshl.u8	q1,q1,#1
63	veor	q3,q3,q10
64	bne	.Loop128
65
@ Last two rounds are unrolled and use the 0x1b row of .Lrcon as the
@ starting round constant.
66	vld1.32	{q1},[r3]
67
68	vtbl.8	d20,{q3},d4
69	vtbl.8	d21,{q3},d5
70	vext.8	q9,q0,q3,#12
71	vst1.32	{q3},[r2]!
72.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
73
74	veor	q3,q3,q9
75	vext.8	q9,q0,q9,#12
76	veor	q3,q3,q9
77	vext.8	q9,q0,q9,#12
78	veor	q10,q10,q1
79	veor	q3,q3,q9
80	vshl.u8	q1,q1,#1
81	veor	q3,q3,q10
82
83	vtbl.8	d20,{q3},d4
84	vtbl.8	d21,{q3},d5
85	vext.8	q9,q0,q3,#12
86	vst1.32	{q3},[r2]!
87.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
88
89	veor	q3,q3,q9
90	vext.8	q9,q0,q9,#12
91	veor	q3,q3,q9
92	vext.8	q9,q0,q9,#12
93	veor	q10,q10,q1
94	veor	q3,q3,q9
95	veor	q3,q3,q10
@ Final round key is stored without post-increment; r2 is then advanced so
@ that it points at byte offset 240 of the schedule.
96	vst1.32	{q3},[r2]
97	add	r2,r2,#0x50
98
99	mov	r12,#10
100	b	.Ldone
101
@ 192-bit key: the extra 8 key bytes live in d16; the vtbl mask in q2 is
@ adjusted by subtracting 8 from every byte.
102.align	4
103.L192:
104	vld1.8	{d16},[r0]!
105	vmov.i8	q10,#8			@ borrow q10
106	vst1.32	{q3},[r2]!
107	vsub.i8	q2,q2,q10	@ adjust the mask
108
109.Loop192:
110	vtbl.8	d20,{q8},d4
111	vtbl.8	d21,{q8},d5
112	vext.8	q9,q0,q3,#12
113	vst1.32	{d16},[r2]!
114.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
115	subs	r1,r1,#1
116
117	veor	q3,q3,q9
118	vext.8	q9,q0,q9,#12
119	veor	q3,q3,q9
120	vext.8	q9,q0,q9,#12
121	veor	q3,q3,q9
122
123	vdup.32	q9,d7[1]
124	veor	q9,q9,q8
125	veor	q10,q10,q1
126	vext.8	q8,q0,q8,#12
127	vshl.u8	q1,q1,#1
128	veor	q8,q8,q9
129	veor	q3,q3,q10
130	veor	q8,q8,q10
131	vst1.32	{q3},[r2]!
132	bne	.Loop192
133
134	mov	r12,#12
135	add	r2,r2,#0x20
136	b	.Ldone
137
@ 256-bit key: the second 16 key bytes live in q8; r1 is reset to 7 and the
@ loop exits from its middle once the counter reaches zero.
138.align	4
139.L256:
140	vld1.8	{q8},[r0]
141	mov	r1,#7
142	mov	r12,#14
143	vst1.32	{q3},[r2]!
144
145.Loop256:
146	vtbl.8	d20,{q8},d4
147	vtbl.8	d21,{q8},d5
148	vext.8	q9,q0,q3,#12
149	vst1.32	{q8},[r2]!
150.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
151	subs	r1,r1,#1
152
153	veor	q3,q3,q9
154	vext.8	q9,q0,q9,#12
155	veor	q3,q3,q9
156	vext.8	q9,q0,q9,#12
157	veor	q10,q10,q1
158	veor	q3,q3,q9
159	vshl.u8	q1,q1,#1
160	veor	q3,q3,q10
161	vst1.32	{q3},[r2]!
162	beq	.Ldone
163
164	vdup.32	q10,d7[1]
165	vext.8	q9,q0,q8,#12
166.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
167
168	veor	q8,q8,q9
169	vext.8	q9,q0,q9,#12
170	veor	q8,q8,q9
171	vext.8	q9,q0,q9,#12
172	veor	q8,q8,q9
173
174	veor	q8,q8,q10
175	b	.Loop256
176
@ Common success exit: r2 points at offset 240 of the schedule, where the
@ round count in r12 is stored.
177.Ldone:
178	str	r12,[r2]
179	mov	r3,#0
180
181.Lenc_key_abort:
182	mov	r0,r3			@ return value
183
184	bx	lr
185.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
186
@-----------------------------------------------------------------------
@ aes_hw_set_decrypt_key: build a decryption key schedule.
@ In:    r0 = user key bytes, r1 = key length in bits, r2 = key schedule
@ Out:   r0 = 0 on success, otherwise the error code produced by .Lenc_key
@ Notes: Runs the encryption key expansion via .Lenc_key, then reverses the
@        order of the round keys in place. Every round key except the two
@        outermost ones is additionally passed through aesimc.
@ Uses:  r4 (saved/restored), r12, q0, q1 plus whatever .Lenc_key uses
@-----------------------------------------------------------------------
187.globl	aes_hw_set_decrypt_key
188.hidden	aes_hw_set_decrypt_key
189.type	aes_hw_set_decrypt_key,%function
190.align	5
191aes_hw_set_decrypt_key:
192	stmdb	sp!,{r4,lr}
193	bl	.Lenc_key
194
195	cmp	r0,#0
196	bne	.Ldec_key_abort
197
@ .Lenc_key leaves r2 at schedule+240 and the round count in r12.
198	sub	r2,r2,#240		@ restore original r2
199	mov	r4,#-16
200	add	r0,r2,r12,lsl#4	@ end of key schedule
201
@ Swap the first and last round keys as they are (no aesimc).
202	vld1.32	{q0},[r2]
203	vld1.32	{q1},[r0]
204	vst1.32	{q0},[r0],r4
205	vst1.32	{q1},[r2]!
206
@ Walk inwards from both ends (r2 ascending, r0 descending), swapping
@ pairs of round keys after applying aesimc to each.
207.Loop_imc:
208	vld1.32	{q0},[r2]
209	vld1.32	{q1},[r0]
210.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
211.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
212	vst1.32	{q0},[r0],r4
213	vst1.32	{q1},[r2]!
214	cmp	r0,r2
215	bhi	.Loop_imc
216
@ Middle round key: transformed and written back.
217	vld1.32	{q0},[r2]
218.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
219	vst1.32	{q0},[r0]
220
221	eor	r0,r0,r0		@ return value
222.Ldec_key_abort:
223	ldmia	sp!,{r4,pc}
224.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
@-----------------------------------------------------------------------
@ aes_hw_encrypt: encrypt a single 16-byte block.
@ In:    r0 = input block, r1 = output block, r2 = key schedule
@ Out:   16 bytes written at r1; no return value is set
@ Notes: The round count is read from byte offset 240 of the key schedule.
@        Round keys are loaded alternately into q0 and q1, one pair ahead
@        of their use; the state is held in q2.
@ Uses:  r2 (advanced), r3, q0-q2, flags
@-----------------------------------------------------------------------
225.globl	aes_hw_encrypt
226.hidden	aes_hw_encrypt
227.type	aes_hw_encrypt,%function
228.align	5
229aes_hw_encrypt:
230	ldr	r3,[r2,#240]
231	vld1.32	{q0},[r2]!
232	vld1.8	{q2},[r0]
233	sub	r3,r3,#2
234	vld1.32	{q1},[r2]!
235
@ Two rounds per iteration.
236.Loop_enc:
237.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
238.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
239	vld1.32	{q0},[r2]!
240	subs	r3,r3,#2
241.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
242.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
243	vld1.32	{q1},[r2]!
244	bgt	.Loop_enc
245
@ Last two rounds: the final aese is not followed by aesmc, and the last
@ round key is applied with a plain XOR.
246.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
247.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
248	vld1.32	{q0},[r2]
249.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
250	veor	q2,q2,q0
251
252	vst1.8	{q2},[r1]
253	bx	lr
254.size	aes_hw_encrypt,.-aes_hw_encrypt
@-----------------------------------------------------------------------
@ aes_hw_decrypt: decrypt a single 16-byte block.
@ In:    r0 = input block, r1 = output block, r2 = key schedule
@ Out:   16 bytes written at r1; no return value is set
@ Notes: Same structure as aes_hw_encrypt, using aesd/aesimc in place of
@        aese/aesmc. The round count is read from byte offset 240 of the
@        key schedule.
@ Uses:  r2 (advanced), r3, q0-q2, flags
@-----------------------------------------------------------------------
255.globl	aes_hw_decrypt
256.hidden	aes_hw_decrypt
257.type	aes_hw_decrypt,%function
258.align	5
259aes_hw_decrypt:
260	ldr	r3,[r2,#240]
261	vld1.32	{q0},[r2]!
262	vld1.8	{q2},[r0]
263	sub	r3,r3,#2
264	vld1.32	{q1},[r2]!
265
@ Two rounds per iteration.
266.Loop_dec:
267.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
268.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
269	vld1.32	{q0},[r2]!
270	subs	r3,r3,#2
271.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
272.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
273	vld1.32	{q1},[r2]!
274	bgt	.Loop_dec
275
@ Last two rounds: the final aesd is not followed by aesimc, and the last
@ round key is applied with a plain XOR.
276.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
277.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
278	vld1.32	{q0},[r2]
279.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
280	veor	q2,q2,q0
281
282	vst1.8	{q2},[r1]
283	bx	lr
284.size	aes_hw_decrypt,.-aes_hw_decrypt
@-----------------------------------------------------------------------
@ aes_hw_cbc_encrypt: CBC-mode encryption or decryption.
@ In:    r0 = input, r1 = output, r2 = length in bytes, r3 = key schedule
@        [sp]   = pointer to the 16-byte IV (loaded into r4)
@        [sp+4] = direction flag (loaded into r5); zero selects decryption
@ Out:   output written at r1; the chaining value in q6 is written back
@        through the IV pointer on exit
@ Notes: If the length is below 16 the function returns without touching
@        the output or the IV. Otherwise the length is rounded down to a
@        multiple of 16. The round count is read from offset 240 of the
@        key schedule.
@ Regs:  q8,q9 = first two round keys, q10-q15 and q7 = last seven round
@        keys, q6 = chaining value. r4-r8, lr and d8-d15 are saved on
@        entry and restored on exit.
@-----------------------------------------------------------------------
285.globl	aes_hw_cbc_encrypt
286.hidden	aes_hw_cbc_encrypt
287.type	aes_hw_cbc_encrypt,%function
288.align	5
289aes_hw_cbc_encrypt:
290	mov	ip,sp
291	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
292	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
293	ldmia	ip,{r4,r5}		@ load remaining args
@ r8 is the input post-increment: 16 normally, 0 when only one block is
@ left so that the look-ahead load does not advance past the input.
294	subs	r2,r2,#16
295	mov	r8,#16
296	blo	.Lcbc_abort
297	moveq	r8,#0
298
@ The flags from this compare select the direction at the beq .Lcbc_dec
@ below; none of the instructions in between modify them.
299	cmp	r5,#0			@ en- or decrypting?
300	ldr	r5,[r3,#240]
301	and	r2,r2,#-16
302	vld1.8	{q6},[r4]
303	vld1.8	{q0},[r0],r8
304
305	vld1.32	{q8,q9},[r3]		@ load key schedule...
306	sub	r5,r5,#6
307	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
308	sub	r5,r5,#2
309	vld1.32	{q10,q11},[r7]!
310	vld1.32	{q12,q13},[r7]!
311	vld1.32	{q14,q15},[r7]!
312	vld1.32	{q7},[r7]
313
314	add	r7,r3,#32
315	mov	r6,r5
316	beq	.Lcbc_dec
317
@ ---- Encryption ----
@ r5 is the round count minus 8; a value of 2 means 10 rounds, which takes
@ the dedicated .Lcbc_enc128 path. q5 = first round key XOR last round key.
318	cmp	r5,#2
319	veor	q0,q0,q6
320	veor	q5,q8,q7
321	beq	.Lcbc_enc128
322
@ 12/14-round path: q2,q3 hold round keys 2 and 3; r7, r6, r12, r14 and r3
@ point at round keys 1, 4, 5, 6 and 7 for reloading q8/q9 in the loop.
323	vld1.32	{q2,q3},[r7]
324	add	r7,r3,#16
325	add	r6,r3,#16*4
326	add	r12,r3,#16*5
327.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
328.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
329	add	r14,r3,#16*6
330	add	r3,r3,#16*7
331	b	.Lenter_cbc_enc
332
333.align	4
334.Loop_cbc_enc:
335.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
336.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
337	vst1.8	{q6},[r1]!
338.Lenter_cbc_enc:
339.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
340.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
341.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
342.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
343	vld1.32	{q8},[r6]
344	cmp	r5,#4
345.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
346.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
347	vld1.32	{q9},[r12]
348	beq	.Lcbc_enc192
349
350.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
351.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
352	vld1.32	{q8},[r14]
353.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
354.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
355	vld1.32	{q9},[r3]
356	nop
357
@ The flags from the subs below drive both the moveq and the loop-closing
@ bhs; nothing in between modifies them.
358.Lcbc_enc192:
359.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
360.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
361	subs	r2,r2,#16
362.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
363.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
364	moveq	r8,#0
365.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
366.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
367.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
368.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
369	vld1.8	{q8},[r0],r8
370.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
371.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
372	veor	q8,q8,q5
373.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
374.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
375	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
376.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
377.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
378.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
379	veor	q6,q0,q7
380	bhs	.Loop_cbc_enc
381
382	vst1.8	{q6},[r1]!
383	b	.Lcbc_done
384
@ 10-round path: all round keys stay resident in registers
@ (q8, q9, q2, q3, q10-q15, q7).
385.align	5
386.Lcbc_enc128:
387	vld1.32	{q2,q3},[r7]
388.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
389.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
390	b	.Lenter_cbc_enc128
391.Loop_cbc_enc128:
392.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
393.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
394	vst1.8	{q6},[r1]!
395.Lenter_cbc_enc128:
396.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
397.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
398	subs	r2,r2,#16
399.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
400.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
401	moveq	r8,#0
402.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
403.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
404.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
405.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
406.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
407.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
408	vld1.8	{q8},[r0],r8
409.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
410.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
411.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
412.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
413.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
414.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
415	veor	q8,q8,q5
416.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
417	veor	q6,q0,q7
418	bhs	.Loop_cbc_enc128
419
420	vst1.8	{q6},[r1]!
421	b	.Lcbc_done
@ ---- Decryption ----
@ Processes three blocks per iteration in q0, q1 and q10; copies of the
@ ciphertext inputs are kept in q2, q3 and q11 for the CBC XOR and as the
@ next chaining value. One or two leftover blocks go through .Lcbc_dec_tail.
422.align	5
423.Lcbc_dec:
424	vld1.8	{q10},[r0]!
425	subs	r2,r2,#32		@ bias
426	add	r6,r5,#2
427	vorr	q3,q0,q0
428	vorr	q1,q0,q0
429	vorr	q11,q10,q10
430	blo	.Lcbc_dec_tail
431
432	vorr	q1,q10,q10
433	vld1.8	{q10},[r0]!
434	vorr	q2,q0,q0
435	vorr	q3,q1,q1
436	vorr	q11,q10,q10
437
438.Loop3x_cbc_dec:
439.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
440.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
441.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
442.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
443.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
444.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
445	vld1.32	{q8},[r7]!
446	subs	r6,r6,#2
447.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
448.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
449.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
450.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
451.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
452.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
453	vld1.32	{q9},[r7]!
454	bgt	.Loop3x_cbc_dec
455
456.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
457.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
458.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
459.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
460.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
461.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
462	veor	q4,q6,q7
@ The flags from this subs are consumed by movlo just below and by the
@ bhs .Loop3x_cbc_dec at the bottom of the loop body.
463	subs	r2,r2,#0x30
464	veor	q5,q2,q7
465	movlo	r6,r2			@ r6 is zero at this point
466.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
467.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
468.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
469.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
470.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
471.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
472	veor	q9,q3,q7
473	add	r0,r0,r6		@ r0 is adjusted in such way that
474					@ at exit from the loop q1 and q10
475					@ are loaded with last "words"
476	vorr	q6,q11,q11
477	mov	r7,r3
478.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
479.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
480.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
481.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
482.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
483.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
484	vld1.8	{q2},[r0]!
485.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
486.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
487.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
488.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
489.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
490.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
491	vld1.8	{q3},[r0]!
492.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
493.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
494.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
495.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
496.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
497.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
498	vld1.8	{q11},[r0]!
499.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
500.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
501.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
502	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
503	add	r6,r5,#2
504	veor	q4,q4,q0
505	veor	q5,q5,q1
506	veor	q10,q10,q9
507	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
508	vst1.8	{q4},[r1]!
509	vorr	q0,q2,q2
510	vst1.8	{q5},[r1]!
511	vorr	q1,q3,q3
512	vst1.8	{q10},[r1]!
513	vorr	q10,q11,q11
514	bhs	.Loop3x_cbc_dec
515
@ r2 == -0x30 here means the 3x loop consumed everything.
516	cmn	r2,#0x30
517	beq	.Lcbc_done
518	nop
519
@ Tail: one or two remaining blocks, processed in q1 and q10.
520.Lcbc_dec_tail:
521.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
522.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
523.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
524.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
525	vld1.32	{q8},[r7]!
526	subs	r6,r6,#2
527.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
528.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
529.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
530.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
531	vld1.32	{q9},[r7]!
532	bgt	.Lcbc_dec_tail
533
534.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
535.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
536.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
537.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
538.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
539.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
540.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
541.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
542.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
543.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
544.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
545.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
@ The flags from this cmn are consumed by beq .Lcbc_dec_one below.
546	cmn	r2,#0x20
547.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
548.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
549.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
550.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
551	veor	q5,q6,q7
552.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
553.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
554.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
555.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
556	veor	q9,q3,q7
557.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
558.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
559	beq	.Lcbc_dec_one
560	veor	q5,q5,q1
561	veor	q9,q9,q10
562	vorr	q6,q11,q11
563	vst1.8	{q5},[r1]!
564	vst1.8	{q9},[r1]!
565	b	.Lcbc_done
566
567.Lcbc_dec_one:
568	veor	q5,q5,q10
569	vorr	q6,q11,q11
570	vst1.8	{q5},[r1]!
571
@ Write the chaining value back through the IV pointer, then restore the
@ saved registers and return.
572.Lcbc_done:
573	vst1.8	{q6},[r4]
574.Lcbc_abort:
575	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
576	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
577.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
@-----------------------------------------------------------------------
@ aes_hw_ctr32_encrypt_blocks: CTR-mode keystream XOR with a 32-bit counter.
@ In:    r0 = input, r1 = output, r2 = number of 16-byte blocks,
@        r3 = key schedule
@        [sp] = pointer to the 16-byte counter block (loaded into r4)
@ Out:   r2 blocks written at r1; no return value is set
@ Notes: The last 32-bit word of the counter block is the counter. It is
@        byte-swapped into r8 (unless __ARMEB__ is defined) and incremented
@        per block. The counter block in memory is only read, never
@        written back. The round count is read from offset 240 of the
@        key schedule.
@        A block count of zero returns immediately. Without this guard the
@        tail path would be entered with r2 == 0, read 16 bytes of input
@        and store two blocks (32 bytes) to the output.
@ Regs:  q0, q1, q10 = counter blocks in flight, q6 = original counter
@        block, q8,q9 = rolling round keys, q12-q15 and q7 = last five
@        round keys. r4-r10, lr and d8-d15 are saved on entry and restored
@        on exit.
@-----------------------------------------------------------------------
578.globl	aes_hw_ctr32_encrypt_blocks
579.hidden	aes_hw_ctr32_encrypt_blocks
580.type	aes_hw_ctr32_encrypt_blocks,%function
581.align	5
582aes_hw_ctr32_encrypt_blocks:
	cmp	r2,#0			@ zero blocks: nothing to read or write
	bxeq	lr			@ return before touching the stack
583	mov	ip,sp
584	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
585	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
586	ldr	r4, [ip]		@ load remaining arg
587	ldr	r5,[r3,#240]
588
589	ldr	r8, [r4, #12]
590	vld1.32	{q0},[r4]
591
592	vld1.32	{q8,q9},[r3]		@ load key schedule...
593	sub	r5,r5,#4
594	mov	r12,#16
@ The flags from this compare are consumed by movlo and by
@ bls .Lctr32_tail below; nothing in between modifies them.
595	cmp	r2,#2
596	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
597	sub	r5,r5,#2
598	vld1.32	{q12,q13},[r7]!
599	vld1.32	{q14,q15},[r7]!
600	vld1.32	{q7},[r7]
601	add	r7,r3,#32
602	mov	r6,r5
@ r12 is the input stride used by the tail: 0 when a single block remains,
@ so the second tail load re-reads the same block instead of running past
@ the input.
603	movlo	r12,#0
604#ifndef __ARMEB__
605	rev	r8, r8
606#endif
607	vorr	q1,q0,q0
608	add	r10, r8, #1
609	vorr	q10,q0,q0
610	add	r8, r8, #2
611	vorr	q6,q0,q0
612	rev	r10, r10
613	vmov.32	d3[1],r10
614	bls	.Lctr32_tail
615	rev	r12, r8
616	sub	r2,r2,#3		@ bias
617	vmov.32	d21[1],r12
618	b	.Loop3x_ctr32
619
@ Main loop: three counter blocks (q0, q1, q10) per iteration.
620.align	4
621.Loop3x_ctr32:
622.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
623.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
624.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
625.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
626.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
627.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
628	vld1.32	{q8},[r7]!
629	subs	r6,r6,#2
630.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
631.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
632.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
633.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
634.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
635.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
636	vld1.32	{q9},[r7]!
637	bgt	.Loop3x_ctr32
638
@ From here the in-flight states move to q4, q5 and q9 so that q0, q1 and
@ q10 can be refilled from q6 with the next three counter values.
639.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
640.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
641.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
642.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
643	vld1.8	{q2},[r0]!
644	vorr	q0,q6,q6
645.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
646.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
647	vld1.8	{q3},[r0]!
648	vorr	q1,q6,q6
649.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
650.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
651.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
652.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
653	vld1.8	{q11},[r0]!
654	mov	r7,r3
655.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
656.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
657	vorr	q10,q6,q6
658	add	r9,r8,#1
659.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
660.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
661.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
662.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
663	veor	q2,q2,q7
664	add	r10,r8,#2
665.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
666.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
667	veor	q3,q3,q7
668	add	r8,r8,#3
669.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
670.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
671.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
672.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
673	veor	q11,q11,q7
674	rev	r9,r9
675.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
676.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
677	vmov.32	d1[1], r9
678	rev	r10,r10
679.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
680.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
681.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
682.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
683	vmov.32	d3[1], r10
684	rev	r12,r8
685.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
686.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
687	vmov.32	d21[1], r12
@ The flags from this subs are consumed by bhs .Loop3x_ctr32 below.
688	subs	r2,r2,#3
689.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
690.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
691.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15
692
693	veor	q2,q2,q4
694	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
695	vst1.8	{q2},[r1]!
696	veor	q3,q3,q5
697	mov	r6,r5
698	vst1.8	{q3},[r1]!
699	veor	q11,q11,q9
700	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
701	vst1.8	{q11},[r1]!
702	bhs	.Loop3x_ctr32
703
@ Undo the bias: r2 is now the number of leftover blocks (0, 1 or 2).
704	adds	r2,r2,#3
705	beq	.Lctr32_done
706	cmp	r2,#1
707	mov	r12,#16
708	moveq	r12,#0
709
@ Tail: one or two blocks in q0 and q1. Both are always computed; the
@ second result is only stored when r2 is not 1.
710.Lctr32_tail:
711.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
712.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
713.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
714.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
715	vld1.32	{q8},[r7]!
716	subs	r6,r6,#2
717.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
718.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
719.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
720.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
721	vld1.32	{q9},[r7]!
722	bgt	.Lctr32_tail
723
724.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
725.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
726.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
727.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
728.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
729.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
730.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
731.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
732	vld1.8	{q2},[r0],r12
733.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
734.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
735.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
736.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
737	vld1.8	{q3},[r0]
738.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
739.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
740.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
741.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
742	veor	q2,q2,q7
743.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
744.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
745.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
746.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
747	veor	q3,q3,q7
748.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
749.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15
750
751	cmp	r2,#1
752	veor	q2,q2,q0
753	veor	q3,q3,q1
754	vst1.8	{q2},[r1]!
755	beq	.Lctr32_done
756	vst1.8	{q3},[r1]
757
758.Lctr32_done:
759	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
760	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
761.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
762#endif
763#endif
764