18ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#include <openssl/arm_arch.h>
28ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
@ Assembler setup: place what follows in the code section, then pick the
@ instruction syntax and encoding from compiler-provided macros.
38ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.text
@ Unified syntax is requested for Thumb-2 builds and whenever clang is the
@ assembler.
4927a49544eb76fe28bcca2552db0168fd2efc502Robert Sloan#if defined(__thumb2__) || defined(__clang__)
58ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.syntax	unified
6927a49544eb76fe28bcca2552db0168fd2efc502Robert Sloan#endif
@ Thumb encoding for Thumb-2 builds, 32-bit ARM encoding otherwise.
7927a49544eb76fe28bcca2552db0168fd2efc502Robert Sloan#if defined(__thumb2__)
88ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.thumb
98ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#else
108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.code	32
118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#endif
128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
@ The byte-wise load sequence later in this file is written with the
@ mnemonic ldrhsb (condition before the size suffix). Under the same
@ condition that enables unified syntax above, it is remapped to ldrbhs
@ (size suffix before the condition), the order unified syntax uses.
138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#if defined(__thumb2__) || defined(__clang__)
148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#define ldrhsb	ldrbhs
158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#endif
168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
@ Constant table placed directly ahead of the ChaCha20_ctr32 entry point.
@ The code addresses it with fixed offsets from LChaCha20_ctr32:
@ Lsigma is reached with "sub r14,r14,#64" and the capability word with
@ "ldr r4,[r14,#-32]". That only works while the table is 32-byte aligned
@ and 36 bytes long (16 + 16 + 4), so that the ".align 5" in front of the
@ entry point pads it to exactly 64 bytes. Do not add, remove or reorder
@ entries without updating those offsets.
178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.align	5
@ Four 32-bit words loaded as-is into r0-r3 ("load sigma") and copied to the
@ start of the on-stack state. Stored little-endian, their bytes spell the
@ ASCII string "expand 32-byte k".
188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanLsigma:
198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.long	0x61707865,0x3320646e,0x79622d32,0x6b206574	@ endian-neutral
@ Vector {1,0,0,0}.
@ NOTE(review): not referenced by the integer code path; presumably the
@ block-counter increment for the NEON path - confirm against LChaCha20_neon.
208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanLone:
218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.long	1,0,0,0
@ When ARMv7+ code may be built, this word holds the distance from
@ LChaCha20_ctr32 to OPENSSL_armcap_P. The entry code adds it to the entry
@ address at run time ("ldr r4,[r14,r4]") to fetch the capability value and
@ test ARMV7_NEON.
228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#if __ARM_MAX_ARCH__>=7
238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanLOPENSSL_armcap:
248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.word	OPENSSL_armcap_P-LChaCha20_ctr32
258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#else
@ Placeholder that is never read in this configuration. It keeps the table
@ at 36 bytes so the entry point still lands 64 bytes after Lsigma.
268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.word	-1
278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#endif
288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.globl	_ChaCha20_ctr32
308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.private_extern	_ChaCha20_ctr32
318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#ifdef __thumb2__
328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.thumb_func	_ChaCha20_ctr32
338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#endif
348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.align	5
358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan_ChaCha20_ctr32:
368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanLChaCha20_ctr32:
378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r12,[sp,#0]		@ pull pointer to counter and nonce
388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#if __ARM_ARCH__<7 && !defined(__thumb2__)
408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	sub	r14,pc,#16		@ _ChaCha20_ctr32
418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#else
428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	adr	r14,LChaCha20_ctr32
438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#endif
448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	cmp	r2,#0			@ len==0?
458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#ifdef	__thumb2__
468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	eq
478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#endif
488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	addeq	sp,sp,#4*3
498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	beq	Lno_data
508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#if __ARM_MAX_ARCH__>=7
518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	cmp	r2,#192			@ test len
528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	bls	Lshort
538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r4,[r14,#-32]
548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r4,[r14,r4]
558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__APPLE__
568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r4,[r4]
578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	tst	r4,#ARMV7_NEON
598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	bne	LChaCha20_neon
608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanLshort:
618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#endif
628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	r12,{r4,r5,r6,r7}		@ load counter and nonce
638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	sub	sp,sp,#4*(16)		@ off-load area
648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	sub	r14,r14,#64		@ Lsigma
658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	stmdb	sp!,{r4,r5,r6,r7}		@ copy counter and nonce
668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}		@ load key
678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	r14,{r0,r1,r2,r3}		@ load sigma
688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11}		@ copy key
698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	stmdb	sp!,{r0,r1,r2,r3}		@ copy sigma
708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r10,[sp,#4*(16+10)]	@ off-load "rx"
718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r11,[sp,#4*(16+11)]	@ off-load "rx"
728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	b	Loop_outer_enter
738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.align	4
758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanLoop_outer:
768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	sp,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}		@ load key material
778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r11,[sp,#4*(32+2)]	@ save len
788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r12,  [sp,#4*(32+1)]	@ save inp
798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r14,  [sp,#4*(32+0)]	@ save out
808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanLoop_outer_enter:
818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r11, [sp,#4*(15)]
828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r12,[sp,#4*(12)]	@ modulo-scheduled load
838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r10, [sp,#4*(13)]
848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r14,[sp,#4*(14)]
858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r11, [sp,#4*(16+15)]
868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r11,#10
878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	b	Loop
888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.align	4
908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanLoop:
918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	subs	r11,r11,#1
928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r0,r0,r4
938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r12,r12,ror#16
948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r1,r1,r5
958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r10,r10,ror#16
968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r12,r12,r0,ror#16
978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r10,r10,r1,ror#16
988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,r8,r12
998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r4,r4,ror#20
1008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r9,r9,r10
1018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r5,r5,ror#20
1028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r4,r4,r8,ror#20
1038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r5,r5,r9,ror#20
1048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r0,r0,r4
1058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r12,r12,ror#24
1068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r1,r1,r5
1078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r10,r10,ror#24
1088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r12,r12,r0,ror#24
1098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r10,r10,r1,ror#24
1108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,r8,r12
1118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r4,r4,ror#25
1128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r9,r9,r10
1138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r5,r5,ror#25
1148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r10,[sp,#4*(16+13)]
1158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r10,[sp,#4*(16+15)]
1168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r4,r4,r8,ror#25
1178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r5,r5,r9,ror#25
1188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r8,[sp,#4*(16+8)]
1198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r8,[sp,#4*(16+10)]
1208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r2,r2,r6
1218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r14,r14,ror#16
1228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r9,[sp,#4*(16+9)]
1238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r9,[sp,#4*(16+11)]
1248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r3,r3,r7
1258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r10,r10,ror#16
1268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r14,r14,r2,ror#16
1278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r10,r10,r3,ror#16
1288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,r8,r14
1298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r6,r6,ror#20
1308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r9,r9,r10
1318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r7,r7,ror#20
1328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r6,r6,r8,ror#20
1338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r7,r7,r9,ror#20
1348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r2,r2,r6
1358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r14,r14,ror#24
1368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r3,r3,r7
1378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r10,r10,ror#24
1388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r14,r14,r2,ror#24
1398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r10,r10,r3,ror#24
1408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,r8,r14
1418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r6,r6,ror#25
1428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r9,r9,r10
1438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r7,r7,ror#25
1448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r6,r6,r8,ror#25
1458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r7,r7,r9,ror#25
1468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r0,r0,r5
1478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r10,r10,ror#16
1488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r1,r1,r6
1498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r12,r12,ror#16
1508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r10,r10,r0,ror#16
1518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r12,r12,r1,ror#16
1528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,r8,r10
1538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r5,r5,ror#20
1548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r9,r9,r12
1558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r6,r6,ror#20
1568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r5,r5,r8,ror#20
1578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r6,r6,r9,ror#20
1588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r0,r0,r5
1598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r10,r10,ror#24
1608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r1,r1,r6
1618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r12,r12,ror#24
1628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r10,r10,r0,ror#24
1638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r12,r12,r1,ror#24
1648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,r8,r10
1658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r5,r5,ror#25
1668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r10,[sp,#4*(16+15)]
1678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r10,[sp,#4*(16+13)]
1688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r9,r9,r12
1698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r6,r6,ror#25
1708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r5,r5,r8,ror#25
1718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r6,r6,r9,ror#25
1728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r8,[sp,#4*(16+10)]
1738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r8,[sp,#4*(16+8)]
1748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r2,r2,r7
1758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r10,r10,ror#16
1768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r9,[sp,#4*(16+11)]
1778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r9,[sp,#4*(16+9)]
1788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r3,r3,r4
1798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r14,r14,ror#16
1808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r10,r10,r2,ror#16
1818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r14,r14,r3,ror#16
1828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,r8,r10
1838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r7,r7,ror#20
1848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r9,r9,r14
1858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r4,r4,ror#20
1868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r7,r7,r8,ror#20
1878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r4,r4,r9,ror#20
1888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r2,r2,r7
1898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r10,r10,ror#24
1908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r3,r3,r4
1918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r14,r14,ror#24
1928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r10,r10,r2,ror#24
1938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r14,r14,r3,ror#24
1948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,r8,r10
1958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r7,r7,ror#25
1968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r9,r9,r14
1978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r4,r4,ror#25
1988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r7,r7,r8,ror#25
1998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r4,r4,r9,ror#25
2008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	bne	Loop
2018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
2028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r11,[sp,#4*(32+2)]	@ load len
2038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
2048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r8, [sp,#4*(16+8)]	@ modulo-scheduled store
2058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r9, [sp,#4*(16+9)]
2068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r12,[sp,#4*(16+12)]
2078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r10, [sp,#4*(16+13)]
2088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r14,[sp,#4*(16+14)]
2098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
2108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	@ at this point we have first half of 512-bit result in
2118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	@ rx and second half at sp+4*(16+8)
2128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
2138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	cmp	r11,#64		@ done yet?
2148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#ifdef	__thumb2__
2158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itete	lo
2168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#endif
2178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	addlo	r12,sp,#4*(0)		@ shortcut or ...
2188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhs	r12,[sp,#4*(32+1)]	@ ... load inp
2198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	addlo	r14,sp,#4*(0)		@ shortcut or ...
2208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhs	r14,[sp,#4*(32+0)]	@ ... load out
2218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
2228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r8,[sp,#4*(0)]	@ load key material
2238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r9,[sp,#4*(1)]
2248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
2258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#if __ARM_ARCH__>=6 || !defined(__ARMEB__)
2268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# if __ARM_ARCH__<7
2278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	orr	r10,r12,r14
2288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	tst	r10,#3		@ are input and output aligned?
2298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r10,[sp,#4*(2)]
2308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	bne	Lunaligned
2318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	cmp	r11,#64		@ restore flags
2328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# else
2338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r10,[sp,#4*(2)]
2348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
2358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r11,[sp,#4*(3)]
2368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
2378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r0,r0,r8	@ accumulate key material
2388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r1,r1,r9
2398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
2408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
2418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
2428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhs	r8,[r12],#16		@ load input
2438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhs	r9,[r12,#-12]
2448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
2458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r2,r2,r10
2468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r3,r3,r11
2478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
2488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
2498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
2508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhs	r10,[r12,#-8]
2518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhs	r11,[r12,#-4]
2528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# if __ARM_ARCH__>=6 && defined(__ARMEB__)
2538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r0,r0
2548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r1,r1
2558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r2,r2
2568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r3,r3
2578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
2588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
2598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
2608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
2618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorhs	r0,r0,r8	@ xor with input
2628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorhs	r1,r1,r9
2638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,sp,#4*(4)
2648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r0,[r14],#16		@ store output
2658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
2668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
2678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
2688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorhs	r2,r2,r10
2698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorhs	r3,r3,r11
2708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	r8,{r8,r9,r10,r11}	@ load key material
2718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r1,[r14,#-12]
2728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r2,[r14,#-8]
2738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r3,[r14,#-4]
2748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
2758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r4,r4,r8	@ accumulate key material
2768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r5,r5,r9
2778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
2788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
2798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
2808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhs	r8,[r12],#16		@ load input
2818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhs	r9,[r12,#-12]
2828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r6,r6,r10
2838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r7,r7,r11
2848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
2858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
2868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
2878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhs	r10,[r12,#-8]
2888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhs	r11,[r12,#-4]
2898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# if __ARM_ARCH__>=6 && defined(__ARMEB__)
2908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r4,r4
2918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r5,r5
2928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r6,r6
2938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r7,r7
2948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
2958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
2968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
2978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
2988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorhs	r4,r4,r8
2998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorhs	r5,r5,r9
3008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,sp,#4*(8)
3018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r4,[r14],#16		@ store output
3028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
3038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
3048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
3058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorhs	r6,r6,r10
3068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorhs	r7,r7,r11
3078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r5,[r14,#-12]
3088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	r8,{r8,r9,r10,r11}	@ load key material
3098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r6,[r14,#-8]
3108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r0,sp,#4*(16+8)
3118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r7,[r14,#-4]
3128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
3138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	r0,{r0,r1,r2,r3,r4,r5,r6,r7}	@ load second half
3148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
3158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r0,r0,r8	@ accumulate key material
3168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r1,r1,r9
3178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
3188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
3198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
3208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhs	r8,[r12],#16		@ load input
3218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhs	r9,[r12,#-12]
3228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
3238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hi
3248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
3258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strhi	r10,[sp,#4*(16+10)]	@ copy "rx" while at it
3268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strhi	r11,[sp,#4*(16+11)]	@ copy "rx" while at it
3278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r2,r2,r10
3288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r3,r3,r11
3298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
3308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
3318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
3328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhs	r10,[r12,#-8]
3338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhs	r11,[r12,#-4]
3348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# if __ARM_ARCH__>=6 && defined(__ARMEB__)
3358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r0,r0
3368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r1,r1
3378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r2,r2
3388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r3,r3
3398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
3408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
3418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
3428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
3438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorhs	r0,r0,r8
3448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorhs	r1,r1,r9
3458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,sp,#4*(12)
3468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r0,[r14],#16		@ store output
3478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
3488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
3498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
3508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorhs	r2,r2,r10
3518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorhs	r3,r3,r11
3528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r1,[r14,#-12]
3538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	r8,{r8,r9,r10,r11}	@ load key material
3548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r2,[r14,#-8]
3558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r3,[r14,#-4]
3568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
3578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r4,r4,r8	@ accumulate key material
3588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r5,r5,r9
3598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
3608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hi
3618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
3628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	addhi	r8,r8,#1		@ next counter value
3638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strhi	r8,[sp,#4*(12)]	@ save next counter value
3648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
3658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
3668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
3678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhs	r8,[r12],#16		@ load input
3688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhs	r9,[r12,#-12]
3698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r6,r6,r10
3708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r7,r7,r11
3718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
3728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
3738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
3748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhs	r10,[r12,#-8]
3758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhs	r11,[r12,#-4]
3768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# if __ARM_ARCH__>=6 && defined(__ARMEB__)
3778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r4,r4
3788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r5,r5
3798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r6,r6
3808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r7,r7
3818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
3828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
3838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
3848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
3858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorhs	r4,r4,r8
3868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorhs	r5,r5,r9
3878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
3888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	it	ne
3898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
3908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrne	r8,[sp,#4*(32+2)]	@ re-load len
3918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
3928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
3938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
3948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorhs	r6,r6,r10
3958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorhs	r7,r7,r11
3968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r4,[r14],#16		@ store output
3978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r5,[r14,#-12]
3988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
3998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	it	hs
4008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
4018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	subhs	r11,r8,#64		@ len-=64
4028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r6,[r14,#-8]
4038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r7,[r14,#-4]
4048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	bhi	Loop_outer
4058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
4068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	beq	Ldone
4078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# if __ARM_ARCH__<7
4088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	b	Ltail
4098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
4108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.align	4
4118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanLunaligned:@ unaligned endian-neutral path
4128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	cmp	r11,#64		@ restore flags
4138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
4148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#endif
4158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#if __ARM_ARCH__<7
4168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r11,[sp,#4*(3)]
4178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r0,r0,r8		@ accumulate key material
4188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r1,r1,r9
4198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r2,r2,r10
4208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
4218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itete	lo
4228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
4238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorlo	r8,r8,r8		@ zero or ...
4248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r8,[r12],#16			@ ... load input
4258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorlo	r9,r9,r9
4268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r9,[r12,#-12]
4278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
4288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r3,r3,r11
4298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
4308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itete	lo
4318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
4328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorlo	r10,r10,r10
4338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r10,[r12,#-8]
4348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorlo	r11,r11,r11
4358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r11,[r12,#-4]
4368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
4378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r0,r8,r0		@ xor with input (or zero)
4388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r1,r9,r1
4398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
4408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
4418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
4428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r8,[r12,#-15]		@ load more input
4438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r9,[r12,#-11]
4448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r2,r10,r2
4458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r0,[r14],#16		@ store output
4468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r3,r11,r3
4478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
4488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
4498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
4508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r10,[r12,#-7]
4518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r11,[r12,#-3]
4528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r1,[r14,#-12]
4538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r0,r8,r0,lsr#8
4548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r2,[r14,#-8]
4558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r1,r9,r1,lsr#8
4568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
4578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
4588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
4598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r8,[r12,#-14]		@ load more input
4608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r9,[r12,#-10]
4618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r3,[r14,#-4]
4628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r2,r10,r2,lsr#8
4638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r0,[r14,#-15]
4648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r3,r11,r3,lsr#8
4658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
4668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
4678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
4688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r10,[r12,#-6]
4698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r11,[r12,#-2]
4708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r1,[r14,#-11]
4718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r0,r8,r0,lsr#8
4728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r2,[r14,#-7]
4738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r1,r9,r1,lsr#8
4748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
4758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
4768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
4778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r8,[r12,#-13]		@ load more input
4788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r9,[r12,#-9]
4798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r3,[r14,#-3]
4808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r2,r10,r2,lsr#8
4818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r0,[r14,#-14]
4828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r3,r11,r3,lsr#8
4838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
4848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
4858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
4868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r10,[r12,#-5]
4878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r11,[r12,#-1]
4888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r1,[r14,#-10]
4898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r2,[r14,#-6]
4908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r0,r8,r0,lsr#8
4918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r3,[r14,#-2]
4928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r1,r9,r1,lsr#8
4938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r0,[r14,#-13]
4948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r2,r10,r2,lsr#8
4958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r1,[r14,#-9]
4968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r3,r11,r3,lsr#8
4978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r2,[r14,#-5]
4988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r3,[r14,#-1]
4998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,sp,#4*(4+0)
5008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	r8,{r8,r9,r10,r11}		@ load key material
5018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r0,sp,#4*(16+8)
5028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r4,r4,r8		@ accumulate key material
5038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r5,r5,r9
5048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r6,r6,r10
5058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
5068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itete	lo
5078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
5088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorlo	r8,r8,r8		@ zero or ...
5098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r8,[r12],#16			@ ... load input
5108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorlo	r9,r9,r9
5118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r9,[r12,#-12]
5128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
5138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r7,r7,r11
5148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
5158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itete	lo
5168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
5178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorlo	r10,r10,r10
5188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r10,[r12,#-8]
5198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorlo	r11,r11,r11
5208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r11,[r12,#-4]
5218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
5228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r4,r8,r4		@ xor with input (or zero)
5238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r5,r9,r5
5248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
5258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
5268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
5278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r8,[r12,#-15]		@ load more input
5288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r9,[r12,#-11]
5298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r6,r10,r6
5308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r4,[r14],#16		@ store output
5318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r7,r11,r7
5328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
5338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
5348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
5358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r10,[r12,#-7]
5368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r11,[r12,#-3]
5378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r5,[r14,#-12]
5388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r4,r8,r4,lsr#8
5398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r6,[r14,#-8]
5408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r5,r9,r5,lsr#8
5418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
5428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
5438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
5448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r8,[r12,#-14]		@ load more input
5458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r9,[r12,#-10]
5468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r7,[r14,#-4]
5478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r6,r10,r6,lsr#8
5488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r4,[r14,#-15]
5498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r7,r11,r7,lsr#8
5508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
5518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
5528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
5538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r10,[r12,#-6]
5548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r11,[r12,#-2]
5558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r5,[r14,#-11]
5568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r4,r8,r4,lsr#8
5578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r6,[r14,#-7]
5588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r5,r9,r5,lsr#8
5598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
5608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
5618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
5628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r8,[r12,#-13]		@ load more input
5638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r9,[r12,#-9]
5648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r7,[r14,#-3]
5658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r6,r10,r6,lsr#8
5668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r4,[r14,#-14]
5678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r7,r11,r7,lsr#8
5688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
5698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
5708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
5718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r10,[r12,#-5]
5728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r11,[r12,#-1]
5738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r5,[r14,#-10]
5748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r6,[r14,#-6]
5758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r4,r8,r4,lsr#8
5768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r7,[r14,#-2]
5778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r5,r9,r5,lsr#8
5788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r4,[r14,#-13]
5798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r6,r10,r6,lsr#8
5808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r5,[r14,#-9]
5818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r7,r11,r7,lsr#8
5828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r6,[r14,#-5]
5838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r7,[r14,#-1]
5848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,sp,#4*(4+4)
5858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	r8,{r8,r9,r10,r11}		@ load key material
5868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	r0,{r0,r1,r2,r3,r4,r5,r6,r7}		@ load second half
5878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
5888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hi
5898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
5908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strhi	r10,[sp,#4*(16+10)]		@ copy "rx"
5918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strhi	r11,[sp,#4*(16+11)]		@ copy "rx"
5928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r0,r0,r8		@ accumulate key material
5938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r1,r1,r9
5948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r2,r2,r10
5958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
5968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itete	lo
5978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
5988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorlo	r8,r8,r8		@ zero or ...
5998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r8,[r12],#16			@ ... load input
6008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorlo	r9,r9,r9
6018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r9,[r12,#-12]
6028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
6038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r3,r3,r11
6048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
6058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itete	lo
6068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
6078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorlo	r10,r10,r10
6088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r10,[r12,#-8]
6098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorlo	r11,r11,r11
6108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r11,[r12,#-4]
6118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
6128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r0,r8,r0		@ xor with input (or zero)
6138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r1,r9,r1
6148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
6158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
6168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
6178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r8,[r12,#-15]		@ load more input
6188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r9,[r12,#-11]
6198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r2,r10,r2
6208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r0,[r14],#16		@ store output
6218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r3,r11,r3
6228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
6238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
6248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
6258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r10,[r12,#-7]
6268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r11,[r12,#-3]
6278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r1,[r14,#-12]
6288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r0,r8,r0,lsr#8
6298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r2,[r14,#-8]
6308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r1,r9,r1,lsr#8
6318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
6328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
6338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
6348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r8,[r12,#-14]		@ load more input
6358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r9,[r12,#-10]
6368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r3,[r14,#-4]
6378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r2,r10,r2,lsr#8
6388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r0,[r14,#-15]
6398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r3,r11,r3,lsr#8
6408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
6418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
6428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
6438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r10,[r12,#-6]
6448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r11,[r12,#-2]
6458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r1,[r14,#-11]
6468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r0,r8,r0,lsr#8
6478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r2,[r14,#-7]
6488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r1,r9,r1,lsr#8
6498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
6508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
6518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
6528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r8,[r12,#-13]		@ load more input
6538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r9,[r12,#-9]
6548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r3,[r14,#-3]
6558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r2,r10,r2,lsr#8
6568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r0,[r14,#-14]
6578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r3,r11,r3,lsr#8
6588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
6598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
6608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
6618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r10,[r12,#-5]
6628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r11,[r12,#-1]
6638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r1,[r14,#-10]
6648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r2,[r14,#-6]
6658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r0,r8,r0,lsr#8
6668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r3,[r14,#-2]
6678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r1,r9,r1,lsr#8
6688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r0,[r14,#-13]
6698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r2,r10,r2,lsr#8
6708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r1,[r14,#-9]
6718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r3,r11,r3,lsr#8
6728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r2,[r14,#-5]
6738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r3,[r14,#-1]
6748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,sp,#4*(4+8)
6758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	r8,{r8,r9,r10,r11}		@ load key material
6768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r4,r4,r8		@ accumulate key material
6778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
6788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hi
6798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
6808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	addhi	r8,r8,#1			@ next counter value
6818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strhi	r8,[sp,#4*(12)]		@ save next counter value
6828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r5,r5,r9
6838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r6,r6,r10
6848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
6858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itete	lo
6868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
6878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorlo	r8,r8,r8		@ zero or ...
6888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r8,[r12],#16			@ ... load input
6898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorlo	r9,r9,r9
6908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r9,[r12,#-12]
6918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
6928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r7,r7,r11
6938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
6948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itete	lo
6958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
6968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorlo	r10,r10,r10
6978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r10,[r12,#-8]
6988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eorlo	r11,r11,r11
6998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r11,[r12,#-4]
7008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
7018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r4,r8,r4		@ xor with input (or zero)
7028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r5,r9,r5
7038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
7048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
7058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
7068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r8,[r12,#-15]		@ load more input
7078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r9,[r12,#-11]
7088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r6,r10,r6
7098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r4,[r14],#16		@ store output
7108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r7,r11,r7
7118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
7128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
7138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
7148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r10,[r12,#-7]
7158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r11,[r12,#-3]
7168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r5,[r14,#-12]
7178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r4,r8,r4,lsr#8
7188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r6,[r14,#-8]
7198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r5,r9,r5,lsr#8
7208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
7218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
7228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
7238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r8,[r12,#-14]		@ load more input
7248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r9,[r12,#-10]
7258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r7,[r14,#-4]
7268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r6,r10,r6,lsr#8
7278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r4,[r14,#-15]
7288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r7,r11,r7,lsr#8
7298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
7308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
7318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
7328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r10,[r12,#-6]
7338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r11,[r12,#-2]
7348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r5,[r14,#-11]
7358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r4,r8,r4,lsr#8
7368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r6,[r14,#-7]
7378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r5,r9,r5,lsr#8
7388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
7398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
7408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
7418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r8,[r12,#-13]		@ load more input
7428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r9,[r12,#-9]
7438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r7,[r14,#-3]
7448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r6,r10,r6,lsr#8
7458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r4,[r14,#-14]
7468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r7,r11,r7,lsr#8
7478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
7488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	itt	hs
7498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
7508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r10,[r12,#-5]
7518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhsb	r11,[r12,#-1]
7528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r5,[r14,#-10]
7538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r6,[r14,#-6]
7548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r4,r8,r4,lsr#8
7558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r7,[r14,#-2]
7568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r5,r9,r5,lsr#8
7578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r4,[r14,#-13]
7588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r6,r10,r6,lsr#8
7598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r5,[r14,#-9]
7608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r7,r11,r7,lsr#8
7618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r6,[r14,#-5]
7628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r7,[r14,#-1]
7638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
7648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	it	ne
7658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
7668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrne	r8,[sp,#4*(32+2)]		@ re-load len
7678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
7688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	it	hs
7698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
7708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	subhs	r11,r8,#64			@ len-=64
7718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	bhi	Loop_outer
7728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
7738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	beq	Ldone
7748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#endif
7758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
7768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanLtail:
7778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r12,[sp,#4*(32+1)]	@ load inp
7788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r9,sp,#4*(0)
7798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r14,[sp,#4*(32+0)]	@ load out
7808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
7818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanLoop_tail:
7828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrb	r10,[r9],#1	@ read buffer on stack
7838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrb	r11,[r12],#1		@ read input
7848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	subs	r8,r8,#1
7858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r11,r11,r10
7868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r11,[r14],#1		@ store output
7878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	bne	Loop_tail
7888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
7898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanLdone:
7908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	sp,sp,#4*(32+3)
7918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanLno_data:
7928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
7938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
7948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#if __ARM_MAX_ARCH__>=7
7958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
7968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
7978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
7988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#ifdef __thumb2__
7998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.thumb_func	ChaCha20_neon
8008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#endif
8018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.align	5
8028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanChaCha20_neon:
8038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r12,[sp,#0]		@ pull pointer to counter and nonce
8048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
8058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanLChaCha20_neon:
8068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	adr	r14,Lsigma
8078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}		@ ABI spec says so
8088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	stmdb	sp!,{r0,r1,r2,r3}
8098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
8108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.32	{q1,q2},[r3]		@ load key
8118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}		@ load key
8128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
8138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	sub	sp,sp,#4*(16+16)
8148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.32	{q3},[r12]		@ load counter and nonce
8158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r12,sp,#4*8
8168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	r14,{r0,r1,r2,r3}		@ load sigma
8178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.32	{q0},[r14]!		@ load sigma
8188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.32	{q12},[r14]		@ one
8198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.32	{q2,q3},[r12]		@ copy 1/2key|counter|nonce
8208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.32	{q0,q1},[sp]		@ copy sigma|1/2key
8218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
8228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r10,[sp,#4*(16+10)]	@ off-load "rx"
8238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r11,[sp,#4*(16+11)]	@ off-load "rx"
8248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vshl.i32	d26,d24,#1	@ two
8258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vstr	d24,[sp,#4*(16+0)]
8268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vshl.i32	d28,d24,#2	@ four
8278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vstr	d26,[sp,#4*(16+2)]
8288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vmov	q4,q0
8298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vstr	d28,[sp,#4*(16+4)]
8308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vmov	q8,q0
8318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vmov	q5,q1
8328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vmov	q9,q1
8338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	b	Loop_neon_enter
8348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
8358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.align	4
8368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanLoop_neon_outer:
8378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	sp,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}		@ load key material
8388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	cmp	r11,#64*2		@ if len<=64*2
8398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	bls	Lbreak_neon		@ switch to integer-only
8408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vmov	q4,q0
8418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r11,[sp,#4*(32+2)]	@ save len
8428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vmov	q8,q0
8438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r12,  [sp,#4*(32+1)]	@ save inp
8448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vmov	q5,q1
8458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r14,  [sp,#4*(32+0)]	@ save out
8468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vmov	q9,q1
8478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanLoop_neon_enter:
8488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r11, [sp,#4*(15)]
8498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q7,q3,q12		@ counter+1
8508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r12,[sp,#4*(12)]	@ modulo-scheduled load
8518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vmov	q6,q2
8528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r10, [sp,#4*(13)]
8538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vmov	q10,q2
8548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r14,[sp,#4*(14)]
8558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q11,q7,q12		@ counter+2
8568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r11, [sp,#4*(16+15)]
8578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r11,#10
8588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r12,r12,#3	@ counter+3
8598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	b	Loop_neon
8608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
8618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.align	4
@ Inner round loop.  r11 counts the passes (set to 10 before entry); each
@ pass applies one column round and then one diagonal round to four
@ blocks at once, with NEON and integer instructions interleaved.
@
@ NEON side (three blocks): rows a/b/c/d are q0-q3, q4-q7 and q8-q11;
@ q12-q14 are scratch.  Rotate-left by 16 is vrev32.16; rotate-left by n
@ is vshr.u32 #(32-n) followed by vsli.32 #n from the scratch copy.
@
@ Integer side (fourth block): x0-x7 stay in r0-r7, x12 in r12, x14 in
@ r14.  r8, r9 and r10 are time-shared (r8 = x8 or x10, r9 = x9 or x11,
@ r10 = x13 or x15); the word not currently in a register is parked in
@ its slot at sp+4*(16+i).  The pair
@     mov rD,rD,ror#n ; eor rD,rD,rS,ror#n
@ leaves ror(rD^rS,n) in rD, i.e. xor followed by rotate-left by 32-n.
8628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanLoop_neon:
8638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	subs	r11,r11,#1	@ sets the flags tested by the bne at the bottom
	@ ---- column round ----
	@ integer: quarter rounds on (r0,r4,r8=x8,r12) and (r1,r5,r9=x9,r10=x13)
8648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q0,q0,q1
8658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r0,r0,r4
8668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q4,q4,q5
8678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r12,r12,ror#16
8688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q8,q8,q9
8698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r1,r1,r5
8708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q3,q3,q0
8718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r10,r10,ror#16
8728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q7,q7,q4
8738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r12,r12,r0,ror#16
8748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q11,q11,q8
8758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r10,r10,r1,ror#16
8768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vrev32.16	q3,q3
8778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,r8,r12
8788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vrev32.16	q7,q7
8798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r4,r4,ror#20
8808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vrev32.16	q11,q11
8818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r9,r9,r10
8828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q2,q2,q3
8838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r5,r5,ror#20
8848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q6,q6,q7
8858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r4,r4,r8,ror#20
8868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q10,q10,q11
8878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r5,r5,r9,ror#20
8888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q12,q1,q2
8898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r0,r0,r4
8908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q13,q5,q6
8918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r12,r12,ror#24
8928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q14,q9,q10
8938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r1,r1,r5
8948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vshr.u32	q1,q12,#20
8958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r10,r10,ror#24
8968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vshr.u32	q5,q13,#20
8978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r12,r12,r0,ror#24
8988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vshr.u32	q9,q14,#20
8998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r10,r10,r1,ror#24
9008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vsli.32	q1,q12,#12
9018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,r8,r12
9028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vsli.32	q5,q13,#12
9038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r4,r4,ror#25
9048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vsli.32	q9,q14,#12
9058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r9,r9,r10
9068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q0,q0,q1
9078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r5,r5,ror#25
9088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q4,q4,q5
9098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r10,[sp,#4*(16+13)]	@ park x13
9108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q8,q8,q9
9118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r10,[sp,#4*(16+15)]	@ r10 = x15
9128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q12,q3,q0
9138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r4,r4,r8,ror#25
9148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q13,q7,q4
9158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r5,r5,r9,ror#25
9168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q14,q11,q8
9178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r8,[sp,#4*(16+8)]	@ park x8
9188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vshr.u32	q3,q12,#24
9198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r8,[sp,#4*(16+10)]	@ r8 = x10
9208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vshr.u32	q7,q13,#24
	@ integer: quarter rounds on (r2,r6,r8=x10,r14) and (r3,r7,r9=x11,r10=x15)
9218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r2,r2,r6
9228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vshr.u32	q11,q14,#24
9238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r14,r14,ror#16
9248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vsli.32	q3,q12,#8
9258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r9,[sp,#4*(16+9)]	@ park x9
9268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vsli.32	q7,q13,#8
9278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r9,[sp,#4*(16+11)]	@ r9 = x11
9288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vsli.32	q11,q14,#8
9298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r3,r3,r7
9308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q2,q2,q3
9318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r10,r10,ror#16
9328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q6,q6,q7
9338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r14,r14,r2,ror#16
9348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q10,q10,q11
9358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r10,r10,r3,ror#16
9368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q12,q1,q2
9378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,r8,r14
9388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q13,q5,q6
9398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r6,r6,ror#20
9408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q14,q9,q10
9418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r9,r9,r10
9428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vshr.u32	q1,q12,#25
9438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r7,r7,ror#20
9448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vshr.u32	q5,q13,#25
9458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r6,r6,r8,ror#20
9468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vshr.u32	q9,q14,#25
9478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r7,r7,r9,ror#20
9488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vsli.32	q1,q12,#7
9498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r2,r2,r6
9508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vsli.32	q5,q13,#7
9518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r14,r14,ror#24
9528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vsli.32	q9,q14,#7
9538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r3,r3,r7
	@ NEON: rotate the 32-bit lanes of rows c, b and d by 2, 1 and 3
	@ positions (vext by 8, 4 and 12 bytes) so that the same column-wise
	@ code below operates on the diagonals.
9548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vext.8	q2,q2,q2,#8
9558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r10,r10,ror#24
9568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vext.8	q6,q6,q6,#8
9578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r14,r14,r2,ror#24
9588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vext.8	q10,q10,q10,#8
9598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r10,r10,r3,ror#24
9608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vext.8	q1,q1,q1,#4
9618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,r8,r14
9628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vext.8	q5,q5,q5,#4
9638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r6,r6,ror#25
9648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vext.8	q9,q9,q9,#4
9658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r9,r9,r10
9668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vext.8	q3,q3,q3,#12
9678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r7,r7,ror#25
9688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vext.8	q7,q7,q7,#12
9698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r6,r6,r8,ror#25
9708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vext.8	q11,q11,q11,#12
9718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r7,r7,r9,ror#25
	@ ---- diagonal round ----
	@ integer: quarter rounds on (r0,r5,r8=x10,r10=x15) and (r1,r6,r9=x11,r12)
9728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q0,q0,q1
9738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r0,r0,r5
9748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q4,q4,q5
9758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r10,r10,ror#16
9768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q8,q8,q9
9778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r1,r1,r6
9788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q3,q3,q0
9798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r12,r12,ror#16
9808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q7,q7,q4
9818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r10,r10,r0,ror#16
9828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q11,q11,q8
9838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r12,r12,r1,ror#16
9848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vrev32.16	q3,q3
9858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,r8,r10
9868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vrev32.16	q7,q7
9878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r5,r5,ror#20
9888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vrev32.16	q11,q11
9898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r9,r9,r12
9908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q2,q2,q3
9918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r6,r6,ror#20
9928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q6,q6,q7
9938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r5,r5,r8,ror#20
9948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q10,q10,q11
9958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r6,r6,r9,ror#20
9968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q12,q1,q2
9978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r0,r0,r5
9988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q13,q5,q6
9998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r10,r10,ror#24
10008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q14,q9,q10
10018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r1,r1,r6
10028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vshr.u32	q1,q12,#20
10038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r12,r12,ror#24
10048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vshr.u32	q5,q13,#20
10058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r10,r10,r0,ror#24
10068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vshr.u32	q9,q14,#20
10078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r12,r12,r1,ror#24
10088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vsli.32	q1,q12,#12
10098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,r8,r10
10108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vsli.32	q5,q13,#12
10118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r5,r5,ror#25
10128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vsli.32	q9,q14,#12
10138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r10,[sp,#4*(16+15)]	@ park x15
10148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q0,q0,q1
10158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r10,[sp,#4*(16+13)]	@ r10 = x13
10168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q4,q4,q5
10178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r9,r9,r12
10188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q8,q8,q9
10198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r6,r6,ror#25
10208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q12,q3,q0
10218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r5,r5,r8,ror#25
10228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q13,q7,q4
10238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r6,r6,r9,ror#25
10248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q14,q11,q8
10258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r8,[sp,#4*(16+10)]	@ park x10
10268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vshr.u32	q3,q12,#24
10278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r8,[sp,#4*(16+8)]	@ r8 = x8
10288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vshr.u32	q7,q13,#24
	@ integer: quarter rounds on (r2,r7,r8=x8,r10=x13) and (r3,r4,r9=x9,r14)
10298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r2,r2,r7
10308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vshr.u32	q11,q14,#24
10318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r10,r10,ror#16
10328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vsli.32	q3,q12,#8
10338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r9,[sp,#4*(16+11)]	@ park x11
10348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vsli.32	q7,q13,#8
10358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r9,[sp,#4*(16+9)]	@ r9 = x9
10368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vsli.32	q11,q14,#8
10378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r3,r3,r4
10388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q2,q2,q3
10398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r14,r14,ror#16
10408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q6,q6,q7
10418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r10,r10,r2,ror#16
10428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q10,q10,q11
10438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r14,r14,r3,ror#16
10448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q12,q1,q2
10458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,r8,r10
10468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q13,q5,q6
10478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r7,r7,ror#20
10488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q14,q9,q10
10498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r9,r9,r14
10508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vshr.u32	q1,q12,#25
10518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r4,r4,ror#20
10528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vshr.u32	q5,q13,#25
10538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r7,r7,r8,ror#20
10548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vshr.u32	q9,q14,#25
10558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r4,r4,r9,ror#20
10568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vsli.32	q1,q12,#7
10578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r2,r2,r7
10588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vsli.32	q5,q13,#7
10598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r10,r10,ror#24
10608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vsli.32	q9,q14,#7
10618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r3,r3,r4
	@ NEON: rotate rows c, b and d by 2, 3 and 1 positions (vext by 8, 12
	@ and 4 bytes), which undoes the rotation applied before this half.
10628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vext.8	q2,q2,q2,#8
10638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r14,r14,ror#24
10648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vext.8	q6,q6,q6,#8
10658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r10,r10,r2,ror#24
10668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vext.8	q10,q10,q10,#8
10678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r14,r14,r3,ror#24
10688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vext.8	q1,q1,q1,#12
10698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,r8,r10
10708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vext.8	q5,q5,q5,#12
10718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r7,r7,ror#25
10728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vext.8	q9,q9,q9,#12
10738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r9,r9,r14
10748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vext.8	q3,q3,q3,#4
10758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r4,r4,ror#25
10768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vext.8	q7,q7,q7,#4
10778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r7,r7,r8,ror#25
10788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vext.8	q11,q11,q11,#4
10798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r4,r4,r9,ror#25
	@ No instruction since the subs at the top of the loop updates the
	@ flags, so this tests the pass counter.
10808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	bne	Loop_neon
10818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
@ Rounds finished.  Add the original state (still stored at sp) back into
@ all four blocks; then, if at least 64*4 bytes remain, xor the blocks
@ with the input and write the output, otherwise leave via Ltail_neon.
@ The flags set by "cmp r11,#64*4" below are not modified again before
@ the final bhi: every later instruction is a non-flag-setting form.
10828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r11,sp,#32
10838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.32	{q12,q13},[sp]		@ load key material
10848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.32	{q14,q15},[r11]
10858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
10868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r11,[sp,#4*(32+2)]	@ load len
10878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
	@ x10, x11 and x15 were already written to their slots inside
	@ Loop_neon; these stores complete the working copy of x8-x15.
10888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r8, [sp,#4*(16+8)]	@ modulo-scheduled store
10898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r9, [sp,#4*(16+9)]
10908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r12,[sp,#4*(16+12)]
10918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r10, [sp,#4*(16+13)]
10928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r14,[sp,#4*(16+14)]
10938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
10948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	@ at this point we have first half of 512-bit result in
10958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	@ rx and second half at sp+4*(16+8)
10968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
10978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r12,[sp,#4*(32+1)]	@ load inp
10988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r14,[sp,#4*(32+0)]	@ load out
10998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
	@ q12-q15 hold state rows a-d.  d24 (low half of q12) and d26 (low
	@ half of q13) are overwritten with the per-block counter increments
	@ only after the last use of q12 and q13 as key rows.
11008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q0,q0,q12		@ accumulate key material
11018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q4,q4,q12
11028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q8,q8,q12
11038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vldr	d24,[sp,#4*(16+0)]	@ one
11048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
11058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q1,q1,q13
11068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q5,q5,q13
11078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q9,q9,q13
11088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vldr	d26,[sp,#4*(16+2)]	@ two
11098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
11108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q2,q2,q14
11118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q6,q6,q14
11128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q10,q10,q14
	@ Blocks 1 and 2 were started from counter+1 and counter+2, while
	@ q15 carries the base counter: pre-add the difference to the low
	@ halves of q7 (d14) and q11 (d22) before q15 is added below.
11138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	d14,d14,d24	@ counter+1
11148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	d22,d22,d26	@ counter+2
11158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
11168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q3,q3,q15
11178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q7,q7,q15
11188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	q11,q11,q15
11198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
11208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	cmp	r11,#64*4	@ flags reused by the strhi/ldrhi/bhi further down
11218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	blo	Ltail_neon	@ fewer than 64*4 bytes left
11228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
	@ NEON blocks: 3*64 bytes are read through r12 and written through
	@ r14 (both post-incremented), using q12-q15 as input staging.
11238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.8	{q12,q13},[r12]!	@ load input
11248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r11,sp	@ cursor for reloading q0-q3 from the stored state
11258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.8	{q14,q15},[r12]!
11268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q0,q0,q12		@ xor with input
11278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q1,q1,q13
11288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.8	{q12,q13},[r12]!
11298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q2,q2,q14
11308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q3,q3,q15
11318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.8	{q14,q15},[r12]!
11328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
11338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q4,q4,q12
11348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.8	{q0,q1},[r14]!	@ store output
11358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q5,q5,q13
11368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.8	{q12,q13},[r12]!
11378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q6,q6,q14
11388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.8	{q2,q3},[r14]!
11398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q7,q7,q15
11408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.8	{q14,q15},[r12]!
11418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
11428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q8,q8,q12
11438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.32	{q0,q1},[r11]!	@ load for next iteration
11448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	d25,d25,d25	@ clear the upper half of q12
11458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vldr	d24,[sp,#4*(16+4)]	@ four
11468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q9,q9,q13
11478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.32	{q2,q3},[r11]
11488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q10,q10,q14
11498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.8	{q4,q5},[r14]!
11508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q11,q11,q15
11518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.8	{q6,q7},[r14]!
11528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
	@ q3 was reloaded before the integer code below rewrites the counter
	@ word at sp+4*12, so the NEON copy is advanced here on its own.
	@ Afterwards q12 = {one, 0}, as Loop_neon_enter expects.
11538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vadd.i32	d6,d6,d24	@ next counter value
11548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vldr	d24,[sp,#4*(16+0)]	@ one
11558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
	@ Integer block, words 0-3 (r0-r3): add key, xor with input, store.
	@ r8-r11 alternate between key words and input words.
11568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	sp,{r8,r9,r10,r11}	@ load key material
11578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r0,r0,r8	@ accumulate key material
11588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r8,[r12],#16		@ load input
11598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.8	{q8,q9},[r14]!
11608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r1,r1,r9
11618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r9,[r12,#-12]
11628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.8	{q10,q11},[r14]!
11638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r2,r2,r10
11648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r10,[r12,#-8]
11658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r3,r3,r11
11668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r11,[r12,#-4]
	@ Big-endian builds byte-swap the keystream words before the xor.
11678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__ARMEB__
11688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r0,r0
11698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r1,r1
11708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r2,r2
11718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r3,r3
11728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
11738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r0,r0,r8	@ xor with input
11748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,sp,#4*(4)
11758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r1,r1,r9
11768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r0,[r14],#16		@ store output
11778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r2,r2,r10
11788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r1,[r14,#-12]
11798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r3,r3,r11
11808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	r8,{r8,r9,r10,r11}	@ load key material
11818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r2,[r14,#-8]
11828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r3,[r14,#-4]
11838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
	@ Integer block, words 4-7 (r4-r7).
11848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r4,r4,r8	@ accumulate key material
11858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r8,[r12],#16		@ load input
11868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r5,r5,r9
11878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r9,[r12,#-12]
11888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r6,r6,r10
11898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r10,[r12,#-8]
11908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r7,r7,r11
11918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r11,[r12,#-4]
11928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__ARMEB__
11938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r4,r4
11948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r5,r5
11958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r6,r6
11968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r7,r7
11978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
11988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r4,r4,r8
11998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,sp,#4*(8)
12008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r5,r5,r9
12018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r4,[r14],#16		@ store output
12028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r6,r6,r10
12038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r5,[r14,#-12]
12048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r7,r7,r11
12058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	r8,{r8,r9,r10,r11}	@ load key material
12068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r6,[r14,#-8]
12078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r0,sp,#4*(16+8)	@ r0 -> working copy of x8-x15
12088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r7,[r14,#-4]
12098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
12108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	r0,{r0,r1,r2,r3,r4,r5,r6,r7}	@ load second half
12118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
	@ Integer block, words 8-11 (now in r0-r3).  The "hi" condition is
	@ len > 64*4 from the compare above, i.e. another pass will follow:
	@ in that case key words 10 and 11 are copied into the working slots
	@ that Loop_neon reads as the initial x10 and x11.
12128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r0,r0,r8	@ accumulate key material
12138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r8,[r12],#16		@ load input
12148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r1,r1,r9
12158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r9,[r12,#-12]
12168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
12178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	it	hi
12188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
12198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strhi	r10,[sp,#4*(16+10)]	@ copy "rx" while at it
12208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r2,r2,r10
12218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r10,[r12,#-8]
12228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
12238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	it	hi
12248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
12258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strhi	r11,[sp,#4*(16+11)]	@ copy "rx" while at it
12268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r3,r3,r11
12278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r11,[r12,#-4]
12288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__ARMEB__
12298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r0,r0
12308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r1,r1
12318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r2,r2
12328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r3,r3
12338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
12348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r0,r0,r8
12358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,sp,#4*(12)
12368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r1,r1,r9
12378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r0,[r14],#16		@ store output
12388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r2,r2,r10
12398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r1,[r14,#-12]
12408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r3,r3,r11
12418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	r8,{r8,r9,r10,r11}	@ load key material
12428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r2,[r14,#-8]
12438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r3,[r14,#-4]
12448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
	@ Integer block, words 12-15 (r4-r7).  r8 is the stored base counter:
	@ it is advanced by 4 and written back for the next pass, while this
	@ block, which was started from counter+3, gets the extra 3 added.
12458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r4,r4,r8	@ accumulate key material
12468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,r8,#4		@ next counter value
12478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r5,r5,r9
12488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r8,[sp,#4*(12)]	@ save next counter value
12498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r8,[r12],#16		@ load input
12508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r6,r6,r10
12518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r4,r4,#3		@ counter+3
12528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r9,[r12,#-12]
12538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r7,r7,r11
12548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r10,[r12,#-8]
12558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r11,[r12,#-4]
12568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__ARMEB__
12578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r4,r4
12588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r5,r5
12598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r6,r6
12608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r7,r7
12618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
12628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r4,r4,r8
12638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__thumb2__
12648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	it	hi
12658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
12668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrhi	r8,[sp,#4*(32+2)]	@ re-load len
12678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r5,r5,r9
12688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r6,r6,r10
12698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r4,[r14],#16		@ store output
12708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r7,r7,r11
12718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r5,[r14,#-12]
	@ r8 holds len only when the ldrhi above executed; on the other path
	@ it still holds an input word, so r11 is meaningful only when the
	@ bhi below is taken.
12728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	sub	r11,r8,#64*4	@ len-=64*4
12738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r6,[r14,#-8]
12748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r7,[r14,#-4]
12758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	bhi	Loop_neon_outer	@ more than 64*4 bytes were left: go again
12768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
12778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	b	Ldone_neon	@ exactly 64*4 bytes were left: finished
12788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
12798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.align	4
@ Reached from Loop_neon_outer when len <= 64*2: leave the NEON path and
@ continue in the integer-only loop ("Loop", outside this excerpt).
@ r0-r9 already hold state words 0-9 (loaded before the branch here);
@ r11/r12/r14 still hold len/inp/out on arrival.
12808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanLbreak_neon:
12818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	@ harmonize NEON and integer-only stack frames: load data
12828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	@ from NEON frame, but save to integer-only one; distance
12838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	@ between the two is 4*(32+4+16-32)=4*(20).
12848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
	@ sp has not moved yet, so integer-frame slots are addressed with
	@ an extra 20-word offset until the "switch frame" add below.
12858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r11, [sp,#4*(20+32+2)]	@ save len
12868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r11,sp,#4*(32+4)	@ r11 -> area the d8-d15 reload below reads from
12878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r12,   [sp,#4*(20+32+1)]	@ save inp
12888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r14,   [sp,#4*(20+32+0)]	@ save out
12898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
12908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r12,[sp,#4*(16+10)]
12918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r14,[sp,#4*(16+11)]
12928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vldmia	r11,{d8,d9,d10,d11,d12,d13,d14,d15}			@ fulfill ABI requirement
12938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r12,[sp,#4*(20+16+10)]	@ copy "rx"
12948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r14,[sp,#4*(20+16+11)]	@ copy "rx"
12958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
	@ Same preloads as Loop_neon_enter (words 12-15), except that the
	@ counter is left unmodified and word 15 goes to the integer frame.
12968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r11, [sp,#4*(15)]
12978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r12,[sp,#4*(12)]		@ modulo-scheduled load
12988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r10, [sp,#4*(13)]
12998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r14,[sp,#4*(14)]
13008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	str	r11, [sp,#4*(20+16+15)]
	@ q0-q3 hold the 16-word state; write it to the base of the integer
	@ frame (r11 was set before sp moves, so both stores hit new-frame
	@ offsets 0 and 32).
13018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r11,sp,#4*(20)
13028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.32	{q0,q1},[r11]!		@ copy key
13038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	sp,sp,#4*(20)			@ switch frame
13048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.32	{q2,q3},[r11]
13058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	mov	r11,#10	@ same initial value Loop_neon_enter uses for its pass counter
13068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	b	Loop				@ go integer-only
13078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
13088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.align	4
13098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanLtail_neon:
	@ Tail dispatch on the remaining length in r11 (unsigned compares):
	@ at least 192, 128 or 64 bytes each branch to a dedicated path,
	@ anything shorter falls through to the spill code below.  The flags
	@ of the compare that takes the branch are consumed again by the
	@ beq in the target path.
13108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	cmp	r11,#64*3
13118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	bhs	L192_or_more_neon
13128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	cmp	r11,#64*2
13138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	bhs	L128_or_more_neon
13148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	cmp	r11,#64*1
13158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	bhs	L64_or_more_neon
13168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
	@ Fewer than 64 bytes left: write q0-q3 (64 bytes) to the bottom of
	@ the stack frame so that Loop_tail_neon can XOR from it bytewise.
13178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,sp,#4*(8)		@ r8 = sp+32, second half of the buffer
13188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.8	{q0,q1},[sp]
13198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r10,sp,#4*(0)		@ r10 = start of the buffer, read by Loop_tail_neon
13208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.8	{q2,q3},[r8]
13218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	b	Loop_tail_neon
13228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
13238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.align	4
13248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanL64_or_more_neon:
	@ Ltail_neon branches here when r11 is at least 64 and below 128.
	@ XOR 64 input bytes at r12 with q0-q3 and store them at r14; both
	@ pointers are post-incremented by the loads and stores.
13258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.8	{q12,q13},[r12]!
13268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.8	{q14,q15},[r12]!
13278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q0,q0,q12
13288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q1,q1,q13
13298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q2,q2,q14
13308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q3,q3,q15
13318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.8	{q0,q1},[r14]!
13328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.8	{q2,q3},[r14]!
13338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
	@ Nothing since the cmp r11,#64*1 in Ltail_neon has written the
	@ flags, so eq here means the length was exactly 64 bytes.
13348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	beq	Ldone_neon
13358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
	@ Bytes remain: write q4-q7 (64 bytes) to the bottom of the stack
	@ frame as the buffer for the bytewise loop.
13368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,sp,#4*(8)		@ r8 = sp+32, second half of the buffer
13378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.8	{q4,q5},[sp]
13388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r10,sp,#4*(0)		@ r10 = start of the buffer, read by Loop_tail_neon
13398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.8	{q6,q7},[r8]
13408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	sub	r11,r11,#64*1	@ len-=64*1
13418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	b	Loop_tail_neon
13428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
13438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.align	4
13448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanL128_or_more_neon:
	@ Ltail_neon branches here when r11 is at least 128 and below 192.
	@ XOR 128 input bytes at r12 with q0-q7 and store them at r14.  The
	@ loads into q12-q15 are interleaved with the XORs that consume the
	@ previous contents of those registers.
13458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.8	{q12,q13},[r12]!
13468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.8	{q14,q15},[r12]!
13478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q0,q0,q12
13488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q1,q1,q13
13498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.8	{q12,q13},[r12]!	@ input bytes 64..95
13508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q2,q2,q14
13518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q3,q3,q15
13528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.8	{q14,q15},[r12]!	@ input bytes 96..127
13538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
13548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q4,q4,q12
13558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q5,q5,q13
13568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.8	{q0,q1},[r14]!
13578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q6,q6,q14
13588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.8	{q2,q3},[r14]!
13598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q7,q7,q15
13608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.8	{q4,q5},[r14]!
13618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.8	{q6,q7},[r14]!
13628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
	@ Nothing since the cmp r11,#64*2 in Ltail_neon has written the
	@ flags, so eq here means the length was exactly 128 bytes.
13638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	beq	Ldone_neon
13648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
	@ Bytes remain: write q8-q11 (64 bytes) to the bottom of the stack
	@ frame as the buffer for the bytewise loop.
13658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,sp,#4*(8)		@ r8 = sp+32, second half of the buffer
13668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.8	{q8,q9},[sp]
13678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r10,sp,#4*(0)		@ r10 = start of the buffer, read by Loop_tail_neon
13688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.8	{q10,q11},[r8]
13698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	sub	r11,r11,#64*2	@ len-=64*2
13708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	b	Loop_tail_neon
13718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
13728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.align	4
13738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanL192_or_more_neon:
	@ Ltail_neon branches here when r11 is at least 192.  XOR 192 input
	@ bytes at r12 with q0-q11 and store them at r14.  If bytes remain,
	@ a further 64-byte block is assembled from r0-r7 and the stack,
	@ written to sp[0..63], and control falls through to Loop_tail_neon.
13748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.8	{q12,q13},[r12]!
13758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.8	{q14,q15},[r12]!
13768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q0,q0,q12
13778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q1,q1,q13
13788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.8	{q12,q13},[r12]!	@ input bytes 64..95
13798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q2,q2,q14
13808ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q3,q3,q15
13818ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.8	{q14,q15},[r12]!	@ input bytes 96..127
13828ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
13838ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q4,q4,q12
13848ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q5,q5,q13
13858ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.8	{q12,q13},[r12]!	@ input bytes 128..159
13868ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q6,q6,q14
13878ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.8	{q0,q1},[r14]!
13888ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q7,q7,q15
13898ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vld1.8	{q14,q15},[r12]!	@ input bytes 160..191
13908ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
13918ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q8,q8,q12
13928ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.8	{q2,q3},[r14]!
13938ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q9,q9,q13
13948ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.8	{q4,q5},[r14]!
13958ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q10,q10,q14
13968ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.8	{q6,q7},[r14]!
13978ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	veor	q11,q11,q15
13988ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.8	{q8,q9},[r14]!
13998ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vst1.8	{q10,q11},[r14]!
14008ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
	@ Nothing since the cmp r11,#64*3 in Ltail_neon has written the
	@ flags, so eq here means the length was exactly 192 bytes.
14018ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	beq	Ldone_neon
14028ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
	@ First half of the extra block: r0-r7 += key material words 0..7
	@ read from sp.  r8-r11 are scratch from here on, so the length in
	@ r11 is lost and is re-loaded from the frame further down.
14038ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	sp,{r8,r9,r10,r11}	@ load key material
14048ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r0,r0,r8	@ accumulate key material
14058ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,sp,#4*(4)		@ r8 = address of words 4..7
14068ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r1,r1,r9
14078ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r2,r2,r10
14088ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r3,r3,r11
14098ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	r8,{r8,r9,r10,r11}	@ load key material
14108ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
14118ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r4,r4,r8	@ accumulate key material
14128ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,sp,#4*(8)		@ r8 = address of words 8..11
14138ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r5,r5,r9
14148ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r6,r6,r10
14158ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r7,r7,r11
14168ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	r8,{r8,r9,r10,r11}	@ load key material
	@ Big-endian builds only: byte-reverse each word before it is
	@ stored, since Loop_tail_neon reads the buffer back bytewise.
14178ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__ARMEB__
14188ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r0,r0
14198ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r1,r1
14208ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r2,r2
14218ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r3,r3
14228ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r4,r4
14238ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r5,r5
14248ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r6,r6
14258ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r7,r7
14268ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
	@ Words 0..7 of the key material are overwritten in place by the
	@ finished first half; words 8..11 are already held in r8-r11.
14278ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	stmia	sp,{r0,r1,r2,r3,r4,r5,r6,r7}
14288ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r0,sp,#4*(16+8)		@ r0 = address of the 8 words at frame index 16+8
14298ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
14308ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	r0,{r0,r1,r2,r3,r4,r5,r6,r7}	@ load second half
14318ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
14328ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r0,r0,r8	@ accumulate key material
14338ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,sp,#4*(12)		@ r8 = address of words 12..15
14348ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r1,r1,r9
14358ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r2,r2,r10
14368ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r3,r3,r11
14378ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	r8,{r8,r9,r10,r11}	@ load key material
14388ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
14398ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r4,r4,r8	@ accumulate key material
14408ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r8,sp,#4*(8)		@ r8 = destination for the second half (sp+32)
14418ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r5,r5,r9
14428ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r4,r4,#3		@ counter+3
14438ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r6,r6,r10
14448ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r7,r7,r11
14458ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldr	r11,[sp,#4*(32+2)]	@ re-load len
14468ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# ifdef	__ARMEB__
14478ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r0,r0
14488ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r1,r1
14498ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r2,r2
14508ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r3,r3
14518ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r4,r4
14528ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r5,r5
14538ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r6,r6
14548ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	rev	r7,r7
14558ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan# endif
	@ Second half goes to sp[32..63]; the buffer at sp is now complete.
14568ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	stmia	r8,{r0,r1,r2,r3,r4,r5,r6,r7}
14578ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	r10,sp,#4*(0)		@ r10 = start of the buffer, read by Loop_tail_neon
14588ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	sub	r11,r11,#64*3	@ len-=64*3
14598ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
14608ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanLoop_tail_neon:
	@ Bytewise tail: out[i] = in[i] ^ buf[i] for r11 bytes, with
	@ r10 = buffer on the stack, r12 = input, r14 = output, all three
	@ post-incremented.  The count is decremented before it is tested,
	@ so r11 is expected to be nonzero on entry.  Falls through to
	@ Ldone_neon when the count reaches zero.
14618ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrb	r8,[r10],#1	@ read buffer on stack
14628ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldrb	r9,[r12],#1		@ read input
14638ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	subs	r11,r11,#1		@ sets Z for the bne below
14648ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	eor	r8,r8,r9
14658ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	strb	r8,[r14],#1		@ store output
14668ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	bne	Loop_tail_neon
14678ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
14688ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanLdone_neon:
	@ Common exit of the NEON path: reload d8-d15, drop the rest of the
	@ frame, then pop r4-r11 and return.
14698ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	sp,sp,#4*(32+4)		@ sp -> d8-d15 save area
14708ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	vldmia	sp,{d8,d9,d10,d11,d12,d13,d14,d15}	@ no writeback, sp is unchanged
	@ Skip the 16 words just reloaded plus 3 more words.
	@ NOTE(review): those 3 words are laid out by the prologue, which is
	@ outside this part of the file - confirm what they hold.
14718ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	add	sp,sp,#4*(16+3)
14728ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}	@ loading pc performs the return
14738ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan
14748ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.comm	_OPENSSL_armcap_P,4	@ 4-byte common symbol
	@ Pointer slot for _OPENSSL_armcap_P.  The LOPENSSL_armcap word near
	@ the top of the file stores the offset of the local label below
	@ relative to LChaCha20_ctr32.
	@ NOTE(review): presumably the zero placeholder is replaced with the
	@ address of _OPENSSL_armcap_P by the dynamic linker - confirm.
14758ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.non_lazy_symbol_pointer
14768ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert SloanOPENSSL_armcap_P:
14778ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.indirect_symbol	_OPENSSL_armcap_P
14788ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan.long	0
14798ff035535f7cf2903f02bbe94d2fa10b7ab855f1Robert Sloan#endif
1480