#if defined(__arm__)
#include <openssl/arm_arch.h>

@ All code in this file is assembled as 32-bit ARM (not Thumb) instructions.
.text
.code	32

#if __ARM_MAX_ARCH__>=7
@ Link-time constant: the distance from the entry label of bn_mul_mont
@ (.Lbn_mul_mont) to the symbol OPENSSL_armcap_P. bn_mul_mont adds this
@ word to the run-time address of its own entry label to reach the
@ capability word without an absolute address.
.align	5
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-.Lbn_mul_mont
#endif
12d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
@-----------------------------------------------------------------------
@ bn_mul_mont: word-serial Montgomery multiplication, integer-only path.
@
@ Register/stack arguments as used by the code below:
@   r0      = rp   (result, num words)
@   r1      = ap   (first multiplicand, num words)
@   r2      = bp   (second multiplicand, num words)
@   r3      = np   (modulus, num words)
@   [sp]    = pointer to n0 (dereferenced once; only one word is used)
@   [sp,#4] = num  (word count)
@ NOTE(review): presumably matches the C prototype
@   int bn_mul_mont(rp, ap, bp, np, n0, num) -- confirm against the header.
@
@ Returns r0 = 1 on success, r0 = 0 when num < 2 (nothing is written).
@
@ When built with __ARM_MAX_ARCH__ >= 7, num is a multiple of 8 and the
@ ARMV7_NEON bit is set in the capability word, control is transferred
@ (tail branch, stack restored) to bn_mul8x_mont_neon instead.
@
@ Stack frame while the integer loops run (r0 = &tp[num-1]):
@   sp .. r0+4     tp[0..num]            scratch accumulator, num+1 words
@   r0+2*4 ..      saved r4-r12,lr       10 words
@   r0+12*4        saved r0 (rp)
@   r0+13*4        saved r2 (bp)         reused as the running bp cursor
@   r0+14*4        caller slot for &n0   overwritten with the n0 value
@   r0+15*4        caller slot for num   overwritten with &bp[num-1]
@ The two caller argument slots are therefore clobbered on return.
@-----------------------------------------------------------------------
.globl	bn_mul_mont
.hidden	bn_mul_mont
.type	bn_mul_mont,%function

.align	5
bn_mul_mont:
.Lbn_mul_mont:
	ldr	ip,[sp,#4]		@ load num
	stmdb	sp!,{r0,r2}		@ sp points at argument block
#if __ARM_MAX_ARCH__>=7
	tst	ip,#7			@ NEON path needs num % 8 == 0
	bne	.Lialu
	@ Locate the capability word: entry-label address + link-time offset.
	@ The local label is the same anchor the .LOPENSSL_armcap literal uses.
	adr	r0,.Lbn_mul_mont
	ldr	r2,.LOPENSSL_armcap
	ldr	r0,[r0,r2]
#ifdef	__APPLE__
	ldr	r0,[r0]			@ one extra level of indirection on Apple targets
#endif
	tst	r0,#ARMV7_NEON		@ NEON available?
	ldmia	sp, {r0,r2}		@ reload rp/bp; ldmia leaves the flags intact
	beq	.Lialu
	add	sp,sp,#8		@ drop {r0,r2} so the NEON routine sees the caller frame
	b	bn_mul8x_mont_neon
.align	4
.Lialu:
#endif
	cmp	ip,#2
	mov	r0,ip			@ load num
	movlt	r0,#0			@ num < 2 (signed): return 0
	addlt	sp,sp,#2*4		@ ...after dropping {r0,r2}
	blt	.Labrt

	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}		@ save 10 registers

	mov	r0,r0,lsl#2		@ rescale r0 for byte count
	sub	sp,sp,r0		@ alloca(4*num)
	sub	sp,sp,#4		@ +extra dword
	sub	r0,r0,#4		@ "num=num-1"
	add	r4,r2,r0		@ &bp[num-1]

	add	r0,sp,r0		@ r0 to point at &tp[num-1]
	ldr	r8,[r0,#14*4]		@ &n0
	ldr	r2,[r2]		@ bp[0]
	ldr	r5,[r1],#4		@ ap[0],ap++
	ldr	r6,[r3],#4		@ np[0],np++
	ldr	r8,[r8]		@ *n0
	str	r4,[r0,#15*4]		@ save &bp[num-1] (outer-loop end sentinel)

	umull	r10,r11,r5,r2	@ ap[0]*bp[0]
	str	r8,[r0,#14*4]		@ save n0 value
	mul	r8,r10,r8		@ "tp[0]"*n0
	mov	r12,#0
	umlal	r10,r12,r6,r8	@ np[0]*n0+"t[0]"
	mov	r4,sp

	@ First pass (i = 0): tp = (ap*bp[0] + np*m) >> 32, where m is the
	@ multiplier in r8. r11 carries the ap*bp high word, r12 the np*m one.
.L1st:
	ldr	r5,[r1],#4		@ ap[j],ap++
	mov	r10,r11
	ldr	r6,[r3],#4		@ np[j],np++
	mov	r11,#0
	umlal	r10,r11,r5,r2	@ ap[j]*bp[0]
	mov	r14,#0
	umlal	r12,r14,r6,r8	@ np[j]*n0
	adds	r12,r12,r10
	str	r12,[r4],#4		@ tp[j-1]=,tp++
	adc	r12,r14,#0
	cmp	r4,r0
	bne	.L1st

	adds	r12,r12,r11
	ldr	r4,[r0,#13*4]		@ restore bp
	mov	r14,#0
	ldr	r8,[r0,#14*4]		@ restore n0
	adc	r14,r14,#0
	str	r12,[r0]		@ tp[num-1]=
	str	r14,[r0,#4]		@ tp[num]=

	@ Remaining passes (i = 1 .. num-1): tp = (tp + ap*bp[i] + np*m) >> 32.
.Louter:
	sub	r7,r0,sp		@ "original" r0-1 value
	sub	r1,r1,r7		@ "rewind" ap to &ap[1]
	ldr	r2,[r4,#4]!		@ *(++bp)
	sub	r3,r3,r7		@ "rewind" np to &np[1]
	ldr	r5,[r1,#-4]		@ ap[0]
	ldr	r10,[sp]		@ tp[0]
	ldr	r6,[r3,#-4]		@ np[0]
	ldr	r7,[sp,#4]		@ tp[1]

	mov	r11,#0
	umlal	r10,r11,r5,r2	@ ap[0]*bp[i]+tp[0]
	str	r4,[r0,#13*4]		@ save bp
	mul	r8,r10,r8
	mov	r12,#0
	umlal	r10,r12,r6,r8	@ np[0]*n0+"tp[0]"
	mov	r4,sp

.Linner:
	ldr	r5,[r1],#4		@ ap[j],ap++
	adds	r10,r11,r7		@ +=tp[j]
	ldr	r6,[r3],#4		@ np[j],np++
	mov	r11,#0
	umlal	r10,r11,r5,r2	@ ap[j]*bp[i]
	mov	r14,#0
	umlal	r12,r14,r6,r8	@ np[j]*n0
	adc	r11,r11,#0		@ fold in the carry from +=tp[j] (umlal keeps flags)
	ldr	r7,[r4,#8]		@ tp[j+1]
	adds	r12,r12,r10
	str	r12,[r4],#4		@ tp[j-1]=,tp++
	adc	r12,r14,#0
	cmp	r4,r0
	bne	.Linner

	adds	r12,r12,r11
	mov	r14,#0
	ldr	r4,[r0,#13*4]		@ restore bp
	adc	r14,r14,#0
	ldr	r8,[r0,#14*4]		@ restore n0
	adds	r12,r12,r7
	ldr	r7,[r0,#15*4]		@ restore &bp[num-1]
	adc	r14,r14,#0
	str	r12,[r0]		@ tp[num-1]=
	str	r14,[r0,#4]		@ tp[num]=

	cmp	r4,r7			@ done once bp[num-1] has been consumed
	bne	.Louter

	@ Final reduction: rp = tp - np, then keep either that difference or
	@ the unreduced tp depending on whether the subtraction borrowed.
	ldr	r2,[r0,#12*4]		@ pull rp
	add	r0,r0,#4		@ r0 to point at &tp[num]
	sub	r5,r0,sp		@ "original" num value
	mov	r4,sp			@ "rewind" r4
	mov	r1,r4			@ "borrow" r1
	sub	r3,r3,r5		@ "rewind" r3 to &np[0]

	subs	r7,r7,r7		@ "clear" carry flag (sets C=1, i.e. no borrow)
.Lsub:	ldr	r7,[r4],#4
	ldr	r6,[r3],#4
	sbcs	r7,r7,r6		@ tp[j]-np[j]
	str	r7,[r2],#4		@ rp[j]=
	teq	r4,r0		@ preserve carry
	bne	.Lsub
	sbcs	r14,r14,#0		@ upmost carry: r14 = tp[num] - borrow, used as a mask
	mov	r4,sp			@ "rewind" r4
	sub	r2,r2,r5		@ "rewind" r2

	@ Select the copy source with mask arithmetic instead of a branch.
	and	r1,r4,r14
	bic	r3,r2,r14
	orr	r1,r1,r3		@ ap=borrow?tp:rp

.Lcopy:	ldr	r7,[r1],#4		@ copy or in-place refresh
	str	sp,[r4],#4		@ zap tp (overwrite scratch with the sp value)
	str	r7,[r2],#4
	cmp	r4,r0
	bne	.Lcopy

	add	sp,r0,#4		@ skip over tp[num+1]
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}		@ restore registers
	add	sp,sp,#2*4		@ skip over {r0,r2}
	mov	r0,#1
.Labrt:
#if __ARM_ARCH__>=5
	bx	lr				@ .word	0xe12fff1e
#else
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
.size	bn_mul_mont,.-bn_mul_mont
179e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley#if __ARM_MAX_ARCH__>=7
180e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley.arch	armv7-a
181d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.fpu	neon
182d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
183d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.type	bn_mul8x_mont_neon,%function
184d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.align	5
185d9e397b599b13d642138480a28c14db7a136bf0Adam Langleybn_mul8x_mont_neon:
186d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	mov	ip,sp
187e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
188e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}		@ ABI specification says so
189e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	ldmia	ip,{r4,r5}		@ load rest of parameter block
190e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley
191e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	sub	r7,sp,#16
192e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.32	{d28[0]}, [r2,:32]!
193e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	sub	r7,r7,r5,lsl#4
194e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.32	{d0,d1,d2,d3},  [r1]!		@ can't specify :32 :-(
195e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	and	r7,r7,#-64
196e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.32	{d30[0]}, [r4,:32]
197e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	mov	sp,r7			@ alloca
198e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	veor	d8,d8,d8
199e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	subs	r8,r5,#8
200e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vzip.16	d28,d8
201d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
202d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmull.u32	q6,d28,d0[0]
203d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmull.u32	q7,d28,d0[1]
204d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmull.u32	q8,d28,d1[0]
205d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshl.i64	d10,d13,#16
206d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmull.u32	q9,d28,d1[1]
207d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
208d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d10,d10,d12
209e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	veor	d8,d8,d8
210d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmul.u32	d29,d10,d30
211d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
212d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmull.u32	q10,d28,d2[0]
213e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.32	{d4,d5,d6,d7}, [r3]!
214d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmull.u32	q11,d28,d2[1]
215d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmull.u32	q12,d28,d3[0]
216e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vzip.16	d29,d8
217d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmull.u32	q13,d28,d3[1]
218d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
219d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	bne	.LNEON_1st
220d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
221d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	@ special case for num=8, everything is in register bank...
222d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
223d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q6,d29,d4[0]
224e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	sub	r9,r5,#1
225d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q7,d29,d4[1]
226d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q8,d29,d5[0]
227d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q9,d29,d5[1]
228d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
229d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q10,d29,d6[0]
230e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vmov	q5,q6
231d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q11,d29,d6[1]
232e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vmov	q6,q7
233d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q12,d29,d7[0]
234e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vmov	q7,q8
235d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q13,d29,d7[1]
236e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vmov	q8,q9
237e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vmov	q9,q10
238d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d10,#16
239e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vmov	q10,q11
240e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vmov	q11,q12
241d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d10,d10,d11
242e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vmov	q12,q13
243e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	veor	q13,q13
244d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d10,#16
245d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
246d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	b	.LNEON_outer8
247d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
248d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.align	4
249d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.LNEON_outer8:
250e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.32	{d28[0]}, [r2,:32]!
251e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	veor	d8,d8,d8
252e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vzip.16	d28,d8
253d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d12,d12,d10
254d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
255d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q6,d28,d0[0]
256d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q7,d28,d0[1]
257d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q8,d28,d1[0]
258d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshl.i64	d10,d13,#16
259d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q9,d28,d1[1]
260d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
261d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d10,d10,d12
262e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	veor	d8,d8,d8
263e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	subs	r9,r9,#1
264d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmul.u32	d29,d10,d30
265d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
266d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q10,d28,d2[0]
267d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q11,d28,d2[1]
268d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q12,d28,d3[0]
269e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vzip.16	d29,d8
270d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q13,d28,d3[1]
271d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
272d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q6,d29,d4[0]
273d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q7,d29,d4[1]
274d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q8,d29,d5[0]
275d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q9,d29,d5[1]
276d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
277d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q10,d29,d6[0]
278e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vmov	q5,q6
279d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q11,d29,d6[1]
280e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vmov	q6,q7
281d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q12,d29,d7[0]
282e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vmov	q7,q8
283d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q13,d29,d7[1]
284e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vmov	q8,q9
285e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vmov	q9,q10
286d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d10,#16
287e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vmov	q10,q11
288e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vmov	q11,q12
289d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d10,d10,d11
290e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vmov	q12,q13
291e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	veor	q13,q13
292d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d10,#16
293d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
294d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	bne	.LNEON_outer8
295d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
296d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d12,d12,d10
297e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	mov	r7,sp
298d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d12,#16
299e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	mov	r8,r5
300d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d13,d13,d10
301e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	add	r6,sp,#16
302d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d13,#16
303e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vzip.16	d12,d13
304d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
305d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	b	.LNEON_tail2
306d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
307d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.align	4
308d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.LNEON_1st:
309d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q6,d29,d4[0]
310e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.32	{d0,d1,d2,d3}, [r1]!
311d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q7,d29,d4[1]
312e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	subs	r8,r8,#8
313d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q8,d29,d5[0]
314d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q9,d29,d5[1]
315d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
316d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q10,d29,d6[0]
317e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.32	{d4,d5}, [r3]!
318d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q11,d29,d6[1]
319e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.64	{q6,q7}, [r7,:256]!
320d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q12,d29,d7[0]
321d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q13,d29,d7[1]
322e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.64	{q8,q9}, [r7,:256]!
323d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
324d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmull.u32	q6,d28,d0[0]
325e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.32	{d6,d7}, [r3]!
326d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmull.u32	q7,d28,d0[1]
327e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.64	{q10,q11}, [r7,:256]!
328d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmull.u32	q8,d28,d1[0]
329d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmull.u32	q9,d28,d1[1]
330e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.64	{q12,q13}, [r7,:256]!
331d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
332d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmull.u32	q10,d28,d2[0]
333d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmull.u32	q11,d28,d2[1]
334d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmull.u32	q12,d28,d3[0]
335d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmull.u32	q13,d28,d3[1]
336d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
337d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	bne	.LNEON_1st
338d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
339d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q6,d29,d4[0]
340e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	add	r6,sp,#16
341d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q7,d29,d4[1]
342e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	sub	r1,r1,r5,lsl#2		@ rewind r1
343d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q8,d29,d5[0]
344e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.64	{q5}, [sp,:128]
345d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q9,d29,d5[1]
346e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	sub	r9,r5,#1
347d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
348d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q10,d29,d6[0]
349e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.64	{q6,q7}, [r7,:256]!
350d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q11,d29,d6[1]
351d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d10,#16
352e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.64	{q6},       [r6, :128]!
353d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q12,d29,d7[0]
354e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.64	{q8,q9}, [r7,:256]!
355d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q13,d29,d7[1]
356d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
357e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.64	{q10,q11}, [r7,:256]!
358d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d10,d10,d11
359e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	veor	q4,q4,q4
360e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.64	{q12,q13}, [r7,:256]!
361e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.64	{q7,q8}, [r6, :256]!
362e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.64	{q4},          [r7,:128]
363d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d10,#16
364d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
365e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	b	.LNEON_outer
366d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
367d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.align	4
368d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.LNEON_outer:
369e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.32	{d28[0]}, [r2,:32]!
370e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	sub	r3,r3,r5,lsl#2		@ rewind r3
371e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.32	{d0,d1,d2,d3},  [r1]!
372e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	veor	d8,d8,d8
373e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	mov	r7,sp
374e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vzip.16	d28,d8
375e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	sub	r8,r5,#8
376d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d12,d12,d10
377d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
378d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q6,d28,d0[0]
379e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.64	{q9,q10},[r6,:256]!
380d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q7,d28,d0[1]
381d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q8,d28,d1[0]
382e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.64	{q11,q12},[r6,:256]!
383d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q9,d28,d1[1]
384d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
385d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshl.i64	d10,d13,#16
386e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	veor	d8,d8,d8
387d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d10,d10,d12
388e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.64	{q13},[r6,:128]!
389d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmul.u32	d29,d10,d30
390d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
391d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q10,d28,d2[0]
392e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.32	{d4,d5,d6,d7}, [r3]!
393d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q11,d28,d2[1]
394d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q12,d28,d3[0]
395e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vzip.16	d29,d8
396d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q13,d28,d3[1]
397d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
398d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.LNEON_inner:
399d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q6,d29,d4[0]
400e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.32	{d0,d1,d2,d3}, [r1]!
401d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q7,d29,d4[1]
402e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	subs	r8,r8,#8
403d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q8,d29,d5[0]
404d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q9,d29,d5[1]
405e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.64	{q6,q7}, [r7,:256]!
406d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
407d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q10,d29,d6[0]
408e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.64	{q6},       [r6, :128]!
409d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q11,d29,d6[1]
410e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.64	{q8,q9}, [r7,:256]!
411d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q12,d29,d7[0]
412e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.64	{q7,q8}, [r6, :256]!
413d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q13,d29,d7[1]
414e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.64	{q10,q11}, [r7,:256]!
415d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
416d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q6,d28,d0[0]
417e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.64	{q9,q10}, [r6, :256]!
418d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q7,d28,d0[1]
419e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.64	{q12,q13}, [r7,:256]!
420d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q8,d28,d1[0]
421e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.64	{q11,q12}, [r6, :256]!
422d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q9,d28,d1[1]
423e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.32	{d4,d5,d6,d7}, [r3]!
424d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
425d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q10,d28,d2[0]
426e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.64	{q13},       [r6, :128]!
427d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q11,d28,d2[1]
428d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q12,d28,d3[0]
429d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q13,d28,d3[1]
430d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
431d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	bne	.LNEON_inner
432d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
433d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q6,d29,d4[0]
434e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	add	r6,sp,#16
435d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q7,d29,d4[1]
436e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	sub	r1,r1,r5,lsl#2		@ rewind r1
437d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q8,d29,d5[0]
438e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.64	{q5}, [sp,:128]
439d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q9,d29,d5[1]
440e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	subs	r9,r9,#1
441d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
442d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q10,d29,d6[0]
443e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.64	{q6,q7}, [r7,:256]!
444d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q11,d29,d6[1]
445e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.64	{q6},       [r6, :128]!
446d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d10,#16
447e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.64	{q8,q9}, [r7,:256]!
448d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q12,d29,d7[0]
449e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.64	{q7,q8}, [r6, :256]!
450d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vmlal.u32	q13,d29,d7[1]
451d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
452e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.64	{q10,q11}, [r7,:256]!
453d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d10,d10,d11
454e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.64	{q12,q13}, [r7,:256]!
455d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d10,#16
456d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
457d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	bne	.LNEON_outer
458d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
459e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	mov	r7,sp
460e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	mov	r8,r5
461d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
462d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.LNEON_tail:
463d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d12,d12,d10
464e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.64	{q9,q10}, [r6, :256]!
465d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d12,#16
466d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d13,d13,d10
467e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.64	{q11,q12}, [r6, :256]!
468d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d13,#16
469e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.64	{q13},       [r6, :128]!
470e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vzip.16	d12,d13
471d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
472d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.LNEON_tail2:
473d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d14,d14,d10
474e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.32	{d12[0]}, [r7, :32]!
475d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d14,#16
476d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d15,d15,d10
477d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d15,#16
478e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vzip.16	d14,d15
479d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
480d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d16,d16,d10
481e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.32	{d14[0]}, [r7, :32]!
482d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d16,#16
483d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d17,d17,d10
484d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d17,#16
485e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vzip.16	d16,d17
486d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
487d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d18,d18,d10
488e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.32	{d16[0]}, [r7, :32]!
489d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d18,#16
490d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d19,d19,d10
491d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d19,#16
492e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vzip.16	d18,d19
493d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
494d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d20,d20,d10
495e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.32	{d18[0]}, [r7, :32]!
496d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d20,#16
497d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d21,d21,d10
498d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d21,#16
499e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vzip.16	d20,d21
500d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
501d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d22,d22,d10
502e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.32	{d20[0]}, [r7, :32]!
503d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d22,#16
504d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d23,d23,d10
505d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d23,#16
506e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vzip.16	d22,d23
507d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
508d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d24,d24,d10
509e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.32	{d22[0]}, [r7, :32]!
510d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d24,#16
511d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d25,d25,d10
512e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.64	{q6}, [r6, :128]!
513d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d25,#16
514e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vzip.16	d24,d25
515d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
516d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d26,d26,d10
517e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.32	{d24[0]}, [r7, :32]!
518d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d26,#16
519d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vadd.u64	d27,d27,d10
520e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vld1.64	{q7,q8},	[r6, :256]!
521d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vshr.u64	d10,d27,#16
522e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vzip.16	d26,d27
523e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	subs	r8,r8,#8
524e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.32	{d26[0]}, [r7, :32]!
525d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
526d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	bne	.LNEON_tail
527d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
528d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	vst1.32	{d10[0]}, [r7, :32]		@ top-most bit
529d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	sub	r3,r3,r5,lsl#2			@ rewind r3
530d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	subs	r1,sp,#0				@ clear carry flag
531d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	add	r2,sp,r5,lsl#2
532d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
533d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.LNEON_sub:
534e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	ldmia	r1!, {r4,r5,r6,r7}
535e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	ldmia	r3!, {r8,r9,r10,r11}
536d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	sbcs	r8, r4,r8
537d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	sbcs	r9, r5,r9
538d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	sbcs	r10,r6,r10
539d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	sbcs	r11,r7,r11
540d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	teq	r1,r2				@ preserves carry
541e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	stmia	r0!, {r8,r9,r10,r11}
542d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	bne	.LNEON_sub
543d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
544d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	ldr	r10, [r1]				@ load top-most bit
545d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	veor	q0,q0,q0
546d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	sub	r11,r2,sp				@ this is num*4
547d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	veor	q1,q1,q1
548d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	mov	r1,sp
549d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	sub	r0,r0,r11				@ rewind r0
550d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	mov	r3,r2				@ second 3/4th of frame
551d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	sbcs	r10,r10,#0				@ result is carry flag
552d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
553d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.LNEON_copy_n_zap:
554e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	ldmia	r1!, {r4,r5,r6,r7}
555e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	ldmia	r0,  {r8,r9,r10,r11}
556d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	movcc	r8, r4
557e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.64	{q0,q1}, [r3,:256]!			@ wipe
558d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	movcc	r9, r5
559d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	movcc	r10,r6
560e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.64	{q0,q1}, [r3,:256]!			@ wipe
561d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	movcc	r11,r7
562e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	ldmia	r1, {r4,r5,r6,r7}
563e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	stmia	r0!, {r8,r9,r10,r11}
564d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	sub	r1,r1,#16
565e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	ldmia	r0, {r8,r9,r10,r11}
566d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	movcc	r8, r4
567e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.64	{q0,q1}, [r1,:256]!			@ wipe
568d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	movcc	r9, r5
569d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	movcc	r10,r6
570e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vst1.64	{q0,q1}, [r3,:256]!			@ wipe
571d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	movcc	r11,r7
572d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	teq	r1,r2				@ preserves carry
573e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	stmia	r0!, {r8,r9,r10,r11}
574d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	bne	.LNEON_copy_n_zap
575d9e397b599b13d642138480a28c14db7a136bf0Adam Langley
576d9e397b599b13d642138480a28c14db7a136bf0Adam Langley	sub	sp,ip,#96
577e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
578e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
579e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley	bx	lr						@ .word	0xe12fff1e
580d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.size	bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
581d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#endif
@ Identification string embedded in the object file. The bytes below are the
@ ASCII codes of the NUL-terminated text
@   "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
@ No instruction in the visible part of this file refers to it.
582e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley.byte	77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
@ Pad after the string: on ARM targets GNU as reads the .align operand as a
@ power of two, so this requests a 4-byte boundary.
583e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley.align	2
@ Second, identical alignment request; it adds no padding once the first one
@ has been applied.
584d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.align	2
@ Capability word, declared only when the build may target ARMv7 or later
@ (__ARM_MAX_ARCH__ >= 7). The same condition guards the .LOPENSSL_armcap
@ offset word and the load through it at the top of bn_mul_mont.
585e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley#if __ARM_MAX_ARCH__>=7
@ Common symbol, 4 bytes in size; the third operand is the requested
@ alignment (a byte boundary on ELF targets).
586d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.comm	OPENSSL_armcap_P,4,4
@ Hidden visibility: the symbol is not exported from the linked component.
587e9ada863a7b3e81f5d2b1e3bdd2305da902a87f5Adam Langley.hidden	OPENSSL_armcap_P
588d9e397b599b13d642138480a28c14db7a136bf0Adam Langley#endif
589b8494591d1b1a143f3b192d845c238bbf3bc629dKenny Root#endif