#if defined(__arm__)
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License").  You may not use
@ this file except in compliance with the License.  You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html


@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================

@ SHA256 block procedure for ARMv4. May 2007.

@ Performance is ~2x better than gcc 3.4 generated code and in
@ "absolute" terms is ~2250 cycles per 64-byte block or ~35 cycles
@ per byte [on single-issue Xscale PXA250 core].

@ July 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
@ Cortex A8 core and ~20 cycles per processed byte.

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 16%
@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.

@ September 2013.
@
@ Add NEON implementation. On Cortex A8 it was measured to process one
@ byte in 12.5 cycles, or 23% faster than the integer-only code. Snapdragon
@ S4 also does it in 12.5 cycles, but is 50% faster than its integer-only
@ code (meaning that the latter performs sub-optimally; nothing was done
@ about it).

@ May 2014.
@
@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.

#ifndef __KERNEL__
# include <openssl/arm_arch.h>
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors. It does have ARMv8-only code, but those
@ instructions are manually-encoded. (See unsha256.)
.arch	armv7-a

.text
#if defined(__thumb2__)
.syntax	unified
.thumb
#else
.code	32
#endif

.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
.word	0				@ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-.Lsha256_block_data_order
#endif
.align	5

.globl	sha256_block_data_order
.hidden	sha256_block_data_order
.type	sha256_block_data_order,%function
sha256_block_data_order:
.Lsha256_block_data_order:
#if __ARM_ARCH__<7 && !defined(__thumb2__)
	sub	r3,pc,#8		@ sha256_block_data_order
#else
	adr	r3,.Lsha256_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
	ldr	r12,.LOPENSSL_armcap
	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
#ifdef	__APPLE__
	ldr	r12,[r12]
#endif
	tst	r12,#ARMV8_SHA256
	bne	.LARMv8
	tst	r12,#ARMV7_NEON
	bne	.LNEON
#endif
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r14,r3,#256+32	@ K256
	sub	sp,sp,#16*4		@ alloca(X[16])
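@ Frame layout from here on: X[0..15] lives at sp..sp+15*4, the saved ctx
@ pointer at sp+16*4, inp at sp+17*4 and inp+len at sp+18*4. The working
@ variables a-h are kept in r4-r11 and r14 walks the K256 table.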
.Loop:
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6		@ magic
	eor	r12,r12,r12
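@ Each unrolled round computes Sigma1(e) as ((e ^ (e ror 5) ^ (e ror 19)) ror 6),
@ i.e. (e ror 6) ^ (e ror 11) ^ (e ror 25), and Sigma0(a) likewise with
@ rotations 11/20 folded through a final ror 2. Maj(a,b,c) is computed as
@ (((b^c) & (a^b)) ^ b), and its addition to h is deferred to the following
@ round ("from the past"); r3 and r12 alternate as carriers of the a^b value,
@ so r12 is zeroed above to make round 0's deferred addition a no-op.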
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 0
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 0
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 0==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 0<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 1
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 1
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 1==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 1<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 2
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 2
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 2==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 2<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 3
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 3
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 3==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 3<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 4
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 4
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 4==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 4<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 5
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 5==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 5<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 6
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 6
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 6==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 6<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 7
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 7==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 7<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 8
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 8
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 8==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 8<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 9
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 9
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 9==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 9<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 10
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 10
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 10==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 10<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 11
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 11
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 11==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 11<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 12
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 12
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 12==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 12<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 13
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 13
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 13==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 13<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 14
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 14
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 14==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 14<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 15
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 15
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 15==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 15<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
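@ Rounds 16..63 extend the message schedule in place:
@ X[i%16] += sigma0(X[(i+1)%16]) + sigma1(X[(i+14)%16]) + X[(i+9)%16],
@ where sigma0(x) = (x ror 7) ^ (x ror 18) ^ (x >> 3) and
@ sigma1(x) = (x ror 17) ^ (x ror 19) ^ (x >> 10).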
.Lrounds_16_xx:
	@ ldr	r2,[sp,#1*4]		@ 16
	@ ldr	r1,[sp,#14*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#0*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#9*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 16==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 16<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#2*4]		@ 17
	@ ldr	r1,[sp,#15*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#1*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#10*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 17==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 17<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#3*4]		@ 18
	@ ldr	r1,[sp,#0*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#2*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#11*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 18==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 18<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#4*4]		@ 19
	@ ldr	r1,[sp,#1*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#3*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#12*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 19==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 19<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#5*4]		@ 20
	@ ldr	r1,[sp,#2*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#4*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#13*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 20==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 20<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#6*4]		@ 21
	@ ldr	r1,[sp,#3*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#5*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#14*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 21==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 21<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#7*4]		@ 22
	@ ldr	r1,[sp,#4*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#6*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#15*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 22==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 22<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#8*4]		@ 23
	@ ldr	r1,[sp,#5*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#7*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#0*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 23==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 23<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#9*4]		@ 24
	@ ldr	r1,[sp,#6*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#8*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#1*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 24==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 24<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#10*4]		@ 25
	@ ldr	r1,[sp,#7*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#9*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#2*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 25==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 25<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#11*4]		@ 26
	@ ldr	r1,[sp,#8*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#10*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#3*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 26==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 26<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#12*4]		@ 27
	@ ldr	r1,[sp,#9*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#11*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#4*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 27==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 27<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#13*4]		@ 28
	@ ldr	r1,[sp,#10*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#12*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#5*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 28==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 28<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#14*4]		@ 29
	@ ldr	r1,[sp,#11*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#13*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#6*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 29==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 29<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#15*4]		@ 30
	@ ldr	r1,[sp,#12*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#14*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#7*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 30==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 30<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#0*4]		@ 31
	@ ldr	r1,[sp,#13*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#15*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#8*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 31==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 31<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
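@ Flags are still set by the cmp against #0xf2 in round 31 above. The body
@ handles 16 schedule rounds per pass (16-31, 32-47, 48-63); of the K256
@ words loaded at those final rounds, only K256[63] (0xc67178f2) has low
@ byte 0xf2, so eq means all 64 rounds of this block are done.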
#if __ARM_ARCH__>=7
	ite	eq			@ Thumb2 thing, sanity check in ARM
#endif
	ldreq	r3,[sp,#16*4]		@ pull ctx
	bne	.Lrounds_16_xx

	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
	ldr	r0,[r3,#0]
	ldr	r2,[r3,#4]
	ldr	r12,[r3,#8]
	add	r4,r4,r0
	ldr	r0,[r3,#12]
	add	r5,r5,r2
	ldr	r2,[r3,#16]
	add	r6,r6,r12
	ldr	r12,[r3,#20]
	add	r7,r7,r0
	ldr	r0,[r3,#24]
	add	r8,r8,r2
	ldr	r2,[r3,#28]
	add	r9,r9,r12
	ldr	r1,[sp,#17*4]		@ pull inp
	ldr	r12,[sp,#18*4]		@ pull inp+len
	add	r10,r10,r0
	add	r11,r11,r2
	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
	cmp	r1,r12
	sub	r14,r14,#256	@ rewind Ktbl
	bne	.Loop

	add	sp,sp,#19*4	@ destroy frame
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
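@ (0xe12fff1e is the ARM encoding of "bx lr", emitted as data so the file
@ still assembles for pre-ARMv4T targets that lack the mnemonic.)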
#endif
.size	sha256_block_data_order,.-sha256_block_data_order
#if __ARM_MAX_ARCH__>=7
.arch	armv7-a
.fpu	neon

.globl	sha256_block_data_order_neon
.hidden	sha256_block_data_order_neon
.type	sha256_block_data_order_neon,%function
.align	5
.skip	16
sha256_block_data_order_neon:
.LNEON:
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}

	sub	r11,sp,#16*4+16
	adr	r14,K256
	bic	r11,r11,#15		@ align for 128-bit stores
	mov	r12,sp
	mov	sp,r11			@ alloca
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp

	vld1.8	{q0},[r1]!
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	vld1.32	{q8},[r14,:128]!
	vld1.32	{q9},[r14,:128]!
	vld1.32	{q10},[r14,:128]!
	vld1.32	{q11},[r14,:128]!
	vrev32.8	q0,q0		@ yes, even on
	str	r0,[sp,#64]
	vrev32.8	q1,q1		@ big-endian
	str	r1,[sp,#68]
	mov	r1,sp
	vrev32.8	q2,q2
	str	r2,[sp,#72]
	vrev32.8	q3,q3
	str	r12,[sp,#76]		@ save original sp
	vadd.i32	q8,q8,q0
	vadd.i32	q9,q9,q1
	vst1.32	{q8},[r1,:128]!
	vadd.i32	q10,q10,q2
	vst1.32	{q9},[r1,:128]!
	vadd.i32	q11,q11,q3
	vst1.32	{q10},[r1,:128]!
	vst1.32	{q11},[r1,:128]!

	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r1,r1,#64
	ldr	r2,[sp,#0]
	eor	r12,r12,r12
	eor	r3,r5,r6
	b	.L_00_48

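@ .L_00_48 interleaves the scalar round function with NEON computation of
@ the next schedule words: each 4-round group extends one q register of X
@ using vext/vshr/vsli pairs (the shifts and rotates of sigma0/sigma1),
@ adds the matching K256 constants, and stores X+K back to the stack for
@ the scalar rounds to consume via ldr r2,[sp,#...].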
1934.align	4
1935.L_00_48:
1936	vext.8	q8,q0,q1,#4
1937	add	r11,r11,r2
1938	eor	r2,r9,r10
1939	eor	r0,r8,r8,ror#5
1940	vext.8	q9,q2,q3,#4
1941	add	r4,r4,r12
1942	and	r2,r2,r8
1943	eor	r12,r0,r8,ror#19
1944	vshr.u32	q10,q8,#7
1945	eor	r0,r4,r4,ror#11
1946	eor	r2,r2,r10
1947	vadd.i32	q0,q0,q9
1948	add	r11,r11,r12,ror#6
1949	eor	r12,r4,r5
1950	vshr.u32	q9,q8,#3
1951	eor	r0,r0,r4,ror#20
1952	add	r11,r11,r2
1953	vsli.32	q10,q8,#25
1954	ldr	r2,[sp,#4]
1955	and	r3,r3,r12
1956	vshr.u32	q11,q8,#18
1957	add	r7,r7,r11
1958	add	r11,r11,r0,ror#2
1959	eor	r3,r3,r5
1960	veor	q9,q9,q10
1961	add	r10,r10,r2
1962	vsli.32	q11,q8,#14
1963	eor	r2,r8,r9
1964	eor	r0,r7,r7,ror#5
1965	vshr.u32	d24,d7,#17
1966	add	r11,r11,r3
1967	and	r2,r2,r7
1968	veor	q9,q9,q11
1969	eor	r3,r0,r7,ror#19
1970	eor	r0,r11,r11,ror#11
1971	vsli.32	d24,d7,#15
1972	eor	r2,r2,r9
1973	add	r10,r10,r3,ror#6
1974	vshr.u32	d25,d7,#10
1975	eor	r3,r11,r4
1976	eor	r0,r0,r11,ror#20
1977	vadd.i32	q0,q0,q9
1978	add	r10,r10,r2
1979	ldr	r2,[sp,#8]
1980	veor	d25,d25,d24
1981	and	r12,r12,r3
1982	add	r6,r6,r10
1983	vshr.u32	d24,d7,#19
1984	add	r10,r10,r0,ror#2
1985	eor	r12,r12,r4
1986	vsli.32	d24,d7,#13
1987	add	r9,r9,r2
1988	eor	r2,r7,r8
1989	veor	d25,d25,d24
1990	eor	r0,r6,r6,ror#5
1991	add	r10,r10,r12
1992	vadd.i32	d0,d0,d25
1993	and	r2,r2,r6
1994	eor	r12,r0,r6,ror#19
1995	vshr.u32	d24,d0,#17
1996	eor	r0,r10,r10,ror#11
1997	eor	r2,r2,r8
1998	vsli.32	d24,d0,#15
1999	add	r9,r9,r12,ror#6
2000	eor	r12,r10,r11
2001	vshr.u32	d25,d0,#10
2002	eor	r0,r0,r10,ror#20
2003	add	r9,r9,r2
2004	veor	d25,d25,d24
2005	ldr	r2,[sp,#12]
2006	and	r3,r3,r12
2007	vshr.u32	d24,d0,#19
2008	add	r5,r5,r9
2009	add	r9,r9,r0,ror#2
2010	eor	r3,r3,r11
2011	vld1.32	{q8},[r14,:128]!
2012	add	r8,r8,r2
2013	vsli.32	d24,d0,#13
2014	eor	r2,r6,r7
2015	eor	r0,r5,r5,ror#5
2016	veor	d25,d25,d24
2017	add	r9,r9,r3
2018	and	r2,r2,r5
2019	vadd.i32	d1,d1,d25
2020	eor	r3,r0,r5,ror#19
2021	eor	r0,r9,r9,ror#11
2022	vadd.i32	q8,q8,q0
2023	eor	r2,r2,r7
2024	add	r8,r8,r3,ror#6
2025	eor	r3,r9,r10
2026	eor	r0,r0,r9,ror#20
2027	add	r8,r8,r2
2028	ldr	r2,[sp,#16]
2029	and	r12,r12,r3
2030	add	r4,r4,r8
2031	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q1,q2,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q3,q0,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q1,q1,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d1,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d1,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d1,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q1,q1,q9
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d1,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d1,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d2,d2,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d2,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d2,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d2,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	vshr.u32	d24,d2,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d2,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d3,d3,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q1
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vext.8	q8,q2,q3,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q0,q1,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q2,q2,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d3,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d3,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d3,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q2,q2,q9
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d3,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d3,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d4,d4,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d4,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d4,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d4,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	vshr.u32	d24,d4,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d4,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d5,d5,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q2
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q3,q0,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q1,q2,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q3,q3,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d5,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d5,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d5,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q3,q3,q9
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d5,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d5,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d6,d6,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d6,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d6,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d6,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	vshr.u32	d24,d6,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d6,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d7,d7,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q3
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[r14]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	teq	r2,#0				@ check for K256 terminator
	ldr	r2,[sp,#0]
	sub	r1,r1,#64		@ rewind the W+K store pointer
	bne	.L_00_48

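@ Last 16 rounds: the message schedule is complete, so while the
@ scalar rounds below finish this block, the NEON unit loads and
@ byte-swaps the next input block. On the final block the input
@ pointer is first backed up by 64 bytes so the loads stay within
@ the buffer (the "avoid SEGV" trick below).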
	ldr	r1,[sp,#68]
	ldr	r0,[sp,#72]
	sub	r14,r14,#256	@ rewind r14
	teq	r1,r0
	it	eq
	subeq	r1,r1,#64		@ avoid SEGV
	vld1.8	{q0},[r1]!		@ load next input block
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	it	ne
	strne	r1,[sp,#68]
	mov	r1,sp			@ reset the W+K store pointer
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q0,q0
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q0
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q1,q1
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q1
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q2,q2
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q2
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q3,q3
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q3
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#64]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	ldr	r0,[r2,#0]
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldr	r12,[r2,#4]
	ldr	r3,[r2,#8]
	ldr	r1,[r2,#12]
	add	r4,r4,r0			@ accumulate
	ldr	r0,[r2,#16]
	add	r5,r5,r12
	ldr	r12,[r2,#20]
	add	r6,r6,r3
	ldr	r3,[r2,#24]
	add	r7,r7,r1
	ldr	r1,[r2,#28]
	add	r8,r8,r0
	str	r4,[r2],#4
	add	r9,r9,r12
	str	r5,[r2],#4
	add	r10,r10,r3
	str	r6,[r2],#4
	add	r11,r11,r1
	str	r7,[r2],#4
	stmia	r2,{r8,r9,r10,r11}

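@ The flags still hold the "last block?" comparison from above: if
@ more input remains, reset the W+K pointer and the deferred-Maj
@ accumulator and run another 64 rounds; otherwise restore the
@ original sp and return.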
	ittte	ne
	movne	r1,sp
	ldrne	r2,[sp,#0]
	eorne	r12,r12,r12		@ zero "Maj(a,b,c) from the past"
	ldreq	sp,[sp,#76]			@ restore original sp
	itt	ne
	eorne	r3,r5,r6
	bne	.L_00_48

	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

# if defined(__thumb2__)
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif

.type	sha256_block_data_order_armv8,%function
.align	5
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32	{q0,q1},[r0]
	sub	r3,r3,#256+32
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
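@ q0,q1 now hold the eight hash words; r3 points back at the K256
@ table. Each .Loop_v8 iteration consumes one 64-byte input block
@ using the manually encoded SHA-256 instructions (see INST above).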
	b	.Loop_v8

.align	4
.Loop_v8:
	vld1.8	{q8,q9},[r1]!
	vld1.8	{q10,q11},[r1]!
	vld1.32	{q12},[r3]!
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov	q14,q0	@ offload
	vmov	q15,q1
	teq	r1,r2
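@ Each group below performs four rounds: the round constants are
@ added to one message quad, sha256h/sha256h2 update the two state
@ halves, and sha256su0/sha256su1 expand the next quad in parallel.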
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
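@ Message schedule complete; the last 16 rounds use the quads as-is.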
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32	{q13},[r3]
	vadd.i32	q12,q12,q10
	sub	r3,r3,#256-16	@ rewind
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

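@ Feed the saved state (q14,q15) forward; loop while input remains.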
	vadd.i32	q0,q0,q14
	vadd.i32	q1,q1,q15
	it	ne
	bne	.Loop_v8

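@ Store the updated hash state back to the context.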
	vst1.32	{q0,q1},[r0]

	bx	lr		@ return
.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
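@ The .byte string below spells "SHA256 block transform for
@ ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>".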
.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm	OPENSSL_armcap_P,4,4
.hidden	OPENSSL_armcap_P
#endif
#endif
