1#include "arm_arch.h"
2
3.text
4.code	32
5
.type	K256,%object
.align	5
@ K256: the 64 32-bit SHA-256 round constants K[0..63] (per FIPS 180-4).
@ The round bodies below walk this table sequentially via r14
@ ("ldr rX,[r14],#4  @ *K256++"); r14 is pointed at K256 by the
@ "sub r14,r3,#256+32  @ K256" in the function prologue, which relies on
@ this table sitting exactly 256+32 bytes before the function entry —
@ do not move, reorder, or resize this data.
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
@ NOTE(review): this zero word pads the space between K256 and the armcap
@ literal. The 64-round loop detects completion by comparing the low byte
@ of the last-loaded constant against 0xf2 (the low byte of 0xc67178f2,
@ K[63]) — see the "cmp rX,#0xf2  @ done?" lines below — so this word
@ presumably guards any read one element past the table; confirm against
@ the generating perl script before relocating it.
.word	0				@ terminator
@ Link-time constant: byte offset from sha256_block_data_order to the
@ OPENSSL_armcap_P capability word. At run time the function entry sets
@ r3 to its own address (sub r3,pc,#8), loads this offset, and performs
@ "ldr r12,[r3,r12]" to fetch OPENSSL_armcap_P position-independently.
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-sha256_block_data_order
.align	5
30
31.global	sha256_block_data_order
32.type	sha256_block_data_order,%function
33sha256_block_data_order:
34	sub	r3,pc,#8		@ sha256_block_data_order
35	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
36#if __ARM_ARCH__>=7
37	ldr	r12,.LOPENSSL_armcap
38	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
39	tst	r12,#ARMV8_SHA256
40	bne	.LARMv8
41	tst	r12,#ARMV7_NEON
42	bne	.LNEON
43#endif
44	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
45	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
46	sub	r14,r3,#256+32	@ K256
47	sub	sp,sp,#16*4		@ alloca(X[16])
48.Loop:
49# if __ARM_ARCH__>=7
50	ldr	r2,[r1],#4
51# else
52	ldrb	r2,[r1,#3]
53# endif
54	eor	r3,r5,r6		@ magic
55	eor	r12,r12,r12
56#if __ARM_ARCH__>=7
57	@ ldr	r2,[r1],#4			@ 0
58# if 0==15
59	str	r1,[sp,#17*4]			@ make room for r1
60# endif
61	eor	r0,r8,r8,ror#5
62	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
63	eor	r0,r0,r8,ror#19	@ Sigma1(e)
64	rev	r2,r2
65#else
66	@ ldrb	r2,[r1,#3]			@ 0
67	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
68	ldrb	r12,[r1,#2]
69	ldrb	r0,[r1,#1]
70	orr	r2,r2,r12,lsl#8
71	ldrb	r12,[r1],#4
72	orr	r2,r2,r0,lsl#16
73# if 0==15
74	str	r1,[sp,#17*4]			@ make room for r1
75# endif
76	eor	r0,r8,r8,ror#5
77	orr	r2,r2,r12,lsl#24
78	eor	r0,r0,r8,ror#19	@ Sigma1(e)
79#endif
80	ldr	r12,[r14],#4			@ *K256++
81	add	r11,r11,r2			@ h+=X[i]
82	str	r2,[sp,#0*4]
83	eor	r2,r9,r10
84	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
85	and	r2,r2,r8
86	add	r11,r11,r12			@ h+=K256[i]
87	eor	r2,r2,r10			@ Ch(e,f,g)
88	eor	r0,r4,r4,ror#11
89	add	r11,r11,r2			@ h+=Ch(e,f,g)
90#if 0==31
91	and	r12,r12,#0xff
92	cmp	r12,#0xf2			@ done?
93#endif
94#if 0<15
95# if __ARM_ARCH__>=7
96	ldr	r2,[r1],#4			@ prefetch
97# else
98	ldrb	r2,[r1,#3]
99# endif
100	eor	r12,r4,r5			@ a^b, b^c in next round
101#else
102	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
103	eor	r12,r4,r5			@ a^b, b^c in next round
104	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
105#endif
106	eor	r0,r0,r4,ror#20	@ Sigma0(a)
107	and	r3,r3,r12			@ (b^c)&=(a^b)
108	add	r7,r7,r11			@ d+=h
109	eor	r3,r3,r5			@ Maj(a,b,c)
110	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
111	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
112#if __ARM_ARCH__>=7
113	@ ldr	r2,[r1],#4			@ 1
114# if 1==15
115	str	r1,[sp,#17*4]			@ make room for r1
116# endif
117	eor	r0,r7,r7,ror#5
118	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
119	eor	r0,r0,r7,ror#19	@ Sigma1(e)
120	rev	r2,r2
121#else
122	@ ldrb	r2,[r1,#3]			@ 1
123	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
124	ldrb	r3,[r1,#2]
125	ldrb	r0,[r1,#1]
126	orr	r2,r2,r3,lsl#8
127	ldrb	r3,[r1],#4
128	orr	r2,r2,r0,lsl#16
129# if 1==15
130	str	r1,[sp,#17*4]			@ make room for r1
131# endif
132	eor	r0,r7,r7,ror#5
133	orr	r2,r2,r3,lsl#24
134	eor	r0,r0,r7,ror#19	@ Sigma1(e)
135#endif
136	ldr	r3,[r14],#4			@ *K256++
137	add	r10,r10,r2			@ h+=X[i]
138	str	r2,[sp,#1*4]
139	eor	r2,r8,r9
140	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
141	and	r2,r2,r7
142	add	r10,r10,r3			@ h+=K256[i]
143	eor	r2,r2,r9			@ Ch(e,f,g)
144	eor	r0,r11,r11,ror#11
145	add	r10,r10,r2			@ h+=Ch(e,f,g)
146#if 1==31
147	and	r3,r3,#0xff
148	cmp	r3,#0xf2			@ done?
149#endif
150#if 1<15
151# if __ARM_ARCH__>=7
152	ldr	r2,[r1],#4			@ prefetch
153# else
154	ldrb	r2,[r1,#3]
155# endif
156	eor	r3,r11,r4			@ a^b, b^c in next round
157#else
158	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
159	eor	r3,r11,r4			@ a^b, b^c in next round
160	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
161#endif
162	eor	r0,r0,r11,ror#20	@ Sigma0(a)
163	and	r12,r12,r3			@ (b^c)&=(a^b)
164	add	r6,r6,r10			@ d+=h
165	eor	r12,r12,r4			@ Maj(a,b,c)
166	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
167	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
168#if __ARM_ARCH__>=7
169	@ ldr	r2,[r1],#4			@ 2
170# if 2==15
171	str	r1,[sp,#17*4]			@ make room for r1
172# endif
173	eor	r0,r6,r6,ror#5
174	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
175	eor	r0,r0,r6,ror#19	@ Sigma1(e)
176	rev	r2,r2
177#else
178	@ ldrb	r2,[r1,#3]			@ 2
179	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
180	ldrb	r12,[r1,#2]
181	ldrb	r0,[r1,#1]
182	orr	r2,r2,r12,lsl#8
183	ldrb	r12,[r1],#4
184	orr	r2,r2,r0,lsl#16
185# if 2==15
186	str	r1,[sp,#17*4]			@ make room for r1
187# endif
188	eor	r0,r6,r6,ror#5
189	orr	r2,r2,r12,lsl#24
190	eor	r0,r0,r6,ror#19	@ Sigma1(e)
191#endif
192	ldr	r12,[r14],#4			@ *K256++
193	add	r9,r9,r2			@ h+=X[i]
194	str	r2,[sp,#2*4]
195	eor	r2,r7,r8
196	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
197	and	r2,r2,r6
198	add	r9,r9,r12			@ h+=K256[i]
199	eor	r2,r2,r8			@ Ch(e,f,g)
200	eor	r0,r10,r10,ror#11
201	add	r9,r9,r2			@ h+=Ch(e,f,g)
202#if 2==31
203	and	r12,r12,#0xff
204	cmp	r12,#0xf2			@ done?
205#endif
206#if 2<15
207# if __ARM_ARCH__>=7
208	ldr	r2,[r1],#4			@ prefetch
209# else
210	ldrb	r2,[r1,#3]
211# endif
212	eor	r12,r10,r11			@ a^b, b^c in next round
213#else
214	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
215	eor	r12,r10,r11			@ a^b, b^c in next round
216	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
217#endif
218	eor	r0,r0,r10,ror#20	@ Sigma0(a)
219	and	r3,r3,r12			@ (b^c)&=(a^b)
220	add	r5,r5,r9			@ d+=h
221	eor	r3,r3,r11			@ Maj(a,b,c)
222	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
223	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
224#if __ARM_ARCH__>=7
225	@ ldr	r2,[r1],#4			@ 3
226# if 3==15
227	str	r1,[sp,#17*4]			@ make room for r1
228# endif
229	eor	r0,r5,r5,ror#5
230	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
231	eor	r0,r0,r5,ror#19	@ Sigma1(e)
232	rev	r2,r2
233#else
234	@ ldrb	r2,[r1,#3]			@ 3
235	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
236	ldrb	r3,[r1,#2]
237	ldrb	r0,[r1,#1]
238	orr	r2,r2,r3,lsl#8
239	ldrb	r3,[r1],#4
240	orr	r2,r2,r0,lsl#16
241# if 3==15
242	str	r1,[sp,#17*4]			@ make room for r1
243# endif
244	eor	r0,r5,r5,ror#5
245	orr	r2,r2,r3,lsl#24
246	eor	r0,r0,r5,ror#19	@ Sigma1(e)
247#endif
248	ldr	r3,[r14],#4			@ *K256++
249	add	r8,r8,r2			@ h+=X[i]
250	str	r2,[sp,#3*4]
251	eor	r2,r6,r7
252	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
253	and	r2,r2,r5
254	add	r8,r8,r3			@ h+=K256[i]
255	eor	r2,r2,r7			@ Ch(e,f,g)
256	eor	r0,r9,r9,ror#11
257	add	r8,r8,r2			@ h+=Ch(e,f,g)
258#if 3==31
259	and	r3,r3,#0xff
260	cmp	r3,#0xf2			@ done?
261#endif
262#if 3<15
263# if __ARM_ARCH__>=7
264	ldr	r2,[r1],#4			@ prefetch
265# else
266	ldrb	r2,[r1,#3]
267# endif
268	eor	r3,r9,r10			@ a^b, b^c in next round
269#else
270	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
271	eor	r3,r9,r10			@ a^b, b^c in next round
272	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
273#endif
274	eor	r0,r0,r9,ror#20	@ Sigma0(a)
275	and	r12,r12,r3			@ (b^c)&=(a^b)
276	add	r4,r4,r8			@ d+=h
277	eor	r12,r12,r10			@ Maj(a,b,c)
278	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
279	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
280#if __ARM_ARCH__>=7
281	@ ldr	r2,[r1],#4			@ 4
282# if 4==15
283	str	r1,[sp,#17*4]			@ make room for r1
284# endif
285	eor	r0,r4,r4,ror#5
286	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
287	eor	r0,r0,r4,ror#19	@ Sigma1(e)
288	rev	r2,r2
289#else
290	@ ldrb	r2,[r1,#3]			@ 4
291	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
292	ldrb	r12,[r1,#2]
293	ldrb	r0,[r1,#1]
294	orr	r2,r2,r12,lsl#8
295	ldrb	r12,[r1],#4
296	orr	r2,r2,r0,lsl#16
297# if 4==15
298	str	r1,[sp,#17*4]			@ make room for r1
299# endif
300	eor	r0,r4,r4,ror#5
301	orr	r2,r2,r12,lsl#24
302	eor	r0,r0,r4,ror#19	@ Sigma1(e)
303#endif
304	ldr	r12,[r14],#4			@ *K256++
305	add	r7,r7,r2			@ h+=X[i]
306	str	r2,[sp,#4*4]
307	eor	r2,r5,r6
308	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
309	and	r2,r2,r4
310	add	r7,r7,r12			@ h+=K256[i]
311	eor	r2,r2,r6			@ Ch(e,f,g)
312	eor	r0,r8,r8,ror#11
313	add	r7,r7,r2			@ h+=Ch(e,f,g)
314#if 4==31
315	and	r12,r12,#0xff
316	cmp	r12,#0xf2			@ done?
317#endif
318#if 4<15
319# if __ARM_ARCH__>=7
320	ldr	r2,[r1],#4			@ prefetch
321# else
322	ldrb	r2,[r1,#3]
323# endif
324	eor	r12,r8,r9			@ a^b, b^c in next round
325#else
326	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
327	eor	r12,r8,r9			@ a^b, b^c in next round
328	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
329#endif
330	eor	r0,r0,r8,ror#20	@ Sigma0(a)
331	and	r3,r3,r12			@ (b^c)&=(a^b)
332	add	r11,r11,r7			@ d+=h
333	eor	r3,r3,r9			@ Maj(a,b,c)
334	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
335	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
336#if __ARM_ARCH__>=7
337	@ ldr	r2,[r1],#4			@ 5
338# if 5==15
339	str	r1,[sp,#17*4]			@ make room for r1
340# endif
341	eor	r0,r11,r11,ror#5
342	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
343	eor	r0,r0,r11,ror#19	@ Sigma1(e)
344	rev	r2,r2
345#else
346	@ ldrb	r2,[r1,#3]			@ 5
347	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
348	ldrb	r3,[r1,#2]
349	ldrb	r0,[r1,#1]
350	orr	r2,r2,r3,lsl#8
351	ldrb	r3,[r1],#4
352	orr	r2,r2,r0,lsl#16
353# if 5==15
354	str	r1,[sp,#17*4]			@ make room for r1
355# endif
356	eor	r0,r11,r11,ror#5
357	orr	r2,r2,r3,lsl#24
358	eor	r0,r0,r11,ror#19	@ Sigma1(e)
359#endif
360	ldr	r3,[r14],#4			@ *K256++
361	add	r6,r6,r2			@ h+=X[i]
362	str	r2,[sp,#5*4]
363	eor	r2,r4,r5
364	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
365	and	r2,r2,r11
366	add	r6,r6,r3			@ h+=K256[i]
367	eor	r2,r2,r5			@ Ch(e,f,g)
368	eor	r0,r7,r7,ror#11
369	add	r6,r6,r2			@ h+=Ch(e,f,g)
370#if 5==31
371	and	r3,r3,#0xff
372	cmp	r3,#0xf2			@ done?
373#endif
374#if 5<15
375# if __ARM_ARCH__>=7
376	ldr	r2,[r1],#4			@ prefetch
377# else
378	ldrb	r2,[r1,#3]
379# endif
380	eor	r3,r7,r8			@ a^b, b^c in next round
381#else
382	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
383	eor	r3,r7,r8			@ a^b, b^c in next round
384	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
385#endif
386	eor	r0,r0,r7,ror#20	@ Sigma0(a)
387	and	r12,r12,r3			@ (b^c)&=(a^b)
388	add	r10,r10,r6			@ d+=h
389	eor	r12,r12,r8			@ Maj(a,b,c)
390	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
391	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
392#if __ARM_ARCH__>=7
393	@ ldr	r2,[r1],#4			@ 6
394# if 6==15
395	str	r1,[sp,#17*4]			@ make room for r1
396# endif
397	eor	r0,r10,r10,ror#5
398	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
399	eor	r0,r0,r10,ror#19	@ Sigma1(e)
400	rev	r2,r2
401#else
402	@ ldrb	r2,[r1,#3]			@ 6
403	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
404	ldrb	r12,[r1,#2]
405	ldrb	r0,[r1,#1]
406	orr	r2,r2,r12,lsl#8
407	ldrb	r12,[r1],#4
408	orr	r2,r2,r0,lsl#16
409# if 6==15
410	str	r1,[sp,#17*4]			@ make room for r1
411# endif
412	eor	r0,r10,r10,ror#5
413	orr	r2,r2,r12,lsl#24
414	eor	r0,r0,r10,ror#19	@ Sigma1(e)
415#endif
416	ldr	r12,[r14],#4			@ *K256++
417	add	r5,r5,r2			@ h+=X[i]
418	str	r2,[sp,#6*4]
419	eor	r2,r11,r4
420	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
421	and	r2,r2,r10
422	add	r5,r5,r12			@ h+=K256[i]
423	eor	r2,r2,r4			@ Ch(e,f,g)
424	eor	r0,r6,r6,ror#11
425	add	r5,r5,r2			@ h+=Ch(e,f,g)
426#if 6==31
427	and	r12,r12,#0xff
428	cmp	r12,#0xf2			@ done?
429#endif
430#if 6<15
431# if __ARM_ARCH__>=7
432	ldr	r2,[r1],#4			@ prefetch
433# else
434	ldrb	r2,[r1,#3]
435# endif
436	eor	r12,r6,r7			@ a^b, b^c in next round
437#else
438	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
439	eor	r12,r6,r7			@ a^b, b^c in next round
440	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
441#endif
442	eor	r0,r0,r6,ror#20	@ Sigma0(a)
443	and	r3,r3,r12			@ (b^c)&=(a^b)
444	add	r9,r9,r5			@ d+=h
445	eor	r3,r3,r7			@ Maj(a,b,c)
446	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
447	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
448#if __ARM_ARCH__>=7
449	@ ldr	r2,[r1],#4			@ 7
450# if 7==15
451	str	r1,[sp,#17*4]			@ make room for r1
452# endif
453	eor	r0,r9,r9,ror#5
454	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
455	eor	r0,r0,r9,ror#19	@ Sigma1(e)
456	rev	r2,r2
457#else
458	@ ldrb	r2,[r1,#3]			@ 7
459	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
460	ldrb	r3,[r1,#2]
461	ldrb	r0,[r1,#1]
462	orr	r2,r2,r3,lsl#8
463	ldrb	r3,[r1],#4
464	orr	r2,r2,r0,lsl#16
465# if 7==15
466	str	r1,[sp,#17*4]			@ make room for r1
467# endif
468	eor	r0,r9,r9,ror#5
469	orr	r2,r2,r3,lsl#24
470	eor	r0,r0,r9,ror#19	@ Sigma1(e)
471#endif
472	ldr	r3,[r14],#4			@ *K256++
473	add	r4,r4,r2			@ h+=X[i]
474	str	r2,[sp,#7*4]
475	eor	r2,r10,r11
476	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
477	and	r2,r2,r9
478	add	r4,r4,r3			@ h+=K256[i]
479	eor	r2,r2,r11			@ Ch(e,f,g)
480	eor	r0,r5,r5,ror#11
481	add	r4,r4,r2			@ h+=Ch(e,f,g)
482#if 7==31
483	and	r3,r3,#0xff
484	cmp	r3,#0xf2			@ done?
485#endif
486#if 7<15
487# if __ARM_ARCH__>=7
488	ldr	r2,[r1],#4			@ prefetch
489# else
490	ldrb	r2,[r1,#3]
491# endif
492	eor	r3,r5,r6			@ a^b, b^c in next round
493#else
494	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
495	eor	r3,r5,r6			@ a^b, b^c in next round
496	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
497#endif
498	eor	r0,r0,r5,ror#20	@ Sigma0(a)
499	and	r12,r12,r3			@ (b^c)&=(a^b)
500	add	r8,r8,r4			@ d+=h
501	eor	r12,r12,r6			@ Maj(a,b,c)
502	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
503	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
504#if __ARM_ARCH__>=7
505	@ ldr	r2,[r1],#4			@ 8
506# if 8==15
507	str	r1,[sp,#17*4]			@ make room for r1
508# endif
509	eor	r0,r8,r8,ror#5
510	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
511	eor	r0,r0,r8,ror#19	@ Sigma1(e)
512	rev	r2,r2
513#else
514	@ ldrb	r2,[r1,#3]			@ 8
515	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
516	ldrb	r12,[r1,#2]
517	ldrb	r0,[r1,#1]
518	orr	r2,r2,r12,lsl#8
519	ldrb	r12,[r1],#4
520	orr	r2,r2,r0,lsl#16
521# if 8==15
522	str	r1,[sp,#17*4]			@ make room for r1
523# endif
524	eor	r0,r8,r8,ror#5
525	orr	r2,r2,r12,lsl#24
526	eor	r0,r0,r8,ror#19	@ Sigma1(e)
527#endif
528	ldr	r12,[r14],#4			@ *K256++
529	add	r11,r11,r2			@ h+=X[i]
530	str	r2,[sp,#8*4]
531	eor	r2,r9,r10
532	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
533	and	r2,r2,r8
534	add	r11,r11,r12			@ h+=K256[i]
535	eor	r2,r2,r10			@ Ch(e,f,g)
536	eor	r0,r4,r4,ror#11
537	add	r11,r11,r2			@ h+=Ch(e,f,g)
538#if 8==31
539	and	r12,r12,#0xff
540	cmp	r12,#0xf2			@ done?
541#endif
542#if 8<15
543# if __ARM_ARCH__>=7
544	ldr	r2,[r1],#4			@ prefetch
545# else
546	ldrb	r2,[r1,#3]
547# endif
548	eor	r12,r4,r5			@ a^b, b^c in next round
549#else
550	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
551	eor	r12,r4,r5			@ a^b, b^c in next round
552	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
553#endif
554	eor	r0,r0,r4,ror#20	@ Sigma0(a)
555	and	r3,r3,r12			@ (b^c)&=(a^b)
556	add	r7,r7,r11			@ d+=h
557	eor	r3,r3,r5			@ Maj(a,b,c)
558	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
559	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
560#if __ARM_ARCH__>=7
561	@ ldr	r2,[r1],#4			@ 9
562# if 9==15
563	str	r1,[sp,#17*4]			@ make room for r1
564# endif
565	eor	r0,r7,r7,ror#5
566	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
567	eor	r0,r0,r7,ror#19	@ Sigma1(e)
568	rev	r2,r2
569#else
570	@ ldrb	r2,[r1,#3]			@ 9
571	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
572	ldrb	r3,[r1,#2]
573	ldrb	r0,[r1,#1]
574	orr	r2,r2,r3,lsl#8
575	ldrb	r3,[r1],#4
576	orr	r2,r2,r0,lsl#16
577# if 9==15
578	str	r1,[sp,#17*4]			@ make room for r1
579# endif
580	eor	r0,r7,r7,ror#5
581	orr	r2,r2,r3,lsl#24
582	eor	r0,r0,r7,ror#19	@ Sigma1(e)
583#endif
584	ldr	r3,[r14],#4			@ *K256++
585	add	r10,r10,r2			@ h+=X[i]
586	str	r2,[sp,#9*4]
587	eor	r2,r8,r9
588	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
589	and	r2,r2,r7
590	add	r10,r10,r3			@ h+=K256[i]
591	eor	r2,r2,r9			@ Ch(e,f,g)
592	eor	r0,r11,r11,ror#11
593	add	r10,r10,r2			@ h+=Ch(e,f,g)
594#if 9==31
595	and	r3,r3,#0xff
596	cmp	r3,#0xf2			@ done?
597#endif
598#if 9<15
599# if __ARM_ARCH__>=7
600	ldr	r2,[r1],#4			@ prefetch
601# else
602	ldrb	r2,[r1,#3]
603# endif
604	eor	r3,r11,r4			@ a^b, b^c in next round
605#else
606	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
607	eor	r3,r11,r4			@ a^b, b^c in next round
608	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
609#endif
610	eor	r0,r0,r11,ror#20	@ Sigma0(a)
611	and	r12,r12,r3			@ (b^c)&=(a^b)
612	add	r6,r6,r10			@ d+=h
613	eor	r12,r12,r4			@ Maj(a,b,c)
614	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
615	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
616#if __ARM_ARCH__>=7
617	@ ldr	r2,[r1],#4			@ 10
618# if 10==15
619	str	r1,[sp,#17*4]			@ make room for r1
620# endif
621	eor	r0,r6,r6,ror#5
622	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
623	eor	r0,r0,r6,ror#19	@ Sigma1(e)
624	rev	r2,r2
625#else
626	@ ldrb	r2,[r1,#3]			@ 10
627	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
628	ldrb	r12,[r1,#2]
629	ldrb	r0,[r1,#1]
630	orr	r2,r2,r12,lsl#8
631	ldrb	r12,[r1],#4
632	orr	r2,r2,r0,lsl#16
633# if 10==15
634	str	r1,[sp,#17*4]			@ make room for r1
635# endif
636	eor	r0,r6,r6,ror#5
637	orr	r2,r2,r12,lsl#24
638	eor	r0,r0,r6,ror#19	@ Sigma1(e)
639#endif
640	ldr	r12,[r14],#4			@ *K256++
641	add	r9,r9,r2			@ h+=X[i]
642	str	r2,[sp,#10*4]
643	eor	r2,r7,r8
644	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
645	and	r2,r2,r6
646	add	r9,r9,r12			@ h+=K256[i]
647	eor	r2,r2,r8			@ Ch(e,f,g)
648	eor	r0,r10,r10,ror#11
649	add	r9,r9,r2			@ h+=Ch(e,f,g)
650#if 10==31
651	and	r12,r12,#0xff
652	cmp	r12,#0xf2			@ done?
653#endif
654#if 10<15
655# if __ARM_ARCH__>=7
656	ldr	r2,[r1],#4			@ prefetch
657# else
658	ldrb	r2,[r1,#3]
659# endif
660	eor	r12,r10,r11			@ a^b, b^c in next round
661#else
662	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
663	eor	r12,r10,r11			@ a^b, b^c in next round
664	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
665#endif
666	eor	r0,r0,r10,ror#20	@ Sigma0(a)
667	and	r3,r3,r12			@ (b^c)&=(a^b)
668	add	r5,r5,r9			@ d+=h
669	eor	r3,r3,r11			@ Maj(a,b,c)
670	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
671	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
672#if __ARM_ARCH__>=7
673	@ ldr	r2,[r1],#4			@ 11
674# if 11==15
675	str	r1,[sp,#17*4]			@ make room for r1
676# endif
677	eor	r0,r5,r5,ror#5
678	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
679	eor	r0,r0,r5,ror#19	@ Sigma1(e)
680	rev	r2,r2
681#else
682	@ ldrb	r2,[r1,#3]			@ 11
683	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
684	ldrb	r3,[r1,#2]
685	ldrb	r0,[r1,#1]
686	orr	r2,r2,r3,lsl#8
687	ldrb	r3,[r1],#4
688	orr	r2,r2,r0,lsl#16
689# if 11==15
690	str	r1,[sp,#17*4]			@ make room for r1
691# endif
692	eor	r0,r5,r5,ror#5
693	orr	r2,r2,r3,lsl#24
694	eor	r0,r0,r5,ror#19	@ Sigma1(e)
695#endif
696	ldr	r3,[r14],#4			@ *K256++
697	add	r8,r8,r2			@ h+=X[i]
698	str	r2,[sp,#11*4]
699	eor	r2,r6,r7
700	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
701	and	r2,r2,r5
702	add	r8,r8,r3			@ h+=K256[i]
703	eor	r2,r2,r7			@ Ch(e,f,g)
704	eor	r0,r9,r9,ror#11
705	add	r8,r8,r2			@ h+=Ch(e,f,g)
706#if 11==31
707	and	r3,r3,#0xff
708	cmp	r3,#0xf2			@ done?
709#endif
710#if 11<15
711# if __ARM_ARCH__>=7
712	ldr	r2,[r1],#4			@ prefetch
713# else
714	ldrb	r2,[r1,#3]
715# endif
716	eor	r3,r9,r10			@ a^b, b^c in next round
717#else
718	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
719	eor	r3,r9,r10			@ a^b, b^c in next round
720	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
721#endif
722	eor	r0,r0,r9,ror#20	@ Sigma0(a)
723	and	r12,r12,r3			@ (b^c)&=(a^b)
724	add	r4,r4,r8			@ d+=h
725	eor	r12,r12,r10			@ Maj(a,b,c)
726	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
727	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
728#if __ARM_ARCH__>=7
729	@ ldr	r2,[r1],#4			@ 12
730# if 12==15
731	str	r1,[sp,#17*4]			@ make room for r1
732# endif
733	eor	r0,r4,r4,ror#5
734	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
735	eor	r0,r0,r4,ror#19	@ Sigma1(e)
736	rev	r2,r2
737#else
738	@ ldrb	r2,[r1,#3]			@ 12
739	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
740	ldrb	r12,[r1,#2]
741	ldrb	r0,[r1,#1]
742	orr	r2,r2,r12,lsl#8
743	ldrb	r12,[r1],#4
744	orr	r2,r2,r0,lsl#16
745# if 12==15
746	str	r1,[sp,#17*4]			@ make room for r1
747# endif
748	eor	r0,r4,r4,ror#5
749	orr	r2,r2,r12,lsl#24
750	eor	r0,r0,r4,ror#19	@ Sigma1(e)
751#endif
752	ldr	r12,[r14],#4			@ *K256++
753	add	r7,r7,r2			@ h+=X[i]
754	str	r2,[sp,#12*4]
755	eor	r2,r5,r6
756	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
757	and	r2,r2,r4
758	add	r7,r7,r12			@ h+=K256[i]
759	eor	r2,r2,r6			@ Ch(e,f,g)
760	eor	r0,r8,r8,ror#11
761	add	r7,r7,r2			@ h+=Ch(e,f,g)
762#if 12==31
763	and	r12,r12,#0xff
764	cmp	r12,#0xf2			@ done?
765#endif
766#if 12<15
767# if __ARM_ARCH__>=7
768	ldr	r2,[r1],#4			@ prefetch
769# else
770	ldrb	r2,[r1,#3]
771# endif
772	eor	r12,r8,r9			@ a^b, b^c in next round
773#else
774	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
775	eor	r12,r8,r9			@ a^b, b^c in next round
776	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
777#endif
778	eor	r0,r0,r8,ror#20	@ Sigma0(a)
779	and	r3,r3,r12			@ (b^c)&=(a^b)
780	add	r11,r11,r7			@ d+=h
781	eor	r3,r3,r9			@ Maj(a,b,c)
782	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
783	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
784#if __ARM_ARCH__>=7
785	@ ldr	r2,[r1],#4			@ 13
786# if 13==15
787	str	r1,[sp,#17*4]			@ make room for r1
788# endif
789	eor	r0,r11,r11,ror#5
790	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
791	eor	r0,r0,r11,ror#19	@ Sigma1(e)
792	rev	r2,r2
793#else
794	@ ldrb	r2,[r1,#3]			@ 13
795	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
796	ldrb	r3,[r1,#2]
797	ldrb	r0,[r1,#1]
798	orr	r2,r2,r3,lsl#8
799	ldrb	r3,[r1],#4
800	orr	r2,r2,r0,lsl#16
801# if 13==15
802	str	r1,[sp,#17*4]			@ make room for r1
803# endif
804	eor	r0,r11,r11,ror#5
805	orr	r2,r2,r3,lsl#24
806	eor	r0,r0,r11,ror#19	@ Sigma1(e)
807#endif
808	ldr	r3,[r14],#4			@ *K256++
809	add	r6,r6,r2			@ h+=X[i]
810	str	r2,[sp,#13*4]
811	eor	r2,r4,r5
812	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
813	and	r2,r2,r11
814	add	r6,r6,r3			@ h+=K256[i]
815	eor	r2,r2,r5			@ Ch(e,f,g)
816	eor	r0,r7,r7,ror#11
817	add	r6,r6,r2			@ h+=Ch(e,f,g)
818#if 13==31
819	and	r3,r3,#0xff
820	cmp	r3,#0xf2			@ done?
821#endif
822#if 13<15
823# if __ARM_ARCH__>=7
824	ldr	r2,[r1],#4			@ prefetch
825# else
826	ldrb	r2,[r1,#3]
827# endif
828	eor	r3,r7,r8			@ a^b, b^c in next round
829#else
830	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
831	eor	r3,r7,r8			@ a^b, b^c in next round
832	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
833#endif
834	eor	r0,r0,r7,ror#20	@ Sigma0(a)
835	and	r12,r12,r3			@ (b^c)&=(a^b)
836	add	r10,r10,r6			@ d+=h
837	eor	r12,r12,r8			@ Maj(a,b,c)
838	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
839	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
840#if __ARM_ARCH__>=7
841	@ ldr	r2,[r1],#4			@ 14
842# if 14==15
843	str	r1,[sp,#17*4]			@ make room for r1
844# endif
845	eor	r0,r10,r10,ror#5
846	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
847	eor	r0,r0,r10,ror#19	@ Sigma1(e)
848	rev	r2,r2
849#else
850	@ ldrb	r2,[r1,#3]			@ 14
851	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
852	ldrb	r12,[r1,#2]
853	ldrb	r0,[r1,#1]
854	orr	r2,r2,r12,lsl#8
855	ldrb	r12,[r1],#4
856	orr	r2,r2,r0,lsl#16
857# if 14==15
858	str	r1,[sp,#17*4]			@ make room for r1
859# endif
860	eor	r0,r10,r10,ror#5
861	orr	r2,r2,r12,lsl#24
862	eor	r0,r0,r10,ror#19	@ Sigma1(e)
863#endif
864	ldr	r12,[r14],#4			@ *K256++
865	add	r5,r5,r2			@ h+=X[i]
866	str	r2,[sp,#14*4]
867	eor	r2,r11,r4
868	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
869	and	r2,r2,r10
870	add	r5,r5,r12			@ h+=K256[i]
871	eor	r2,r2,r4			@ Ch(e,f,g)
872	eor	r0,r6,r6,ror#11
873	add	r5,r5,r2			@ h+=Ch(e,f,g)
874#if 14==31
875	and	r12,r12,#0xff
876	cmp	r12,#0xf2			@ done?
877#endif
878#if 14<15
879# if __ARM_ARCH__>=7
880	ldr	r2,[r1],#4			@ prefetch
881# else
882	ldrb	r2,[r1,#3]
883# endif
884	eor	r12,r6,r7			@ a^b, b^c in next round
885#else
886	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
887	eor	r12,r6,r7			@ a^b, b^c in next round
888	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
889#endif
890	eor	r0,r0,r6,ror#20	@ Sigma0(a)
891	and	r3,r3,r12			@ (b^c)&=(a^b)
892	add	r9,r9,r5			@ d+=h
893	eor	r3,r3,r7			@ Maj(a,b,c)
894	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
895	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
896#if __ARM_ARCH__>=7
897	@ ldr	r2,[r1],#4			@ 15
898# if 15==15
899	str	r1,[sp,#17*4]			@ make room for r1
900# endif
901	eor	r0,r9,r9,ror#5
902	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
903	eor	r0,r0,r9,ror#19	@ Sigma1(e)
904	rev	r2,r2
905#else
906	@ ldrb	r2,[r1,#3]			@ 15
907	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
908	ldrb	r3,[r1,#2]
909	ldrb	r0,[r1,#1]
910	orr	r2,r2,r3,lsl#8
911	ldrb	r3,[r1],#4
912	orr	r2,r2,r0,lsl#16
913# if 15==15
914	str	r1,[sp,#17*4]			@ make room for r1
915# endif
916	eor	r0,r9,r9,ror#5
917	orr	r2,r2,r3,lsl#24
918	eor	r0,r0,r9,ror#19	@ Sigma1(e)
919#endif
920	ldr	r3,[r14],#4			@ *K256++
921	add	r4,r4,r2			@ h+=X[i]
922	str	r2,[sp,#15*4]
923	eor	r2,r10,r11
924	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
925	and	r2,r2,r9
926	add	r4,r4,r3			@ h+=K256[i]
927	eor	r2,r2,r11			@ Ch(e,f,g)
928	eor	r0,r5,r5,ror#11
929	add	r4,r4,r2			@ h+=Ch(e,f,g)
930#if 15==31
931	and	r3,r3,#0xff
932	cmp	r3,#0xf2			@ done?
933#endif
934#if 15<15
935# if __ARM_ARCH__>=7
936	ldr	r2,[r1],#4			@ prefetch
937# else
938	ldrb	r2,[r1,#3]
939# endif
940	eor	r3,r5,r6			@ a^b, b^c in next round
941#else
942	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
943	eor	r3,r5,r6			@ a^b, b^c in next round
944	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
945#endif
946	eor	r0,r0,r5,ror#20	@ Sigma0(a)
947	and	r12,r12,r3			@ (b^c)&=(a^b)
948	add	r8,r8,r4			@ d+=h
949	eor	r12,r12,r6			@ Maj(a,b,c)
950	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
951	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
952.Lrounds_16_xx:
953	@ ldr	r2,[sp,#1*4]		@ 16
954	@ ldr	r1,[sp,#14*4]
955	mov	r0,r2,ror#7
956	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
957	mov	r12,r1,ror#17
958	eor	r0,r0,r2,ror#18
959	eor	r12,r12,r1,ror#19
960	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
961	ldr	r2,[sp,#0*4]
962	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
963	ldr	r1,[sp,#9*4]
964
965	add	r12,r12,r0
966	eor	r0,r8,r8,ror#5	@ from BODY_00_15
967	add	r2,r2,r12
968	eor	r0,r0,r8,ror#19	@ Sigma1(e)
969	add	r2,r2,r1			@ X[i]
970	ldr	r12,[r14],#4			@ *K256++
971	add	r11,r11,r2			@ h+=X[i]
972	str	r2,[sp,#0*4]
973	eor	r2,r9,r10
974	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
975	and	r2,r2,r8
976	add	r11,r11,r12			@ h+=K256[i]
977	eor	r2,r2,r10			@ Ch(e,f,g)
978	eor	r0,r4,r4,ror#11
979	add	r11,r11,r2			@ h+=Ch(e,f,g)
980#if 16==31
981	and	r12,r12,#0xff
982	cmp	r12,#0xf2			@ done?
983#endif
984#if 16<15
985# if __ARM_ARCH__>=7
986	ldr	r2,[r1],#4			@ prefetch
987# else
988	ldrb	r2,[r1,#3]
989# endif
990	eor	r12,r4,r5			@ a^b, b^c in next round
991#else
992	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
993	eor	r12,r4,r5			@ a^b, b^c in next round
994	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
995#endif
996	eor	r0,r0,r4,ror#20	@ Sigma0(a)
997	and	r3,r3,r12			@ (b^c)&=(a^b)
998	add	r7,r7,r11			@ d+=h
999	eor	r3,r3,r5			@ Maj(a,b,c)
1000	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1001	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1002	@ ldr	r2,[sp,#2*4]		@ 17
1003	@ ldr	r1,[sp,#15*4]
1004	mov	r0,r2,ror#7
1005	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1006	mov	r3,r1,ror#17
1007	eor	r0,r0,r2,ror#18
1008	eor	r3,r3,r1,ror#19
1009	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1010	ldr	r2,[sp,#1*4]
1011	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1012	ldr	r1,[sp,#10*4]
1013
1014	add	r3,r3,r0
1015	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1016	add	r2,r2,r3
1017	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1018	add	r2,r2,r1			@ X[i]
1019	ldr	r3,[r14],#4			@ *K256++
1020	add	r10,r10,r2			@ h+=X[i]
1021	str	r2,[sp,#1*4]
1022	eor	r2,r8,r9
1023	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1024	and	r2,r2,r7
1025	add	r10,r10,r3			@ h+=K256[i]
1026	eor	r2,r2,r9			@ Ch(e,f,g)
1027	eor	r0,r11,r11,ror#11
1028	add	r10,r10,r2			@ h+=Ch(e,f,g)
1029#if 17==31
1030	and	r3,r3,#0xff
1031	cmp	r3,#0xf2			@ done?
1032#endif
1033#if 17<15
1034# if __ARM_ARCH__>=7
1035	ldr	r2,[r1],#4			@ prefetch
1036# else
1037	ldrb	r2,[r1,#3]
1038# endif
1039	eor	r3,r11,r4			@ a^b, b^c in next round
1040#else
1041	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
1042	eor	r3,r11,r4			@ a^b, b^c in next round
1043	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
1044#endif
1045	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1046	and	r12,r12,r3			@ (b^c)&=(a^b)
1047	add	r6,r6,r10			@ d+=h
1048	eor	r12,r12,r4			@ Maj(a,b,c)
1049	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1050	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1051	@ ldr	r2,[sp,#3*4]		@ 18
1052	@ ldr	r1,[sp,#0*4]
1053	mov	r0,r2,ror#7
1054	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1055	mov	r12,r1,ror#17
1056	eor	r0,r0,r2,ror#18
1057	eor	r12,r12,r1,ror#19
1058	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1059	ldr	r2,[sp,#2*4]
1060	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1061	ldr	r1,[sp,#11*4]
1062
1063	add	r12,r12,r0
1064	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1065	add	r2,r2,r12
1066	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1067	add	r2,r2,r1			@ X[i]
1068	ldr	r12,[r14],#4			@ *K256++
1069	add	r9,r9,r2			@ h+=X[i]
1070	str	r2,[sp,#2*4]
1071	eor	r2,r7,r8
1072	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1073	and	r2,r2,r6
1074	add	r9,r9,r12			@ h+=K256[i]
1075	eor	r2,r2,r8			@ Ch(e,f,g)
1076	eor	r0,r10,r10,ror#11
1077	add	r9,r9,r2			@ h+=Ch(e,f,g)
1078#if 18==31
1079	and	r12,r12,#0xff
1080	cmp	r12,#0xf2			@ done?
1081#endif
1082#if 18<15
1083# if __ARM_ARCH__>=7
1084	ldr	r2,[r1],#4			@ prefetch
1085# else
1086	ldrb	r2,[r1,#3]
1087# endif
1088	eor	r12,r10,r11			@ a^b, b^c in next round
1089#else
1090	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
1091	eor	r12,r10,r11			@ a^b, b^c in next round
1092	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
1093#endif
1094	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1095	and	r3,r3,r12			@ (b^c)&=(a^b)
1096	add	r5,r5,r9			@ d+=h
1097	eor	r3,r3,r11			@ Maj(a,b,c)
1098	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1099	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1100	@ ldr	r2,[sp,#4*4]		@ 19
1101	@ ldr	r1,[sp,#1*4]
1102	mov	r0,r2,ror#7
1103	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1104	mov	r3,r1,ror#17
1105	eor	r0,r0,r2,ror#18
1106	eor	r3,r3,r1,ror#19
1107	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1108	ldr	r2,[sp,#3*4]
1109	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1110	ldr	r1,[sp,#12*4]
1111
1112	add	r3,r3,r0
1113	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1114	add	r2,r2,r3
1115	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1116	add	r2,r2,r1			@ X[i]
1117	ldr	r3,[r14],#4			@ *K256++
1118	add	r8,r8,r2			@ h+=X[i]
1119	str	r2,[sp,#3*4]
1120	eor	r2,r6,r7
1121	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1122	and	r2,r2,r5
1123	add	r8,r8,r3			@ h+=K256[i]
1124	eor	r2,r2,r7			@ Ch(e,f,g)
1125	eor	r0,r9,r9,ror#11
1126	add	r8,r8,r2			@ h+=Ch(e,f,g)
1127#if 19==31
1128	and	r3,r3,#0xff
1129	cmp	r3,#0xf2			@ done?
1130#endif
1131#if 19<15
1132# if __ARM_ARCH__>=7
1133	ldr	r2,[r1],#4			@ prefetch
1134# else
1135	ldrb	r2,[r1,#3]
1136# endif
1137	eor	r3,r9,r10			@ a^b, b^c in next round
1138#else
1139	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
1140	eor	r3,r9,r10			@ a^b, b^c in next round
1141	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
1142#endif
1143	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1144	and	r12,r12,r3			@ (b^c)&=(a^b)
1145	add	r4,r4,r8			@ d+=h
1146	eor	r12,r12,r10			@ Maj(a,b,c)
1147	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1148	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1149	@ ldr	r2,[sp,#5*4]		@ 20
1150	@ ldr	r1,[sp,#2*4]
1151	mov	r0,r2,ror#7
1152	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1153	mov	r12,r1,ror#17
1154	eor	r0,r0,r2,ror#18
1155	eor	r12,r12,r1,ror#19
1156	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1157	ldr	r2,[sp,#4*4]
1158	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1159	ldr	r1,[sp,#13*4]
1160
1161	add	r12,r12,r0
1162	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1163	add	r2,r2,r12
1164	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1165	add	r2,r2,r1			@ X[i]
1166	ldr	r12,[r14],#4			@ *K256++
1167	add	r7,r7,r2			@ h+=X[i]
1168	str	r2,[sp,#4*4]
1169	eor	r2,r5,r6
1170	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1171	and	r2,r2,r4
1172	add	r7,r7,r12			@ h+=K256[i]
1173	eor	r2,r2,r6			@ Ch(e,f,g)
1174	eor	r0,r8,r8,ror#11
1175	add	r7,r7,r2			@ h+=Ch(e,f,g)
1176#if 20==31
1177	and	r12,r12,#0xff
1178	cmp	r12,#0xf2			@ done?
1179#endif
1180#if 20<15
1181# if __ARM_ARCH__>=7
1182	ldr	r2,[r1],#4			@ prefetch
1183# else
1184	ldrb	r2,[r1,#3]
1185# endif
1186	eor	r12,r8,r9			@ a^b, b^c in next round
1187#else
1188	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
1189	eor	r12,r8,r9			@ a^b, b^c in next round
1190	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
1191#endif
1192	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1193	and	r3,r3,r12			@ (b^c)&=(a^b)
1194	add	r11,r11,r7			@ d+=h
1195	eor	r3,r3,r9			@ Maj(a,b,c)
1196	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1197	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1198	@ ldr	r2,[sp,#6*4]		@ 21
1199	@ ldr	r1,[sp,#3*4]
1200	mov	r0,r2,ror#7
1201	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1202	mov	r3,r1,ror#17
1203	eor	r0,r0,r2,ror#18
1204	eor	r3,r3,r1,ror#19
1205	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1206	ldr	r2,[sp,#5*4]
1207	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1208	ldr	r1,[sp,#14*4]
1209
1210	add	r3,r3,r0
1211	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1212	add	r2,r2,r3
1213	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1214	add	r2,r2,r1			@ X[i]
1215	ldr	r3,[r14],#4			@ *K256++
1216	add	r6,r6,r2			@ h+=X[i]
1217	str	r2,[sp,#5*4]
1218	eor	r2,r4,r5
1219	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1220	and	r2,r2,r11
1221	add	r6,r6,r3			@ h+=K256[i]
1222	eor	r2,r2,r5			@ Ch(e,f,g)
1223	eor	r0,r7,r7,ror#11
1224	add	r6,r6,r2			@ h+=Ch(e,f,g)
1225#if 21==31
1226	and	r3,r3,#0xff
1227	cmp	r3,#0xf2			@ done?
1228#endif
1229#if 21<15
1230# if __ARM_ARCH__>=7
1231	ldr	r2,[r1],#4			@ prefetch
1232# else
1233	ldrb	r2,[r1,#3]
1234# endif
1235	eor	r3,r7,r8			@ a^b, b^c in next round
1236#else
1237	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
1238	eor	r3,r7,r8			@ a^b, b^c in next round
1239	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
1240#endif
1241	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1242	and	r12,r12,r3			@ (b^c)&=(a^b)
1243	add	r10,r10,r6			@ d+=h
1244	eor	r12,r12,r8			@ Maj(a,b,c)
1245	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1246	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1247	@ ldr	r2,[sp,#7*4]		@ 22
1248	@ ldr	r1,[sp,#4*4]
1249	mov	r0,r2,ror#7
1250	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1251	mov	r12,r1,ror#17
1252	eor	r0,r0,r2,ror#18
1253	eor	r12,r12,r1,ror#19
1254	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1255	ldr	r2,[sp,#6*4]
1256	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1257	ldr	r1,[sp,#15*4]
1258
1259	add	r12,r12,r0
1260	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1261	add	r2,r2,r12
1262	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1263	add	r2,r2,r1			@ X[i]
1264	ldr	r12,[r14],#4			@ *K256++
1265	add	r5,r5,r2			@ h+=X[i]
1266	str	r2,[sp,#6*4]
1267	eor	r2,r11,r4
1268	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1269	and	r2,r2,r10
1270	add	r5,r5,r12			@ h+=K256[i]
1271	eor	r2,r2,r4			@ Ch(e,f,g)
1272	eor	r0,r6,r6,ror#11
1273	add	r5,r5,r2			@ h+=Ch(e,f,g)
1274#if 22==31
1275	and	r12,r12,#0xff
1276	cmp	r12,#0xf2			@ done?
1277#endif
1278#if 22<15
1279# if __ARM_ARCH__>=7
1280	ldr	r2,[r1],#4			@ prefetch
1281# else
1282	ldrb	r2,[r1,#3]
1283# endif
1284	eor	r12,r6,r7			@ a^b, b^c in next round
1285#else
1286	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
1287	eor	r12,r6,r7			@ a^b, b^c in next round
1288	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
1289#endif
1290	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1291	and	r3,r3,r12			@ (b^c)&=(a^b)
1292	add	r9,r9,r5			@ d+=h
1293	eor	r3,r3,r7			@ Maj(a,b,c)
1294	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1295	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1296	@ ldr	r2,[sp,#8*4]		@ 23
1297	@ ldr	r1,[sp,#5*4]
1298	mov	r0,r2,ror#7
1299	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1300	mov	r3,r1,ror#17
1301	eor	r0,r0,r2,ror#18
1302	eor	r3,r3,r1,ror#19
1303	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1304	ldr	r2,[sp,#7*4]
1305	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1306	ldr	r1,[sp,#0*4]
1307
1308	add	r3,r3,r0
1309	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1310	add	r2,r2,r3
1311	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1312	add	r2,r2,r1			@ X[i]
1313	ldr	r3,[r14],#4			@ *K256++
1314	add	r4,r4,r2			@ h+=X[i]
1315	str	r2,[sp,#7*4]
1316	eor	r2,r10,r11
1317	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1318	and	r2,r2,r9
1319	add	r4,r4,r3			@ h+=K256[i]
1320	eor	r2,r2,r11			@ Ch(e,f,g)
1321	eor	r0,r5,r5,ror#11
1322	add	r4,r4,r2			@ h+=Ch(e,f,g)
1323#if 23==31
1324	and	r3,r3,#0xff
1325	cmp	r3,#0xf2			@ done?
1326#endif
1327#if 23<15
1328# if __ARM_ARCH__>=7
1329	ldr	r2,[r1],#4			@ prefetch
1330# else
1331	ldrb	r2,[r1,#3]
1332# endif
1333	eor	r3,r5,r6			@ a^b, b^c in next round
1334#else
1335	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
1336	eor	r3,r5,r6			@ a^b, b^c in next round
1337	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
1338#endif
1339	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1340	and	r12,r12,r3			@ (b^c)&=(a^b)
1341	add	r8,r8,r4			@ d+=h
1342	eor	r12,r12,r6			@ Maj(a,b,c)
1343	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1344	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1345	@ ldr	r2,[sp,#9*4]		@ 24
1346	@ ldr	r1,[sp,#6*4]
1347	mov	r0,r2,ror#7
1348	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1349	mov	r12,r1,ror#17
1350	eor	r0,r0,r2,ror#18
1351	eor	r12,r12,r1,ror#19
1352	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1353	ldr	r2,[sp,#8*4]
1354	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1355	ldr	r1,[sp,#1*4]
1356
1357	add	r12,r12,r0
1358	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1359	add	r2,r2,r12
1360	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1361	add	r2,r2,r1			@ X[i]
1362	ldr	r12,[r14],#4			@ *K256++
1363	add	r11,r11,r2			@ h+=X[i]
1364	str	r2,[sp,#8*4]
1365	eor	r2,r9,r10
1366	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1367	and	r2,r2,r8
1368	add	r11,r11,r12			@ h+=K256[i]
1369	eor	r2,r2,r10			@ Ch(e,f,g)
1370	eor	r0,r4,r4,ror#11
1371	add	r11,r11,r2			@ h+=Ch(e,f,g)
1372#if 24==31
1373	and	r12,r12,#0xff
1374	cmp	r12,#0xf2			@ done?
1375#endif
1376#if 24<15
1377# if __ARM_ARCH__>=7
1378	ldr	r2,[r1],#4			@ prefetch
1379# else
1380	ldrb	r2,[r1,#3]
1381# endif
1382	eor	r12,r4,r5			@ a^b, b^c in next round
1383#else
1384	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
1385	eor	r12,r4,r5			@ a^b, b^c in next round
1386	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
1387#endif
1388	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1389	and	r3,r3,r12			@ (b^c)&=(a^b)
1390	add	r7,r7,r11			@ d+=h
1391	eor	r3,r3,r5			@ Maj(a,b,c)
1392	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1393	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1394	@ ldr	r2,[sp,#10*4]		@ 25
1395	@ ldr	r1,[sp,#7*4]
1396	mov	r0,r2,ror#7
1397	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1398	mov	r3,r1,ror#17
1399	eor	r0,r0,r2,ror#18
1400	eor	r3,r3,r1,ror#19
1401	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1402	ldr	r2,[sp,#9*4]
1403	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1404	ldr	r1,[sp,#2*4]
1405
1406	add	r3,r3,r0
1407	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1408	add	r2,r2,r3
1409	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1410	add	r2,r2,r1			@ X[i]
1411	ldr	r3,[r14],#4			@ *K256++
1412	add	r10,r10,r2			@ h+=X[i]
1413	str	r2,[sp,#9*4]
1414	eor	r2,r8,r9
1415	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1416	and	r2,r2,r7
1417	add	r10,r10,r3			@ h+=K256[i]
1418	eor	r2,r2,r9			@ Ch(e,f,g)
1419	eor	r0,r11,r11,ror#11
1420	add	r10,r10,r2			@ h+=Ch(e,f,g)
1421#if 25==31
1422	and	r3,r3,#0xff
1423	cmp	r3,#0xf2			@ done?
1424#endif
1425#if 25<15
1426# if __ARM_ARCH__>=7
1427	ldr	r2,[r1],#4			@ prefetch
1428# else
1429	ldrb	r2,[r1,#3]
1430# endif
1431	eor	r3,r11,r4			@ a^b, b^c in next round
1432#else
1433	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
1434	eor	r3,r11,r4			@ a^b, b^c in next round
1435	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
1436#endif
1437	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1438	and	r12,r12,r3			@ (b^c)&=(a^b)
1439	add	r6,r6,r10			@ d+=h
1440	eor	r12,r12,r4			@ Maj(a,b,c)
1441	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1442	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1443	@ ldr	r2,[sp,#11*4]		@ 26
1444	@ ldr	r1,[sp,#8*4]
1445	mov	r0,r2,ror#7
1446	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1447	mov	r12,r1,ror#17
1448	eor	r0,r0,r2,ror#18
1449	eor	r12,r12,r1,ror#19
1450	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1451	ldr	r2,[sp,#10*4]
1452	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1453	ldr	r1,[sp,#3*4]
1454
1455	add	r12,r12,r0
1456	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1457	add	r2,r2,r12
1458	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1459	add	r2,r2,r1			@ X[i]
1460	ldr	r12,[r14],#4			@ *K256++
1461	add	r9,r9,r2			@ h+=X[i]
1462	str	r2,[sp,#10*4]
1463	eor	r2,r7,r8
1464	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1465	and	r2,r2,r6
1466	add	r9,r9,r12			@ h+=K256[i]
1467	eor	r2,r2,r8			@ Ch(e,f,g)
1468	eor	r0,r10,r10,ror#11
1469	add	r9,r9,r2			@ h+=Ch(e,f,g)
1470#if 26==31
1471	and	r12,r12,#0xff
1472	cmp	r12,#0xf2			@ done?
1473#endif
1474#if 26<15
1475# if __ARM_ARCH__>=7
1476	ldr	r2,[r1],#4			@ prefetch
1477# else
1478	ldrb	r2,[r1,#3]
1479# endif
1480	eor	r12,r10,r11			@ a^b, b^c in next round
1481#else
1482	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
1483	eor	r12,r10,r11			@ a^b, b^c in next round
1484	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
1485#endif
1486	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1487	and	r3,r3,r12			@ (b^c)&=(a^b)
1488	add	r5,r5,r9			@ d+=h
1489	eor	r3,r3,r11			@ Maj(a,b,c)
1490	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1491	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1492	@ ldr	r2,[sp,#12*4]		@ 27
1493	@ ldr	r1,[sp,#9*4]
1494	mov	r0,r2,ror#7
1495	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1496	mov	r3,r1,ror#17
1497	eor	r0,r0,r2,ror#18
1498	eor	r3,r3,r1,ror#19
1499	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1500	ldr	r2,[sp,#11*4]
1501	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1502	ldr	r1,[sp,#4*4]
1503
1504	add	r3,r3,r0
1505	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1506	add	r2,r2,r3
1507	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1508	add	r2,r2,r1			@ X[i]
1509	ldr	r3,[r14],#4			@ *K256++
1510	add	r8,r8,r2			@ h+=X[i]
1511	str	r2,[sp,#11*4]
1512	eor	r2,r6,r7
1513	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1514	and	r2,r2,r5
1515	add	r8,r8,r3			@ h+=K256[i]
1516	eor	r2,r2,r7			@ Ch(e,f,g)
1517	eor	r0,r9,r9,ror#11
1518	add	r8,r8,r2			@ h+=Ch(e,f,g)
1519#if 27==31
1520	and	r3,r3,#0xff
1521	cmp	r3,#0xf2			@ done?
1522#endif
1523#if 27<15
1524# if __ARM_ARCH__>=7
1525	ldr	r2,[r1],#4			@ prefetch
1526# else
1527	ldrb	r2,[r1,#3]
1528# endif
1529	eor	r3,r9,r10			@ a^b, b^c in next round
1530#else
1531	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
1532	eor	r3,r9,r10			@ a^b, b^c in next round
1533	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
1534#endif
1535	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1536	and	r12,r12,r3			@ (b^c)&=(a^b)
1537	add	r4,r4,r8			@ d+=h
1538	eor	r12,r12,r10			@ Maj(a,b,c)
1539	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1540	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1541	@ ldr	r2,[sp,#13*4]		@ 28
1542	@ ldr	r1,[sp,#10*4]
1543	mov	r0,r2,ror#7
1544	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1545	mov	r12,r1,ror#17
1546	eor	r0,r0,r2,ror#18
1547	eor	r12,r12,r1,ror#19
1548	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1549	ldr	r2,[sp,#12*4]
1550	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1551	ldr	r1,[sp,#5*4]
1552
1553	add	r12,r12,r0
1554	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1555	add	r2,r2,r12
1556	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1557	add	r2,r2,r1			@ X[i]
1558	ldr	r12,[r14],#4			@ *K256++
1559	add	r7,r7,r2			@ h+=X[i]
1560	str	r2,[sp,#12*4]
1561	eor	r2,r5,r6
1562	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1563	and	r2,r2,r4
1564	add	r7,r7,r12			@ h+=K256[i]
1565	eor	r2,r2,r6			@ Ch(e,f,g)
1566	eor	r0,r8,r8,ror#11
1567	add	r7,r7,r2			@ h+=Ch(e,f,g)
1568#if 28==31
1569	and	r12,r12,#0xff
1570	cmp	r12,#0xf2			@ done?
1571#endif
1572#if 28<15
1573# if __ARM_ARCH__>=7
1574	ldr	r2,[r1],#4			@ prefetch
1575# else
1576	ldrb	r2,[r1,#3]
1577# endif
1578	eor	r12,r8,r9			@ a^b, b^c in next round
1579#else
1580	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
1581	eor	r12,r8,r9			@ a^b, b^c in next round
1582	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
1583#endif
1584	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1585	and	r3,r3,r12			@ (b^c)&=(a^b)
1586	add	r11,r11,r7			@ d+=h
1587	eor	r3,r3,r9			@ Maj(a,b,c)
1588	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1589	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1590	@ ldr	r2,[sp,#14*4]		@ 29
1591	@ ldr	r1,[sp,#11*4]
1592	mov	r0,r2,ror#7
1593	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1594	mov	r3,r1,ror#17
1595	eor	r0,r0,r2,ror#18
1596	eor	r3,r3,r1,ror#19
1597	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1598	ldr	r2,[sp,#13*4]
1599	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1600	ldr	r1,[sp,#6*4]
1601
1602	add	r3,r3,r0
1603	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1604	add	r2,r2,r3
1605	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1606	add	r2,r2,r1			@ X[i]
1607	ldr	r3,[r14],#4			@ *K256++
1608	add	r6,r6,r2			@ h+=X[i]
1609	str	r2,[sp,#13*4]
1610	eor	r2,r4,r5
1611	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1612	and	r2,r2,r11
1613	add	r6,r6,r3			@ h+=K256[i]
1614	eor	r2,r2,r5			@ Ch(e,f,g)
1615	eor	r0,r7,r7,ror#11
1616	add	r6,r6,r2			@ h+=Ch(e,f,g)
1617#if 29==31
1618	and	r3,r3,#0xff
1619	cmp	r3,#0xf2			@ done?
1620#endif
1621#if 29<15
1622# if __ARM_ARCH__>=7
1623	ldr	r2,[r1],#4			@ prefetch
1624# else
1625	ldrb	r2,[r1,#3]
1626# endif
1627	eor	r3,r7,r8			@ a^b, b^c in next round
1628#else
1629	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
1630	eor	r3,r7,r8			@ a^b, b^c in next round
1631	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
1632#endif
1633	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1634	and	r12,r12,r3			@ (b^c)&=(a^b)
1635	add	r10,r10,r6			@ d+=h
1636	eor	r12,r12,r8			@ Maj(a,b,c)
1637	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1638	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1639	@ ldr	r2,[sp,#15*4]		@ 30
1640	@ ldr	r1,[sp,#12*4]
1641	mov	r0,r2,ror#7
1642	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1643	mov	r12,r1,ror#17
1644	eor	r0,r0,r2,ror#18
1645	eor	r12,r12,r1,ror#19
1646	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1647	ldr	r2,[sp,#14*4]
1648	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1649	ldr	r1,[sp,#7*4]
1650
1651	add	r12,r12,r0
1652	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1653	add	r2,r2,r12
1654	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1655	add	r2,r2,r1			@ X[i]
1656	ldr	r12,[r14],#4			@ *K256++
1657	add	r5,r5,r2			@ h+=X[i]
1658	str	r2,[sp,#14*4]
1659	eor	r2,r11,r4
1660	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1661	and	r2,r2,r10
1662	add	r5,r5,r12			@ h+=K256[i]
1663	eor	r2,r2,r4			@ Ch(e,f,g)
1664	eor	r0,r6,r6,ror#11
1665	add	r5,r5,r2			@ h+=Ch(e,f,g)
1666#if 30==31
1667	and	r12,r12,#0xff
1668	cmp	r12,#0xf2			@ done?
1669#endif
1670#if 30<15
1671# if __ARM_ARCH__>=7
1672	ldr	r2,[r1],#4			@ prefetch
1673# else
1674	ldrb	r2,[r1,#3]
1675# endif
1676	eor	r12,r6,r7			@ a^b, b^c in next round
1677#else
1678	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
1679	eor	r12,r6,r7			@ a^b, b^c in next round
1680	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
1681#endif
1682	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1683	and	r3,r3,r12			@ (b^c)&=(a^b)
1684	add	r9,r9,r5			@ d+=h
1685	eor	r3,r3,r7			@ Maj(a,b,c)
1686	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1687	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1688	@ ldr	r2,[sp,#0*4]		@ 31
1689	@ ldr	r1,[sp,#13*4]
1690	mov	r0,r2,ror#7
1691	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1692	mov	r3,r1,ror#17
1693	eor	r0,r0,r2,ror#18
1694	eor	r3,r3,r1,ror#19
1695	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1696	ldr	r2,[sp,#15*4]
1697	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1698	ldr	r1,[sp,#8*4]
1699
1700	add	r3,r3,r0
1701	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1702	add	r2,r2,r3
1703	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1704	add	r2,r2,r1			@ X[i]
1705	ldr	r3,[r14],#4			@ *K256++
1706	add	r4,r4,r2			@ h+=X[i]
1707	str	r2,[sp,#15*4]
1708	eor	r2,r10,r11
1709	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1710	and	r2,r2,r9
1711	add	r4,r4,r3			@ h+=K256[i]
1712	eor	r2,r2,r11			@ Ch(e,f,g)
1713	eor	r0,r5,r5,ror#11
1714	add	r4,r4,r2			@ h+=Ch(e,f,g)
1715#if 31==31
1716	and	r3,r3,#0xff
1717	cmp	r3,#0xf2			@ done?
1718#endif
1719#if 31<15
1720# if __ARM_ARCH__>=7
1721	ldr	r2,[r1],#4			@ prefetch
1722# else
1723	ldrb	r2,[r1,#3]
1724# endif
1725	eor	r3,r5,r6			@ a^b, b^c in next round
1726#else
1727	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1728	eor	r3,r5,r6			@ a^b, b^c in next round
1729	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1730#endif
1731	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1732	and	r12,r12,r3			@ (b^c)&=(a^b)
1733	add	r8,r8,r4			@ d+=h
1734	eor	r12,r12,r6			@ Maj(a,b,c)
1735	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1736	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1737	ldreq	r3,[sp,#16*4]		@ pull ctx
1738	bne	.Lrounds_16_xx
1739
1740	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
1741	ldr	r0,[r3,#0]
1742	ldr	r2,[r3,#4]
1743	ldr	r12,[r3,#8]
1744	add	r4,r4,r0
1745	ldr	r0,[r3,#12]
1746	add	r5,r5,r2
1747	ldr	r2,[r3,#16]
1748	add	r6,r6,r12
1749	ldr	r12,[r3,#20]
1750	add	r7,r7,r0
1751	ldr	r0,[r3,#24]
1752	add	r8,r8,r2
1753	ldr	r2,[r3,#28]
1754	add	r9,r9,r12
1755	ldr	r1,[sp,#17*4]		@ pull inp
1756	ldr	r12,[sp,#18*4]		@ pull inp+len
1757	add	r10,r10,r0
1758	add	r11,r11,r2
1759	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1760	cmp	r1,r12
1761	sub	r14,r14,#256	@ rewind Ktbl
1762	bne	.Loop
1763
1764	add	sp,sp,#19*4	@ destroy frame
1765#if __ARM_ARCH__>=5
1766	ldmia	sp!,{r4-r11,pc}
1767#else
1768	ldmia	sp!,{r4-r11,lr}
1769	tst	lr,#1
1770	moveq	pc,lr			@ be binary compatible with V4, yet
1771	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
1772#endif
1773.size	sha256_block_data_order,.-sha256_block_data_order
1774#if __ARM_ARCH__>=7
1775.fpu	neon
1776
@ -----------------------------------------------------------------------
@ sha256_block_data_order_neon(u32 ctx[8], const u8 *inp, /* see below */)
@
@ NEON implementation of the SHA-256 compression function.  Reached via
@ the .LNEON branch from sha256_block_data_order, so on entry:
@   r0 = ctx (8 x u32 working state: a..h)
@   r1 = input pointer
@   r2 = inp + 64*num, i.e. END of input (caller did "add r2,r1,r2,lsl#6")
@   r3 = address of sha256_block_data_order; "r3 - (256+32)" points at the
@        K256 constant table that precedes the function (the 32 bytes
@        cover the terminator/armcap words between K256 and the code --
@        NOTE(review): offset assumed from the "@ K256" comment; confirm
@        against the full file layout).
@
@ Strategy: scalar ALU rounds (state in r4..r11 = a..h, r0/r2/r3/r12 as
@ temporaries) are interleaved with NEON message-schedule expansion.
@ q0-q3 hold X[0..15] (the 16 current schedule words), q8-q11 and
@ d24/d25 are sigma0/sigma1 scratch.  Pre-added X[i]+K256[i] values are
@ streamed through the stack frame.
@
@ Stack frame (16-byte aligned by the "bic sp,sp,#15" below):
@   [sp,#0..63]  X[i]+K[i] staging area (written with vst1.32, read by
@                the scalar rounds via "ldr r2,[sp,#4*i]")
@   [sp,#64]     ctx pointer (r0)
@   [sp,#68]     current input pointer (r1)
@   [sp,#72]     input end pointer (r2)
@   [sp,#76]     original (pre-alignment) sp, restored before return
@ -----------------------------------------------------------------------
1777.type	sha256_block_data_order_neon,%function
1778.align	4
1779sha256_block_data_order_neon:
1780.LNEON:
1781	stmdb	sp!,{r4-r12,lr}
1782
@ Build the aligned frame; r12 keeps the original sp so it can be both
@ saved into the frame and restored at the end.
1783	mov	r12,sp
1784	sub	sp,sp,#16*4+16		@ alloca
1785	sub	r14,r3,#256+32	@ K256
1786	bic	sp,sp,#15		@ align for 128-bit stores
1787
@ Load the first 64-byte block into q0-q3, byte-swap each 32-bit word to
@ big-endian, and pre-add the first 16 K256 constants; the X[i]+K[i]
@ sums are stored to the staging area at [sp,#0..63] for the scalar
@ rounds to consume.
1788	vld1.8		{q0},[r1]!
1789	vld1.8		{q1},[r1]!
1790	vld1.8		{q2},[r1]!
1791	vld1.8		{q3},[r1]!
1792	vld1.32		{q8},[r14,:128]!
1793	vld1.32		{q9},[r14,:128]!
1794	vld1.32		{q10},[r14,:128]!
1795	vld1.32		{q11},[r14,:128]!
1796	vrev32.8	q0,q0		@ yes, even on
1797	str		r0,[sp,#64]
1798	vrev32.8	q1,q1		@ big-endian
1799	str		r1,[sp,#68]
1800	mov		r1,sp
1801	vrev32.8	q2,q2
1802	str		r2,[sp,#72]
1803	vrev32.8	q3,q3
1804	str		r12,[sp,#76]		@ save original sp
1805	vadd.i32	q8,q8,q0
1806	vadd.i32	q9,q9,q1
1807	vst1.32		{q8},[r1,:128]!
1808	vadd.i32	q10,q10,q2
1809	vst1.32		{q9},[r1,:128]!
1810	vadd.i32	q10,q10,q2
1811	vst1.32		{q10},[r1,:128]!
1812	vst1.32		{q11},[r1,:128]!
1813
@ Load working state a..h into r4..r11; r1 rewinds to the start of the
@ staging area; r2 = first X[0]+K[0]; r12/r3 seed the Maj() recurrences
@ ("magic" two-round-deferred Maj trick, same as the integer-only path).
1814	ldmia		r0,{r4-r11}
1815	sub		r1,r1,#64
1816	ldr		r2,[sp,#0]
1817	eor		r12,r12,r12
1818	eor		r3,r5,r6
1819	b		.L_00_48
1820
@ Main loop: each iteration retires 16 scalar rounds while the NEON unit
@ expands the NEXT 16 message-schedule words (sigma0/sigma1 built from
@ vshr/vsli pairs, since NEON has no 32-bit rotate), adds the next K256
@ constants (vld1.32 from r14) and stores the sums back over the staging
@ area for the following iteration.
1821.align	4
1822.L_00_48:
1823	vext.8	q8,q0,q1,#4
1824	add	r11,r11,r2
1825	eor	r2,r9,r10
1826	eor	r0,r8,r8,ror#5
1827	vext.8	q9,q2,q3,#4
1828	add	r4,r4,r12
1829	and	r2,r2,r8
1830	eor	r12,r0,r8,ror#19
1831	vshr.u32	q10,q8,#7
1832	eor	r0,r4,r4,ror#11
1833	eor	r2,r2,r10
1834	vadd.i32	q0,q0,q9
1835	add	r11,r11,r12,ror#6
1836	eor	r12,r4,r5
1837	vshr.u32	q9,q8,#3
1838	eor	r0,r0,r4,ror#20
1839	add	r11,r11,r2
1840	vsli.32	q10,q8,#25
1841	ldr	r2,[sp,#4]
1842	and	r3,r3,r12
1843	vshr.u32	q11,q8,#18
1844	add	r7,r7,r11
1845	add	r11,r11,r0,ror#2
1846	eor	r3,r3,r5
1847	veor	q9,q9,q10
1848	add	r10,r10,r2
1849	vsli.32	q11,q8,#14
1850	eor	r2,r8,r9
1851	eor	r0,r7,r7,ror#5
1852	vshr.u32	d24,d7,#17
1853	add	r11,r11,r3
1854	and	r2,r2,r7
1855	veor	q9,q9,q11
1856	eor	r3,r0,r7,ror#19
1857	eor	r0,r11,r11,ror#11
1858	vsli.32	d24,d7,#15
1859	eor	r2,r2,r9
1860	add	r10,r10,r3,ror#6
1861	vshr.u32	d25,d7,#10
1862	eor	r3,r11,r4
1863	eor	r0,r0,r11,ror#20
1864	vadd.i32	q0,q0,q9
1865	add	r10,r10,r2
1866	ldr	r2,[sp,#8]
1867	veor	d25,d25,d24
1868	and	r12,r12,r3
1869	add	r6,r6,r10
1870	vshr.u32	d24,d7,#19
1871	add	r10,r10,r0,ror#2
1872	eor	r12,r12,r4
1873	vsli.32	d24,d7,#13
1874	add	r9,r9,r2
1875	eor	r2,r7,r8
1876	veor	d25,d25,d24
1877	eor	r0,r6,r6,ror#5
1878	add	r10,r10,r12
1879	vadd.i32	d0,d0,d25
1880	and	r2,r2,r6
1881	eor	r12,r0,r6,ror#19
1882	vshr.u32	d24,d0,#17
1883	eor	r0,r10,r10,ror#11
1884	eor	r2,r2,r8
1885	vsli.32	d24,d0,#15
1886	add	r9,r9,r12,ror#6
1887	eor	r12,r10,r11
1888	vshr.u32	d25,d0,#10
1889	eor	r0,r0,r10,ror#20
1890	add	r9,r9,r2
1891	veor	d25,d25,d24
1892	ldr	r2,[sp,#12]
1893	and	r3,r3,r12
1894	vshr.u32	d24,d0,#19
1895	add	r5,r5,r9
1896	add	r9,r9,r0,ror#2
1897	eor	r3,r3,r11
1898	vld1.32	{q8},[r14,:128]!
1899	add	r8,r8,r2
1900	vsli.32	d24,d0,#13
1901	eor	r2,r6,r7
1902	eor	r0,r5,r5,ror#5
1903	veor	d25,d25,d24
1904	add	r9,r9,r3
1905	and	r2,r2,r5
1906	vadd.i32	d1,d1,d25
1907	eor	r3,r0,r5,ror#19
1908	eor	r0,r9,r9,ror#11
1909	vadd.i32	q8,q8,q0
1910	eor	r2,r2,r7
1911	add	r8,r8,r3,ror#6
1912	eor	r3,r9,r10
1913	eor	r0,r0,r9,ror#20
1914	add	r8,r8,r2
1915	ldr	r2,[sp,#16]
1916	and	r12,r12,r3
1917	add	r4,r4,r8
1918	vst1.32	{q8},[r1,:128]!
1919	add	r8,r8,r0,ror#2
1920	eor	r12,r12,r10
1921	vext.8	q8,q1,q2,#4
1922	add	r7,r7,r2
1923	eor	r2,r5,r6
1924	eor	r0,r4,r4,ror#5
1925	vext.8	q9,q3,q0,#4
1926	add	r8,r8,r12
1927	and	r2,r2,r4
1928	eor	r12,r0,r4,ror#19
1929	vshr.u32	q10,q8,#7
1930	eor	r0,r8,r8,ror#11
1931	eor	r2,r2,r6
1932	vadd.i32	q1,q1,q9
1933	add	r7,r7,r12,ror#6
1934	eor	r12,r8,r9
1935	vshr.u32	q9,q8,#3
1936	eor	r0,r0,r8,ror#20
1937	add	r7,r7,r2
1938	vsli.32	q10,q8,#25
1939	ldr	r2,[sp,#20]
1940	and	r3,r3,r12
1941	vshr.u32	q11,q8,#18
1942	add	r11,r11,r7
1943	add	r7,r7,r0,ror#2
1944	eor	r3,r3,r9
1945	veor	q9,q9,q10
1946	add	r6,r6,r2
1947	vsli.32	q11,q8,#14
1948	eor	r2,r4,r5
1949	eor	r0,r11,r11,ror#5
1950	vshr.u32	d24,d1,#17
1951	add	r7,r7,r3
1952	and	r2,r2,r11
1953	veor	q9,q9,q11
1954	eor	r3,r0,r11,ror#19
1955	eor	r0,r7,r7,ror#11
1956	vsli.32	d24,d1,#15
1957	eor	r2,r2,r5
1958	add	r6,r6,r3,ror#6
1959	vshr.u32	d25,d1,#10
1960	eor	r3,r7,r8
1961	eor	r0,r0,r7,ror#20
1962	vadd.i32	q1,q1,q9
1963	add	r6,r6,r2
1964	ldr	r2,[sp,#24]
1965	veor	d25,d25,d24
1966	and	r12,r12,r3
1967	add	r10,r10,r6
1968	vshr.u32	d24,d1,#19
1969	add	r6,r6,r0,ror#2
1970	eor	r12,r12,r8
1971	vsli.32	d24,d1,#13
1972	add	r5,r5,r2
1973	eor	r2,r11,r4
1974	veor	d25,d25,d24
1975	eor	r0,r10,r10,ror#5
1976	add	r6,r6,r12
1977	vadd.i32	d2,d2,d25
1978	and	r2,r2,r10
1979	eor	r12,r0,r10,ror#19
1980	vshr.u32	d24,d2,#17
1981	eor	r0,r6,r6,ror#11
1982	eor	r2,r2,r4
1983	vsli.32	d24,d2,#15
1984	add	r5,r5,r12,ror#6
1985	eor	r12,r6,r7
1986	vshr.u32	d25,d2,#10
1987	eor	r0,r0,r6,ror#20
1988	add	r5,r5,r2
1989	veor	d25,d25,d24
1990	ldr	r2,[sp,#28]
1991	and	r3,r3,r12
1992	vshr.u32	d24,d2,#19
1993	add	r9,r9,r5
1994	add	r5,r5,r0,ror#2
1995	eor	r3,r3,r7
1996	vld1.32	{q8},[r14,:128]!
1997	add	r4,r4,r2
1998	vsli.32	d24,d2,#13
1999	eor	r2,r10,r11
2000	eor	r0,r9,r9,ror#5
2001	veor	d25,d25,d24
2002	add	r5,r5,r3
2003	and	r2,r2,r9
2004	vadd.i32	d3,d3,d25
2005	eor	r3,r0,r9,ror#19
2006	eor	r0,r5,r5,ror#11
2007	vadd.i32	q8,q8,q1
2008	eor	r2,r2,r11
2009	add	r4,r4,r3,ror#6
2010	eor	r3,r5,r6
2011	eor	r0,r0,r5,ror#20
2012	add	r4,r4,r2
2013	ldr	r2,[sp,#32]
2014	and	r12,r12,r3
2015	add	r8,r8,r4
2016	vst1.32	{q8},[r1,:128]!
2017	add	r4,r4,r0,ror#2
2018	eor	r12,r12,r6
2019	vext.8	q8,q2,q3,#4
2020	add	r11,r11,r2
2021	eor	r2,r9,r10
2022	eor	r0,r8,r8,ror#5
2023	vext.8	q9,q0,q1,#4
2024	add	r4,r4,r12
2025	and	r2,r2,r8
2026	eor	r12,r0,r8,ror#19
2027	vshr.u32	q10,q8,#7
2028	eor	r0,r4,r4,ror#11
2029	eor	r2,r2,r10
2030	vadd.i32	q2,q2,q9
2031	add	r11,r11,r12,ror#6
2032	eor	r12,r4,r5
2033	vshr.u32	q9,q8,#3
2034	eor	r0,r0,r4,ror#20
2035	add	r11,r11,r2
2036	vsli.32	q10,q8,#25
2037	ldr	r2,[sp,#36]
2038	and	r3,r3,r12
2039	vshr.u32	q11,q8,#18
2040	add	r7,r7,r11
2041	add	r11,r11,r0,ror#2
2042	eor	r3,r3,r5
2043	veor	q9,q9,q10
2044	add	r10,r10,r2
2045	vsli.32	q11,q8,#14
2046	eor	r2,r8,r9
2047	eor	r0,r7,r7,ror#5
2048	vshr.u32	d24,d3,#17
2049	add	r11,r11,r3
2050	and	r2,r2,r7
2051	veor	q9,q9,q11
2052	eor	r3,r0,r7,ror#19
2053	eor	r0,r11,r11,ror#11
2054	vsli.32	d24,d3,#15
2055	eor	r2,r2,r9
2056	add	r10,r10,r3,ror#6
2057	vshr.u32	d25,d3,#10
2058	eor	r3,r11,r4
2059	eor	r0,r0,r11,ror#20
2060	vadd.i32	q2,q2,q9
2061	add	r10,r10,r2
2062	ldr	r2,[sp,#40]
2063	veor	d25,d25,d24
2064	and	r12,r12,r3
2065	add	r6,r6,r10
2066	vshr.u32	d24,d3,#19
2067	add	r10,r10,r0,ror#2
2068	eor	r12,r12,r4
2069	vsli.32	d24,d3,#13
2070	add	r9,r9,r2
2071	eor	r2,r7,r8
2072	veor	d25,d25,d24
2073	eor	r0,r6,r6,ror#5
2074	add	r10,r10,r12
2075	vadd.i32	d4,d4,d25
2076	and	r2,r2,r6
2077	eor	r12,r0,r6,ror#19
2078	vshr.u32	d24,d4,#17
2079	eor	r0,r10,r10,ror#11
2080	eor	r2,r2,r8
2081	vsli.32	d24,d4,#15
2082	add	r9,r9,r12,ror#6
2083	eor	r12,r10,r11
2084	vshr.u32	d25,d4,#10
2085	eor	r0,r0,r10,ror#20
2086	add	r9,r9,r2
2087	veor	d25,d25,d24
2088	ldr	r2,[sp,#44]
2089	and	r3,r3,r12
2090	vshr.u32	d24,d4,#19
2091	add	r5,r5,r9
2092	add	r9,r9,r0,ror#2
2093	eor	r3,r3,r11
2094	vld1.32	{q8},[r14,:128]!
2095	add	r8,r8,r2
2096	vsli.32	d24,d4,#13
2097	eor	r2,r6,r7
2098	eor	r0,r5,r5,ror#5
2099	veor	d25,d25,d24
2100	add	r9,r9,r3
2101	and	r2,r2,r5
2102	vadd.i32	d5,d5,d25
2103	eor	r3,r0,r5,ror#19
2104	eor	r0,r9,r9,ror#11
2105	vadd.i32	q8,q8,q2
2106	eor	r2,r2,r7
2107	add	r8,r8,r3,ror#6
2108	eor	r3,r9,r10
2109	eor	r0,r0,r9,ror#20
2110	add	r8,r8,r2
2111	ldr	r2,[sp,#48]
2112	and	r12,r12,r3
2113	add	r4,r4,r8
2114	vst1.32	{q8},[r1,:128]!
2115	add	r8,r8,r0,ror#2
2116	eor	r12,r12,r10
2117	vext.8	q8,q3,q0,#4
2118	add	r7,r7,r2
2119	eor	r2,r5,r6
2120	eor	r0,r4,r4,ror#5
2121	vext.8	q9,q1,q2,#4
2122	add	r8,r8,r12
2123	and	r2,r2,r4
2124	eor	r12,r0,r4,ror#19
2125	vshr.u32	q10,q8,#7
2126	eor	r0,r8,r8,ror#11
2127	eor	r2,r2,r6
2128	vadd.i32	q3,q3,q9
2129	add	r7,r7,r12,ror#6
2130	eor	r12,r8,r9
2131	vshr.u32	q9,q8,#3
2132	eor	r0,r0,r8,ror#20
2133	add	r7,r7,r2
2134	vsli.32	q10,q8,#25
2135	ldr	r2,[sp,#52]
2136	and	r3,r3,r12
2137	vshr.u32	q11,q8,#18
2138	add	r11,r11,r7
2139	add	r7,r7,r0,ror#2
2140	eor	r3,r3,r9
2141	veor	q9,q9,q10
2142	add	r6,r6,r2
2143	vsli.32	q11,q8,#14
2144	eor	r2,r4,r5
2145	eor	r0,r11,r11,ror#5
2146	vshr.u32	d24,d5,#17
2147	add	r7,r7,r3
2148	and	r2,r2,r11
2149	veor	q9,q9,q11
2150	eor	r3,r0,r11,ror#19
2151	eor	r0,r7,r7,ror#11
2152	vsli.32	d24,d5,#15
2153	eor	r2,r2,r5
2154	add	r6,r6,r3,ror#6
2155	vshr.u32	d25,d5,#10
2156	eor	r3,r7,r8
2157	eor	r0,r0,r7,ror#20
2158	vadd.i32	q3,q3,q9
2159	add	r6,r6,r2
2160	ldr	r2,[sp,#56]
2161	veor	d25,d25,d24
2162	and	r12,r12,r3
2163	add	r10,r10,r6
2164	vshr.u32	d24,d5,#19
2165	add	r6,r6,r0,ror#2
2166	eor	r12,r12,r8
2167	vsli.32	d24,d5,#13
2168	add	r5,r5,r2
2169	eor	r2,r11,r4
2170	veor	d25,d25,d24
2171	eor	r0,r10,r10,ror#5
2172	add	r6,r6,r12
2173	vadd.i32	d6,d6,d25
2174	and	r2,r2,r10
2175	eor	r12,r0,r10,ror#19
2176	vshr.u32	d24,d6,#17
2177	eor	r0,r6,r6,ror#11
2178	eor	r2,r2,r4
2179	vsli.32	d24,d6,#15
2180	add	r5,r5,r12,ror#6
2181	eor	r12,r6,r7
2182	vshr.u32	d25,d6,#10
2183	eor	r0,r0,r6,ror#20
2184	add	r5,r5,r2
2185	veor	d25,d25,d24
2186	ldr	r2,[sp,#60]
2187	and	r3,r3,r12
2188	vshr.u32	d24,d6,#19
2189	add	r9,r9,r5
2190	add	r5,r5,r0,ror#2
2191	eor	r3,r3,r7
2192	vld1.32	{q8},[r14,:128]!
2193	add	r4,r4,r2
2194	vsli.32	d24,d6,#13
2195	eor	r2,r10,r11
2196	eor	r0,r9,r9,ror#5
2197	veor	d25,d25,d24
2198	add	r5,r5,r3
2199	and	r2,r2,r9
2200	vadd.i32	d7,d7,d25
2201	eor	r3,r0,r9,ror#19
2202	eor	r0,r5,r5,ror#11
2203	vadd.i32	q8,q8,q3
2204	eor	r2,r2,r11
2205	add	r4,r4,r3,ror#6
2206	eor	r3,r5,r6
2207	eor	r0,r0,r5,ror#20
2208	add	r4,r4,r2
@ K256 is followed by a zero word (".word 0 @ terminator" in the data
@ section); reading it here via r14 detects when all 64 constants have
@ been consumed, i.e. rounds 0..47 are done and the last 16 begin.
2209	ldr	r2,[r14]
2210	and	r12,r12,r3
2211	add	r8,r8,r4
2212	vst1.32	{q8},[r1,:128]!
2213	add	r4,r4,r0,ror#2
2214	eor	r12,r12,r6
2215	teq	r2,#0				@ check for K256 terminator
2216	ldr	r2,[sp,#0]
2217	sub	r1,r1,#64
2218	bne	.L_00_48
2219
@ Rounds 48..63: no further schedule expansion needed.  Instead, load
@ and byte-swap the NEXT input block (q0-q3) and pre-add its first K256
@ constants, overlapping that work with the remaining scalar rounds.
@ When the current block is the last one (r1 == end), reload the block
@ just processed instead ("subeq r1,r1,#64") purely so the vld1 stays in
@ bounds; the loaded data is then discarded because the eq path exits.
2220	ldr		r1,[sp,#68]
2221	ldr		r0,[sp,#72]
2222	sub		r14,r14,#256	@ rewind r14
2223	teq		r1,r0
2224	subeq		r1,r1,#64		@ avoid SEGV
2225	vld1.8		{q0},[r1]!		@ load next input block
2226	vld1.8		{q1},[r1]!
2227	vld1.8		{q2},[r1]!
2228	vld1.8		{q3},[r1]!
2229	strne		r1,[sp,#68]
2230	mov		r1,sp
2231	add	r11,r11,r2
2232	eor	r2,r9,r10
2233	eor	r0,r8,r8,ror#5
2234	add	r4,r4,r12
2235	vld1.32	{q8},[r14,:128]!
2236	and	r2,r2,r8
2237	eor	r12,r0,r8,ror#19
2238	eor	r0,r4,r4,ror#11
2239	eor	r2,r2,r10
2240	vrev32.8	q0,q0
2241	add	r11,r11,r12,ror#6
2242	eor	r12,r4,r5
2243	eor	r0,r0,r4,ror#20
2244	add	r11,r11,r2
2245	vadd.i32	q8,q8,q0
2246	ldr	r2,[sp,#4]
2247	and	r3,r3,r12
2248	add	r7,r7,r11
2249	add	r11,r11,r0,ror#2
2250	eor	r3,r3,r5
2251	add	r10,r10,r2
2252	eor	r2,r8,r9
2253	eor	r0,r7,r7,ror#5
2254	add	r11,r11,r3
2255	and	r2,r2,r7
2256	eor	r3,r0,r7,ror#19
2257	eor	r0,r11,r11,ror#11
2258	eor	r2,r2,r9
2259	add	r10,r10,r3,ror#6
2260	eor	r3,r11,r4
2261	eor	r0,r0,r11,ror#20
2262	add	r10,r10,r2
2263	ldr	r2,[sp,#8]
2264	and	r12,r12,r3
2265	add	r6,r6,r10
2266	add	r10,r10,r0,ror#2
2267	eor	r12,r12,r4
2268	add	r9,r9,r2
2269	eor	r2,r7,r8
2270	eor	r0,r6,r6,ror#5
2271	add	r10,r10,r12
2272	and	r2,r2,r6
2273	eor	r12,r0,r6,ror#19
2274	eor	r0,r10,r10,ror#11
2275	eor	r2,r2,r8
2276	add	r9,r9,r12,ror#6
2277	eor	r12,r10,r11
2278	eor	r0,r0,r10,ror#20
2279	add	r9,r9,r2
2280	ldr	r2,[sp,#12]
2281	and	r3,r3,r12
2282	add	r5,r5,r9
2283	add	r9,r9,r0,ror#2
2284	eor	r3,r3,r11
2285	add	r8,r8,r2
2286	eor	r2,r6,r7
2287	eor	r0,r5,r5,ror#5
2288	add	r9,r9,r3
2289	and	r2,r2,r5
2290	eor	r3,r0,r5,ror#19
2291	eor	r0,r9,r9,ror#11
2292	eor	r2,r2,r7
2293	add	r8,r8,r3,ror#6
2294	eor	r3,r9,r10
2295	eor	r0,r0,r9,ror#20
2296	add	r8,r8,r2
2297	ldr	r2,[sp,#16]
2298	and	r12,r12,r3
2299	add	r4,r4,r8
2300	add	r8,r8,r0,ror#2
2301	eor	r12,r12,r10
2302	vst1.32	{q8},[r1,:128]!
2303	add	r7,r7,r2
2304	eor	r2,r5,r6
2305	eor	r0,r4,r4,ror#5
2306	add	r8,r8,r12
2307	vld1.32	{q8},[r14,:128]!
2308	and	r2,r2,r4
2309	eor	r12,r0,r4,ror#19
2310	eor	r0,r8,r8,ror#11
2311	eor	r2,r2,r6
2312	vrev32.8	q1,q1
2313	add	r7,r7,r12,ror#6
2314	eor	r12,r8,r9
2315	eor	r0,r0,r8,ror#20
2316	add	r7,r7,r2
2317	vadd.i32	q8,q8,q1
2318	ldr	r2,[sp,#20]
2319	and	r3,r3,r12
2320	add	r11,r11,r7
2321	add	r7,r7,r0,ror#2
2322	eor	r3,r3,r9
2323	add	r6,r6,r2
2324	eor	r2,r4,r5
2325	eor	r0,r11,r11,ror#5
2326	add	r7,r7,r3
2327	and	r2,r2,r11
2328	eor	r3,r0,r11,ror#19
2329	eor	r0,r7,r7,ror#11
2330	eor	r2,r2,r5
2331	add	r6,r6,r3,ror#6
2332	eor	r3,r7,r8
2333	eor	r0,r0,r7,ror#20
2334	add	r6,r6,r2
2335	ldr	r2,[sp,#24]
2336	and	r12,r12,r3
2337	add	r10,r10,r6
2338	add	r6,r6,r0,ror#2
2339	eor	r12,r12,r8
2340	add	r5,r5,r2
2341	eor	r2,r11,r4
2342	eor	r0,r10,r10,ror#5
2343	add	r6,r6,r12
2344	and	r2,r2,r10
2345	eor	r12,r0,r10,ror#19
2346	eor	r0,r6,r6,ror#11
2347	eor	r2,r2,r4
2348	add	r5,r5,r12,ror#6
2349	eor	r12,r6,r7
2350	eor	r0,r0,r6,ror#20
2351	add	r5,r5,r2
2352	ldr	r2,[sp,#28]
2353	and	r3,r3,r12
2354	add	r9,r9,r5
2355	add	r5,r5,r0,ror#2
2356	eor	r3,r3,r7
2357	add	r4,r4,r2
2358	eor	r2,r10,r11
2359	eor	r0,r9,r9,ror#5
2360	add	r5,r5,r3
2361	and	r2,r2,r9
2362	eor	r3,r0,r9,ror#19
2363	eor	r0,r5,r5,ror#11
2364	eor	r2,r2,r11
2365	add	r4,r4,r3,ror#6
2366	eor	r3,r5,r6
2367	eor	r0,r0,r5,ror#20
2368	add	r4,r4,r2
2369	ldr	r2,[sp,#32]
2370	and	r12,r12,r3
2371	add	r8,r8,r4
2372	add	r4,r4,r0,ror#2
2373	eor	r12,r12,r6
2374	vst1.32	{q8},[r1,:128]!
2375	add	r11,r11,r2
2376	eor	r2,r9,r10
2377	eor	r0,r8,r8,ror#5
2378	add	r4,r4,r12
2379	vld1.32	{q8},[r14,:128]!
2380	and	r2,r2,r8
2381	eor	r12,r0,r8,ror#19
2382	eor	r0,r4,r4,ror#11
2383	eor	r2,r2,r10
2384	vrev32.8	q2,q2
2385	add	r11,r11,r12,ror#6
2386	eor	r12,r4,r5
2387	eor	r0,r0,r4,ror#20
2388	add	r11,r11,r2
2389	vadd.i32	q8,q8,q2
2390	ldr	r2,[sp,#36]
2391	and	r3,r3,r12
2392	add	r7,r7,r11
2393	add	r11,r11,r0,ror#2
2394	eor	r3,r3,r5
2395	add	r10,r10,r2
2396	eor	r2,r8,r9
2397	eor	r0,r7,r7,ror#5
2398	add	r11,r11,r3
2399	and	r2,r2,r7
2400	eor	r3,r0,r7,ror#19
2401	eor	r0,r11,r11,ror#11
2402	eor	r2,r2,r9
2403	add	r10,r10,r3,ror#6
2404	eor	r3,r11,r4
2405	eor	r0,r0,r11,ror#20
2406	add	r10,r10,r2
2407	ldr	r2,[sp,#40]
2408	and	r12,r12,r3
2409	add	r6,r6,r10
2410	add	r10,r10,r0,ror#2
2411	eor	r12,r12,r4
2412	add	r9,r9,r2
2413	eor	r2,r7,r8
2414	eor	r0,r6,r6,ror#5
2415	add	r10,r10,r12
2416	and	r2,r2,r6
2417	eor	r12,r0,r6,ror#19
2418	eor	r0,r10,r10,ror#11
2419	eor	r2,r2,r8
2420	add	r9,r9,r12,ror#6
2421	eor	r12,r10,r11
2422	eor	r0,r0,r10,ror#20
2423	add	r9,r9,r2
2424	ldr	r2,[sp,#44]
2425	and	r3,r3,r12
2426	add	r5,r5,r9
2427	add	r9,r9,r0,ror#2
2428	eor	r3,r3,r11
2429	add	r8,r8,r2
2430	eor	r2,r6,r7
2431	eor	r0,r5,r5,ror#5
2432	add	r9,r9,r3
2433	and	r2,r2,r5
2434	eor	r3,r0,r5,ror#19
2435	eor	r0,r9,r9,ror#11
2436	eor	r2,r2,r7
2437	add	r8,r8,r3,ror#6
2438	eor	r3,r9,r10
2439	eor	r0,r0,r9,ror#20
2440	add	r8,r8,r2
2441	ldr	r2,[sp,#48]
2442	and	r12,r12,r3
2443	add	r4,r4,r8
2444	add	r8,r8,r0,ror#2
2445	eor	r12,r12,r10
2446	vst1.32	{q8},[r1,:128]!
2447	add	r7,r7,r2
2448	eor	r2,r5,r6
2449	eor	r0,r4,r4,ror#5
2450	add	r8,r8,r12
2451	vld1.32	{q8},[r14,:128]!
2452	and	r2,r2,r4
2453	eor	r12,r0,r4,ror#19
2454	eor	r0,r8,r8,ror#11
2455	eor	r2,r2,r6
2456	vrev32.8	q3,q3
2457	add	r7,r7,r12,ror#6
2458	eor	r12,r8,r9
2459	eor	r0,r0,r8,ror#20
2460	add	r7,r7,r2
2461	vadd.i32	q8,q8,q3
2462	ldr	r2,[sp,#52]
2463	and	r3,r3,r12
2464	add	r11,r11,r7
2465	add	r7,r7,r0,ror#2
2466	eor	r3,r3,r9
2467	add	r6,r6,r2
2468	eor	r2,r4,r5
2469	eor	r0,r11,r11,ror#5
2470	add	r7,r7,r3
2471	and	r2,r2,r11
2472	eor	r3,r0,r11,ror#19
2473	eor	r0,r7,r7,ror#11
2474	eor	r2,r2,r5
2475	add	r6,r6,r3,ror#6
2476	eor	r3,r7,r8
2477	eor	r0,r0,r7,ror#20
2478	add	r6,r6,r2
2479	ldr	r2,[sp,#56]
2480	and	r12,r12,r3
2481	add	r10,r10,r6
2482	add	r6,r6,r0,ror#2
2483	eor	r12,r12,r8
2484	add	r5,r5,r2
2485	eor	r2,r11,r4
2486	eor	r0,r10,r10,ror#5
2487	add	r6,r6,r12
2488	and	r2,r2,r10
2489	eor	r12,r0,r10,ror#19
2490	eor	r0,r6,r6,ror#11
2491	eor	r2,r2,r4
2492	add	r5,r5,r12,ror#6
2493	eor	r12,r6,r7
2494	eor	r0,r0,r6,ror#20
2495	add	r5,r5,r2
2496	ldr	r2,[sp,#60]
2497	and	r3,r3,r12
2498	add	r9,r9,r5
2499	add	r5,r5,r0,ror#2
2500	eor	r3,r3,r7
2501	add	r4,r4,r2
2502	eor	r2,r10,r11
2503	eor	r0,r9,r9,ror#5
2504	add	r5,r5,r3
2505	and	r2,r2,r9
2506	eor	r3,r0,r9,ror#19
2507	eor	r0,r5,r5,ror#11
2508	eor	r2,r2,r11
2509	add	r4,r4,r3,ror#6
2510	eor	r3,r5,r6
2511	eor	r0,r0,r5,ror#20
2512	add	r4,r4,r2
@ All 64 rounds done: r2 = ctx pointer (from [sp,#64]); add the working
@ variables a..h (r4..r11) into the saved state and write it back.
2513	ldr	r2,[sp,#64]
2514	and	r12,r12,r3
2515	add	r8,r8,r4
2516	add	r4,r4,r0,ror#2
2517	eor	r12,r12,r6
2518	vst1.32	{q8},[r1,:128]!
2519	ldr	r0,[r2,#0]
2520	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
2521	ldr	r12,[r2,#4]
2522	ldr	r3,[r2,#8]
2523	ldr	r1,[r2,#12]
2524	add	r4,r4,r0			@ accumulate
2525	ldr	r0,[r2,#16]
2526	add	r5,r5,r12
2527	ldr	r12,[r2,#20]
2528	add	r6,r6,r3
2529	ldr	r3,[r2,#24]
2530	add	r7,r7,r1
2531	ldr	r1,[r2,#28]
2532	add	r8,r8,r0
2533	str	r4,[r2],#4
2534	add	r9,r9,r12
2535	str	r5,[r2],#4
2536	add	r10,r10,r3
2537	str	r6,[r2],#4
2538	add	r11,r11,r1
2539	str	r7,[r2],#4
2540	stmia	r2,{r8-r11}
2541
@ The condition flags still hold the "teq r1,r0" result from the block-
@ prefetch sequence above -- nothing in between sets flags.  ne = more
@ input: reset the round pointers/temps and loop; eq = done: recover the
@ original (pre-alignment) sp from the frame and return.
2542	movne	r1,sp
2543	ldrne	r2,[sp,#0]
2544	eorne	r12,r12,r12
2545	ldreq	sp,[sp,#76]			@ restore original sp
2546	eorne	r3,r5,r6
2547	bne	.L_00_48
2548
2549	ldmia	sp!,{r4-r12,pc}
2550.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
2551#endif
#if __ARM_ARCH__>=7
@ -----------------------------------------------------------------------
@ sha256_block_data_order_armv8 -- SHA-256 compression using the ARMv8
@ Cryptography Extension (sha256h/sha256h2/sha256su0/sha256su1).  The
@ crypto instructions are hand-encoded as .byte so that assemblers
@ without ARMv8 support can still build this file; the mnemonic for each
@ encoding is given in its trailing comment.
@ Reached via the .LARMv8 branch in sha256_block_data_order, so on entry:
@   r0 = ctx (eight 32-bit hash words)
@   r1 = input, r2 = end of input (r1 + 64*blocks, set at common entry)
@   r3 = address of sha256_block_data_order (set at common entry)
@ State convention: q0 = {a,b,c,d}, q1 = {e,f,g,h} (memory order of ctx).
@ -----------------------------------------------------------------------
.type	sha256_block_data_order_armv8,%function
.align	5
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32	{q0,q1},[r0]		@ load hash state from ctx
	sub	r3,r3,#sha256_block_data_order-K256	@ r3 -> K256 table

.Loop_v8:
	vld1.8		{q8-q9},[r1]!	@ load 64-byte input block
	vld1.8		{q10-q11},[r1]!
	vld1.32		{q12},[r3]!	@ first quartet of round constants
	vrev32.8	q8,q8		@ message words are big-endian
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov		q14,q0	@ offload
	vmov		q15,q1
	teq		r1,r2		@ last block? (tested by bne below)
	vld1.32		{q13},[r3]!
	@ 16 quartets of rounds follow: the first 12 also run the message
	@ schedule (sha256su0/su1) to produce W[16..63] in q8-q11.
	vadd.i32	q12,q12,q8
	.byte	0xe2,0x03,0xfa,0xf3	@ sha256su0 q8,q9
	vmov		q2,q0
	.byte	0x68,0x0c,0x02,0xf3	@ sha256h q0,q1,q12
	.byte	0x68,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q12
	.byte	0xe6,0x0c,0x64,0xf3	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	.byte	0xe4,0x23,0xfa,0xf3	@ sha256su0 q9,q10
	vmov		q2,q0
	.byte	0x6a,0x0c,0x02,0xf3	@ sha256h q0,q1,q13
	.byte	0x6a,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q13
	.byte	0xe0,0x2c,0x66,0xf3	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	.byte	0xe6,0x43,0xfa,0xf3	@ sha256su0 q10,q11
	vmov		q2,q0
	.byte	0x68,0x0c,0x02,0xf3	@ sha256h q0,q1,q12
	.byte	0x68,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q12
	.byte	0xe2,0x4c,0x60,0xf3	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	.byte	0xe0,0x63,0xfa,0xf3	@ sha256su0 q11,q8
	vmov		q2,q0
	.byte	0x6a,0x0c,0x02,0xf3	@ sha256h q0,q1,q13
	.byte	0x6a,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q13
	.byte	0xe4,0x6c,0x62,0xf3	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	.byte	0xe2,0x03,0xfa,0xf3	@ sha256su0 q8,q9
	vmov		q2,q0
	.byte	0x68,0x0c,0x02,0xf3	@ sha256h q0,q1,q12
	.byte	0x68,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q12
	.byte	0xe6,0x0c,0x64,0xf3	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	.byte	0xe4,0x23,0xfa,0xf3	@ sha256su0 q9,q10
	vmov		q2,q0
	.byte	0x6a,0x0c,0x02,0xf3	@ sha256h q0,q1,q13
	.byte	0x6a,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q13
	.byte	0xe0,0x2c,0x66,0xf3	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	.byte	0xe6,0x43,0xfa,0xf3	@ sha256su0 q10,q11
	vmov		q2,q0
	.byte	0x68,0x0c,0x02,0xf3	@ sha256h q0,q1,q12
	.byte	0x68,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q12
	.byte	0xe2,0x4c,0x60,0xf3	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	.byte	0xe0,0x63,0xfa,0xf3	@ sha256su0 q11,q8
	vmov		q2,q0
	.byte	0x6a,0x0c,0x02,0xf3	@ sha256h q0,q1,q13
	.byte	0x6a,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q13
	.byte	0xe4,0x6c,0x62,0xf3	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	.byte	0xe2,0x03,0xfa,0xf3	@ sha256su0 q8,q9
	vmov		q2,q0
	.byte	0x68,0x0c,0x02,0xf3	@ sha256h q0,q1,q12
	.byte	0x68,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q12
	.byte	0xe6,0x0c,0x64,0xf3	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	.byte	0xe4,0x23,0xfa,0xf3	@ sha256su0 q9,q10
	vmov		q2,q0
	.byte	0x6a,0x0c,0x02,0xf3	@ sha256h q0,q1,q13
	.byte	0x6a,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q13
	.byte	0xe0,0x2c,0x66,0xf3	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	.byte	0xe6,0x43,0xfa,0xf3	@ sha256su0 q10,q11
	vmov		q2,q0
	.byte	0x68,0x0c,0x02,0xf3	@ sha256h q0,q1,q12
	.byte	0x68,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q12
	.byte	0xe2,0x4c,0x60,0xf3	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	.byte	0xe0,0x63,0xfa,0xf3	@ sha256su0 q11,q8
	vmov		q2,q0
	.byte	0x6a,0x0c,0x02,0xf3	@ sha256h q0,q1,q13
	.byte	0x6a,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q13
	.byte	0xe4,0x6c,0x62,0xf3	@ sha256su1 q11,q9,q10
	@ last four quartets: all of W[] is ready, no more schedule updates
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov		q2,q0
	.byte	0x68,0x0c,0x02,0xf3	@ sha256h q0,q1,q12
	.byte	0x68,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q12

	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov		q2,q0
	.byte	0x6a,0x0c,0x02,0xf3	@ sha256h q0,q1,q13
	.byte	0x6a,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q13

	vld1.32		{q13},[r3]
	vadd.i32	q12,q12,q10
	sub		r3,r3,#256-16	@ rewind
	vmov		q2,q0
	.byte	0x68,0x0c,0x02,0xf3	@ sha256h q0,q1,q12
	.byte	0x68,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov		q2,q0
	.byte	0x6a,0x0c,0x02,0xf3	@ sha256h q0,q1,q13
	.byte	0x6a,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q13

	vadd.i32	q0,q0,q14	@ accumulate offloaded state
	vadd.i32	q1,q1,q15
	bne		.Loop_v8	@ ne from "teq r1,r2" above

	vst1.32		{q0,q1},[r0]	@ store updated hash back to ctx

	bx	lr		@ bx lr
.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
@ Identification string (runtime data -- kept verbatim).
.asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.align	2
@ Common (zero-initialized) capability word; probed at the entry of
@ sha256_block_data_order to dispatch to the NEON/ARMv8 paths.
.comm   OPENSSL_armcap_P,4,4