#if defined(__arm__)
#include "arm_arch.h"

.text

.global	sha1_block_data_order
.hidden	sha1_block_data_order
.type	sha1_block_data_order,%function

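@ sha1_block_data_order(state, data, num):
@   r0 = pointer to the five-word SHA-1 state h0..h4
@   r1 = pointer to the input data
@   r2 = number of 64-byte blocks (converted into an end pointer below)
@ The 80-word message schedule is built on the stack.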
.align	2
sha1_block_data_order:
	stmdb	sp!,{r4-r12,lr}
	add	r2,r1,r2,lsl#6	@ r2 to point at the end of r1
	ldmia	r0,{r3,r4,r5,r6,r7}
.Lloop:
	ldr	r8,.LK_00_19
	mov	r14,sp
	sub	sp,sp,#15*4
	mov	r5,r5,ror#30
	mov	r6,r6,ror#30
	mov	r7,r7,ror#30		@ [6]
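@ Rounds 0..14: fetch each big-endian input word (byte by byte on
@ pre-ARMv7, otherwise a word load, byte-swapped on little-endian)
@ and push it onto the stack as X[i] while updating E.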
.L_00_15:
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r5,r6			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	eor	r10,r5,r6			@ F_xx_xx
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r4,r10,ror#2
	add	r7,r7,r9			@ E+=X[i]
	eor	r10,r10,r6,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r7,r7,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r6,r8,r6,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r4,r5			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r6,r8,r6,ror#2			@ E+=K_00_19
	eor	r10,r4,r5			@ F_xx_xx
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r3,r10,ror#2
	add	r6,r6,r9			@ E+=X[i]
	eor	r10,r10,r5,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r6,r6,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r5,r8,r5,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r3,r4			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r5,r8,r5,ror#2			@ E+=K_00_19
	eor	r10,r3,r4			@ F_xx_xx
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r7,r10,ror#2
	add	r5,r5,r9			@ E+=X[i]
	eor	r10,r10,r4,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r5,r5,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r4,r8,r4,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r7,r3			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r4,r8,r4,ror#2			@ E+=K_00_19
	eor	r10,r7,r3			@ F_xx_xx
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r6,r10,ror#2
	add	r4,r4,r9			@ E+=X[i]
	eor	r10,r10,r3,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r4,r4,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r3,r8,r3,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r6,r7			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r3,r8,r3,ror#2			@ E+=K_00_19
	eor	r10,r6,r7			@ F_xx_xx
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r5,r10,ror#2
	add	r3,r3,r9			@ E+=X[i]
	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r3,r3,r10			@ E+=F_00_19(B,C,D)
	teq	r14,sp
	bne	.L_00_15		@ [((11+4)*5+2)*3]
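@ Round 15 is unrolled below; rounds 16..19 then switch to the
@ schedule expansion X[i]=ROL(X[i-3]^X[i-8]^X[i-14]^X[i-16],1),
@ where ror#31 is the rotate left by 1.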
	sub	sp,sp,#25*4
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r5,r6			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	eor	r10,r5,r6			@ F_xx_xx
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r4,r10,ror#2
	add	r7,r7,r9			@ E+=X[i]
	eor	r10,r10,r6,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r7,r7,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r4,r5			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and r10,r3,r10,ror#2					@ F_xx_xx
						@ F_xx_xx
	add	r6,r6,r9			@ E+=X[i]
	eor	r10,r10,r5,ror#2		@ F_00_19(B,C,D)
	add	r6,r6,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r3,r4			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and r10,r7,r10,ror#2					@ F_xx_xx
						@ F_xx_xx
	add	r5,r5,r9			@ E+=X[i]
	eor	r10,r10,r4,ror#2		@ F_00_19(B,C,D)
	add	r5,r5,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r7,r3			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and r10,r6,r10,ror#2					@ F_xx_xx
						@ F_xx_xx
	add	r4,r4,r9			@ E+=X[i]
	eor	r10,r10,r3,ror#2		@ F_00_19(B,C,D)
	add	r4,r4,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r6,r7			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and r10,r5,r10,ror#2					@ F_xx_xx
						@ F_xx_xx
	add	r3,r3,r9			@ E+=X[i]
	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
	add	r3,r3,r10			@ E+=F_00_19(B,C,D)

	ldr	r8,.LK_20_39		@ [+15+16*4]
	cmn	sp,#0			@ [+3], clear carry to denote 20_39
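@ Rounds 20..39 and 60..79 share this loop: both use the parity
@ function B^C^D, only the round constant in r8 differs.  The carry
@ flag (cleared above for 20..39, set before the second entry for
@ 60..79) tells the exit test below whether to fall through or
@ branch to .L_done.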
.L_20_39_or_60_79:
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r7,r8,r7,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r5,r6			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor r10,r4,r10,ror#2					@ F_xx_xx
						@ F_xx_xx
	add	r7,r7,r9			@ E+=X[i]
	add	r7,r7,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r4,r5			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor r10,r3,r10,ror#2					@ F_xx_xx
						@ F_xx_xx
	add	r6,r6,r9			@ E+=X[i]
	add	r6,r6,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r3,r4			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor r10,r7,r10,ror#2					@ F_xx_xx
						@ F_xx_xx
	add	r5,r5,r9			@ E+=X[i]
	add	r5,r5,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r7,r3			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor r10,r6,r10,ror#2					@ F_xx_xx
						@ F_xx_xx
	add	r4,r4,r9			@ E+=X[i]
	add	r4,r4,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r6,r7			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor r10,r5,r10,ror#2					@ F_xx_xx
						@ F_xx_xx
	add	r3,r3,r9			@ E+=X[i]
	add	r3,r3,r10			@ E+=F_20_39(B,C,D)
	teq	r14,sp			@ preserve carry
	bne	.L_20_39_or_60_79	@ [+((12+3)*5+2)*4]
	bcs	.L_done			@ [+((12+3)*5+2)*4], spare 300 bytes

	ldr	r8,.LK_40_59
	sub	sp,sp,#20*4		@ [+2]
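@ Rounds 40..59: Maj(B,C,D) is computed as (B&(C^D)) + (C&D); the
@ two terms cannot overlap bitwise, so adding them equals the OR.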
.L_40_59:
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r7,r8,r7,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r5,r6			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and r10,r4,r10,ror#2					@ F_xx_xx
	and r11,r5,r6					@ F_xx_xx
	add	r7,r7,r9			@ E+=X[i]
	add	r7,r7,r10			@ E+=F_40_59(B,C,D)
	add	r7,r7,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r4,r5			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and r10,r3,r10,ror#2					@ F_xx_xx
	and r11,r4,r5					@ F_xx_xx
	add	r6,r6,r9			@ E+=X[i]
	add	r6,r6,r10			@ E+=F_40_59(B,C,D)
	add	r6,r6,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r3,r4			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and r10,r7,r10,ror#2					@ F_xx_xx
	and r11,r3,r4					@ F_xx_xx
	add	r5,r5,r9			@ E+=X[i]
	add	r5,r5,r10			@ E+=F_40_59(B,C,D)
	add	r5,r5,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r7,r3			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and r10,r6,r10,ror#2					@ F_xx_xx
	and r11,r7,r3					@ F_xx_xx
	add	r4,r4,r9			@ E+=X[i]
	add	r4,r4,r10			@ E+=F_40_59(B,C,D)
	add	r4,r4,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r6,r7			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and r10,r5,r10,ror#2					@ F_xx_xx
	and r11,r6,r7					@ F_xx_xx
	add	r3,r3,r9			@ E+=X[i]
	add	r3,r3,r10			@ E+=F_40_59(B,C,D)
	add	r3,r3,r11,ror#2
	teq	r14,sp
	bne	.L_40_59		@ [+((12+5)*5+2)*4]

	ldr	r8,.LK_60_79
	sub	sp,sp,#20*4
	cmp	sp,#0			@ set carry to denote 60_79
	b	.L_20_39_or_60_79	@ [+4], spare 300 bytes
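@ The per-round ROL(B,30) is not applied eagerly; the ror#2 is
@ folded into the operands that consume C, D and E instead.  Hence
@ the ror#2 corrections when those words are added back into the
@ state below, and the matching ror#30 pre-rotation at the top of
@ .Lloop.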
.L_done:
	add	sp,sp,#80*4		@ "deallocate" stack frame
	ldmia	r0,{r8,r9,r10,r11,r12}
	add	r3,r8,r3
	add	r4,r9,r4
	add	r5,r10,r5,ror#2
	add	r6,r11,r6,ror#2
	add	r7,r12,r7,ror#2
	stmia	r0,{r3,r4,r5,r6,r7}
	teq	r1,r2
	bne	.Lloop			@ [+18], total 1307

#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r12,pc}
#else
	ldmia	sp!,{r4-r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
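	@ 0xe12fff1e is the encoding of "bx lr"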
#endif
.align	2
.LK_00_19:	.word	0x5a827999
.LK_20_39:	.word	0x6ed9eba1
.LK_40_59:	.word	0x8f1bbcdc
.LK_60_79:	.word	0xca62c1d6
.size	sha1_block_data_order,.-sha1_block_data_order
.asciz	"SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
.align	2

#endif