1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# SHA1 block procedure for Alpha.
11
12# On 21264 performance is 33% better than code generated by vendor
13# compiler, and 75% better than GCC [3.4], and in absolute terms is
14# 8.7 cycles per processed byte. Implementation features vectorized
15# byte swap, but not Xupdate.
16
# Register allocation.  These are deliberately package variables: the round
# generators and the prologue/epilogue templates interpolate them by name.

# Sixteen-entry message schedule window, held in integer registers $0..$15.
@X=map { "\$$_" } (0..15);

# Routine arguments (a0 is register $16): context pointer, input pointer and
# block count.
($ctx,$inp,$num)=("a0","a1","a2");

# The five SHA-1 working variables (a4 is register $20).  @V lists them in
# a,b,c,d,e order and is rotated by the driver after every round.
@V=("a3","a4","a5","t8","t9");
($A,$B,$C,$D,$E)=@V;

# Scratch registers (t10 is register $24).  Note that $t2 is the return
# address register.
($t0,$t1,$t2,$t3)=("t10","t11","ra","t12");

# Current round constant (AT is register $28).
$K="AT";
32
# BODY_00_19 - append the assembly for one of rounds 0..19 to $code.
#
# Arguments: the round number, then the names of the registers currently
# playing the roles of a, b, c, d and e.  Every flavour of the round adds K,
# this round's schedule word, (a<<5), (a>>27) and (b & c) | (d & ~b) into e,
# and rebuilds b from (b<<30) and its upper half shifted down by 32.
#
# Which template is emitted depends on the round number:
#   even rounds below 15  - merge the two ldq_u results into a pair of
#                           32-bit words, byte-swap both at once and split
#                           the upper half off into the next X register;
#   odd rounds below 15   - plain round, the word was prepared one round ago;
#   rounds 15 and above   - plain round interleaved with the schedule update
#                           for the following round.
# Even rounds below 14 additionally issue the loads for the pair after next.
sub BODY_00_19 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
my $is_even=!($i&1);
my ($Xi,$Xj)=($X[$i%16],$X[$j%16]);	# word of this round / of the next one

# The very first round has to fetch its own pair of words.
if ($i==0) {
$code.=<<___;
	ldq_u	$X[0],0+0($inp)
	ldq_u	$X[1],0+7($inp)
___
}

# Fetch ahead: the quadwords for words $i+2 and $i+3.  The offset is left as
# an expression for the assembler to evaluate.
if ($is_even && $i<14) {
my ($Xlo,$Xhi)=@X[$i+2,$i+3];
$code.=<<___;
	ldq_u	$Xlo,($i+2)*4+0($inp)
	ldq_u	$Xhi,($i+2)*4+7($inp)
___
}

if ($i>=15) {
# with forward Xupdate: X[j] = rol(X[j]^X[j+2]^X[j+8]^X[j+13],1), indices
# taken modulo 16.
my ($Xj2,$Xj8,$Xj13)=@X[($j+2)%16,($j+8)%16,($j+13)%16];
$code.=<<___;
	sll	$a,5,$t1
	addl	$K,$e,$e
	and	$b,$c,$t2
	xor	$Xj2,$Xj,$Xj

	zapnot	$a,0xf,$a
	addl	$Xi,$e,$e
	bic	$d,$b,$t3
	xor	$Xj8,$Xj,$Xj

	srl	$a,27,$t0
	addl	$t1,$e,$e
	or	$t2,$t3,$t2
	xor	$Xj13,$Xj,$Xj

	sll	$b,30,$b
	addl	$t0,$e,$e
	srl	$Xj,31,$t1

	addl	$t2,$e,$e
	srl	$b,32,$t3
	addl	$Xj,$Xj,$Xj

	or	$t3,$b,$b
	zapnot	$Xi,0xf,$Xi
	or	$t1,$Xj,$Xj
___
} elsif ($is_even) {
$code.=<<___;
	extql	$Xi,$inp,$Xi
	extqh	$Xj,$inp,$Xj

	or	$Xj,$Xi,$Xi	# pair of 32-bit values are fetched

	srl	$Xi,24,$t0		# vectorized byte swap
	srl	$Xi,8,$t2

	sll	$Xi,8,$t3
	sll	$Xi,24,$Xi
	zapnot	$t0,0x11,$t0
	zapnot	$t2,0x22,$t2

	zapnot	$Xi,0x88,$Xi
	or	$t0,$t2,$t0
	zapnot	$t3,0x44,$t3
	sll	$a,5,$t1

	or	$Xi,$t0,$Xi
	addl	$K,$e,$e
	and	$b,$c,$t2
	zapnot	$a,0xf,$a

	or	$Xi,$t3,$Xi
	srl	$a,27,$t0
	bic	$d,$b,$t3
	sll	$b,30,$b

	extll	$Xi,4,$Xj	# extract upper half
	or	$t2,$t3,$t2
	addl	$Xi,$e,$e

	addl	$t1,$e,$e
	srl	$b,32,$t3
	zapnot	$Xi,0xf,$Xi

	addl	$t0,$e,$e
	addl	$t2,$e,$e
	or	$t3,$b,$b
___
} else {
$code.=<<___;
	sll	$a,5,$t1
	addl	$K,$e,$e
	and	$b,$c,$t2
	zapnot	$a,0xf,$a

	srl	$a,27,$t0
	addl	$Xi,$e,$e
	bic	$d,$b,$t3
	sll	$b,30,$b

	or	$t2,$t3,$t2
	addl	$t1,$e,$e
	srl	$b,32,$t3
	zapnot	$Xi,0xf,$Xi

	addl	$t0,$e,$e
	addl	$t2,$e,$e
	or	$t3,$b,$b
___
}
}
134
# BODY_20_39 - append the assembly for one parity round to $code.
#
# Arguments: the round number, then the names of the registers currently
# playing the roles of a, b, c, d and e.  The driver uses this generator for
# rounds 20..39 and again for rounds 60..79.  Each round adds K, this
# round's schedule word, (a<<5), (a>>27) and b ^ c ^ d into e, and replaces
# b with (b<<30) | (b>>2).
#
# Rounds below 79 interleave the schedule update for the following round.
# Round 79 has nothing left to schedule and uses those slots to load the
# five context words into X[0..4] instead.
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
my ($Xi,$Xj)=($X[$i%16],$X[$j%16]);	# word of this round / of the next one

if ($i<79) {
# with forward Xupdate: X[j] = rol(X[j]^X[j+2]^X[j+8]^X[j+13],1), indices
# taken modulo 16.
my ($Xj2,$Xj8,$Xj13)=@X[($j+2)%16,($j+8)%16,($j+13)%16];
$code.=<<___;
	sll	$a,5,$t1
	addl	$K,$e,$e
	zapnot	$a,0xf,$a
	xor	$Xj2,$Xj,$Xj

	sll	$b,30,$t3
	addl	$t1,$e,$e
	xor	$b,$c,$t2
	xor	$Xj8,$Xj,$Xj

	srl	$b,2,$b
	addl	$Xi,$e,$e
	xor	$d,$t2,$t2
	xor	$Xj13,$Xj,$Xj

	srl	$Xj,31,$t1
	addl	$t2,$e,$e
	srl	$a,27,$t0
	addl	$Xj,$Xj,$Xj

	or	$t3,$b,$b
	addl	$t0,$e,$e
	or	$t1,$Xj,$Xj
___
	# The consumed word is trimmed back to 32 bits only up to round 76.
	if ($i<77) {
$code.=<<___;
	zapnot	$Xi,0xf,$Xi
___
	}
} elsif ($i==79) {
# with context fetch
$code.=<<___;
	sll	$a,5,$t1
	addl	$K,$e,$e
	zapnot	$a,0xf,$a
	ldl	$X[0],0($ctx)

	sll	$b,30,$t3
	addl	$t1,$e,$e
	xor	$b,$c,$t2
	ldl	$X[1],4($ctx)

	srl	$b,2,$b
	addl	$Xi,$e,$e
	xor	$d,$t2,$t2
	ldl	$X[2],8($ctx)

	srl	$a,27,$t0
	addl	$t2,$e,$e
	ldl	$X[3],12($ctx)

	or	$t3,$b,$b
	addl	$t0,$e,$e
	ldl	$X[4],16($ctx)
___
}
}
191
# BODY_40_59 - append the assembly for one majority round to $code.
#
# Arguments: the round number, then the names of the registers currently
# playing the roles of a, b, c, d and e.  The round adds K, this round's
# schedule word, (a<<5), (a>>27) and (b & c) | (b & d) | (c & d) into e, and
# rebuilds b from (b<<30) and its upper half shifted down by 32.  The
# schedule update for the following round is interleaved unconditionally,
# and the consumed word is trimmed back to 32 bits at the end.
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
my ($Xi,$Xj)=($X[$i%16],$X[$j%16]);	# word of this round / of the next one

# with forward Xupdate: X[j] = rol(X[j]^X[j+2]^X[j+8]^X[j+13],1), indices
# taken modulo 16.
my ($Xj2,$Xj8,$Xj13)=@X[($j+2)%16,($j+8)%16,($j+13)%16];

$code.=<<___;
	sll	$a,5,$t1
	addl	$K,$e,$e
	zapnot	$a,0xf,$a
	xor	$Xj2,$Xj,$Xj

	srl	$a,27,$t0
	and	$b,$c,$t2
	and	$b,$d,$t3
	xor	$Xj8,$Xj,$Xj

	sll	$b,30,$b
	addl	$t1,$e,$e
	xor	$Xj13,$Xj,$Xj

	srl	$Xj,31,$t1
	addl	$t0,$e,$e
	or	$t2,$t3,$t2
	and	$c,$d,$t3

	or	$t2,$t3,$t2
	srl	$b,32,$t3
	addl	$Xi,$e,$e
	addl	$Xj,$Xj,$Xj

	or	$t3,$b,$b
	addl	$t2,$e,$e
	or	$t1,$Xj,$Xj
	zapnot	$Xi,0xf,$Xi
___
}
226
# Assemble sha1_block_data_order: prologue, 80 unrolled rounds in four
# groups of 20 (each preceded by the load of its round constant), then the
# context update, the loop branch and the epilogue.

# Emit rounds from the running counter $i up to, but not including, $stop
# using the given round generator.  After every round the register list is
# rotated right by one, so the register that was "e" becomes the next "a".
my $emit_rounds = sub {
	my ($stop,$body)=@_;
	while ($i<$stop) {
		$body->($i,@V);
		unshift(@V,pop(@V));
		$i++;
	}
};

# Prologue.  The ldah/lda pairs build the 32-bit round constants:
# 23170*65536+31129 = 0x5a827999.
$code=<<___;
#ifdef __linux__
#include <asm/regdef.h>
#else
#include <asm.h>
#include <regdef.h>
#endif

.text

.set	noat
.set	noreorder
.globl	sha1_block_data_order
.align	5
.ent	sha1_block_data_order
sha1_block_data_order:
	lda	sp,-64(sp)
	stq	ra,0(sp)
	stq	s0,8(sp)
	stq	s1,16(sp)
	stq	s2,24(sp)
	stq	s3,32(sp)
	stq	s4,40(sp)
	stq	s5,48(sp)
	stq	fp,56(sp)
	.mask	0x0400fe00,-64
	.frame	sp,64,ra
	.prologue 0

	ldl	$A,0($ctx)
	ldl	$B,4($ctx)
	sll	$num,6,$num
	ldl	$C,8($ctx)
	ldl	$D,12($ctx)
	ldl	$E,16($ctx)
	addq	$inp,$num,$num

.Lloop:
	.set	noreorder
	ldah	$K,23170(zero)
	zapnot	$B,0xf,$B
	lda	$K,31129($K)	# K_00_19
___
$i=0;
$emit_rounds->(20,\&BODY_00_19);

# 28378*65536-5215 = 0x6ed9eba1
$code.=<<___;
	ldah	$K,28378(zero)
	lda	$K,-5215($K)	# K_20_39
___
$emit_rounds->(40,\&BODY_20_39);

# -28900*65536-17188 = 0x8f1bbcdc modulo 2**32
$code.=<<___;
	ldah	$K,-28900(zero)
	lda	$K,-17188($K)	# K_40_59
___
$emit_rounds->(60,\&BODY_40_59);

# -13725*65536-15914 = 0xca62c1d6 modulo 2**32; the last 20 rounds reuse the
# parity round generator.
$code.=<<___;
	ldah	$K,-13725(zero)
	lda	$K,-15914($K)	# K_60_79
___
$emit_rounds->(80,\&BODY_20_39);

# Round 79 left the previous context words in X[0..4]; add them in, store
# the result, advance the input pointer and loop while it is below the end
# address computed in the prologue.
$code.=<<___;
	addl	@X[0],$A,$A
	addl	@X[1],$B,$B
	addl	@X[2],$C,$C
	addl	@X[3],$D,$D
	addl	@X[4],$E,$E
	stl	$A,0($ctx)
	stl	$B,4($ctx)
	addq	$inp,64,$inp
	stl	$C,8($ctx)
	stl	$D,12($ctx)
	stl	$E,16($ctx)
	cmpult	$inp,$num,$t1
	bne	$t1,.Lloop

	.set	noreorder
	ldq	ra,0(sp)
	ldq	s0,8(sp)
	ldq	s1,16(sp)
	ldq	s2,24(sp)
	ldq	s3,32(sp)
	ldq	s4,40(sp)
	ldq	s5,48(sp)
	ldq	fp,56(sp)
	lda	sp,64(sp)
	ret	(ra)
.end	sha1_block_data_order
.ascii	"SHA1 block transform for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
.align	2
___
# Write the generated assembly.  If a file name is given as the first
# command-line argument, STDOUT is redirected into that file; otherwise the
# code goes to the inherited STDOUT.
#
# The three-argument form of open takes the name literally, so characters
# such as '>', '&' or surrounding whitespace in the name cannot change the
# open mode.  Every step is checked: a build must not continue with a
# missing or truncated assembly file, and buffered write errors may only
# surface when the handle is closed.
$output=shift;
if (defined($output) && $output ne "") {
	open(STDOUT,'>',$output) or die "can't open $output: $!";
}
print STDOUT $code or die "can't write generated code: $!";
close(STDOUT) or die "error closing STDOUT: $!";
323