1392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom#!/usr/bin/env perl
2392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
3392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# ====================================================================
4392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# project. The module is, however, dual licensed under OpenSSL and
6392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# CRYPTOGAMS licenses depending on where you obtain it. For further
7392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# details see http://www.openssl.org/~appro/cryptogams/.
8392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# ====================================================================
9392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
10392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# SHA256/512 block procedure for PA-RISC.
11392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
12392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# June 2009.
13392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom#
14392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# SHA256 performance is >75% better than gcc 3.2 generated code on
15392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# PA-7100LC. Compared to code generated by vendor compiler this
16392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# implementation is almost 70% faster in 64-bit build, but delivers
17392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# virtually same performance in 32-bit build on PA-8600.
18392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom#
# SHA512 performance is >2.9x better than gcc 3.2 generated code on
# PA-7100LC, PA-RISC 1.1 processor. The implementation then detects if
# the code is executed on PA-RISC 2.0 processor and switches to 64-bit
# code path delivering adequate performance even in "blended" 32-bit
# build. Though 64-bit code is not any faster than code generated by
# vendor compiler on PA-8600...
25392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom#
26392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# Special thanks to polarhome.com for providing HP-UX account.
27392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Command line: <flavour> <output file>.
# $flavour selects the 32- or 64-bit ABI, $output is both the destination
# file and (by containing "512" or not) the selector for SHA512 vs SHA256.
$flavour = shift;
$output = shift;

# Send everything printed to STDOUT into $output.  The three-argument form
# keeps characters in the file name from being parsed as an open mode, and
# a failed open is reported instead of being silently ignored.  When no
# output file is given STDOUT is left untouched.
if (defined($output) && $output ne "") {
	open(STDOUT, '>', $output) or die "can't open $output: $!";
}
31392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# ABI-dependent parameters, chosen by whether $flavour mentions "64":
#   $LEVEL         argument of the .LEVEL directive
#   $SIZE_T        size in bytes of a saved general register slot
#   $FRAME_MARKER  size in bytes of the frame marker
#   $SAVED_RP      offset below %sp at which %r2 is saved
#   $PUSH/$PUSHMA  store / store-with-modify mnemonics
#   $POP/$POPMB    load / load-with-modify mnemonics
($LEVEL, $SIZE_T, $FRAME_MARKER, $SAVED_RP, $PUSH, $PUSHMA, $POP, $POPMB) =
	($flavour =~ /64/)
	? ("2.0W", 8, 80, 16, "std", "std,ma", "ldd", "ldd,mb")
	: ("1.0",  4, 48, 20, "stw", "stwm",   "ldw", "ldwm");
51392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Algorithm-dependent parameters: an output file name containing "512"
# selects SHA512, anything else selects SHA256.
#   $func        name of the exported function
#   $SZ          size in bytes of one hash word
#   $rounds      number of rounds
#   $LAST10BITS  value compared against the low 10 bits of a loaded table
#                word to recognise the end of the constant table
#   $LD/$LDM/$ST load / load-with-modify / store mnemonics for one hash word
($func, $SZ, $rounds, $LAST10BITS, $LD, $LDM, $ST) =
	($output =~ /512/)
	? ("sha512_block_data_order", 8, 80, 0x017, "ldd", "ldd,ma", "std")
	: ("sha256_block_data_order", 4, 64, 0x0f2, "ldw", "ldwm",   "stw");

# Rotate/shift amounts of the Sigma0, Sigma1, sigma0 and sigma1 functions.
@Sigma0 = ($SZ==8) ? (28,34,39) : ( 2,13,22);
@Sigma1 = ($SZ==8) ? (14,18,41) : ( 6,11,25);
@sigma0 = ($SZ==8) ? ( 1, 8, 7) : ( 7,18, 3);
@sigma1 = ($SZ==8) ? (19,61, 6) : (17,19,10);
77392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Stack frame layout.
$XOFF   = 32+16*$SZ;				# local variables
$FRAME  = $FRAME_MARKER+16*$SIZE_T+$XOFF;	# frame marker + 16 saved regs
						# [+ argument transfer] + locals
$XOFF  += $FRAME_MARKER;			# distance between %sp and local variables

# Incoming arguments.  The scratch registers below reuse the same machine
# registers, so $ctx is zapped by $a0, $inp by $a1 and $num by $t0.
($ctx, $inp, $num) = ("%r26", "%r25", "%r24");

# Scratch registers and the constant table pointer.
($a0, $a1, $t0, $t1, $Tbl) = ("%r26", "%r25", "%r24", "%r29", "%r31");

# Registers holding the eight working variables.
@V = (map("%r$_", 17..23), "%r28");
($A, $B, $C, $D, $E, $F, $G, $H) = @V;

# Registers holding the 16 message words, followed by $inp as a 17th slot.
@X = (map("%r$_", 1..16), $inp);
98392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# ROUND_00_15($i, $a,$b,$c,$d,$e,$f,$g,$h)
#
# Appends the assembly for one round to $code.  $i is the round number and
# $a..$h name the registers currently holding the eight working variables.
# The round uses message register $X[$i%16] and the table word already
# loaded into $t1; for $i<15 it also emits the load of the next table word.
# Backtick expressions are left in the text unevaluated.
sub ROUND_00_15 {
my $i = shift;
my ($a,$b,$c,$d,$e,$f,$g,$h) = @_;
my $Xi = $X[$i%16];		# message word register for this round

# h += table word + X[i] + Ch(e,f,g) + Sigma1(e)
$code.=<<___;
	_ror	$e,$Sigma1[0],$a0
	and	$f,$e,$t0
	_ror	$e,$Sigma1[1],$a1
	addl	$t1,$h,$h
	andcm	$g,$e,$t1
	xor	$a1,$a0,$a0
	_ror	$a1,`$Sigma1[2]-$Sigma1[1]`,$a1
	or	$t0,$t1,$t1		; Ch(e,f,g)
	addl	$Xi,$h,$h
	xor	$a0,$a1,$a1		; Sigma1(e)
	addl	$t1,$h,$h
	_ror	$a,$Sigma0[0],$a0
	addl	$a1,$h,$h

___

# d += h, then h += Sigma0(a) + Maj(a,b,c)
$code.=<<___;
	_ror	$a,$Sigma0[1],$a1
	and	$a,$b,$t0
	and	$a,$c,$t1
	xor	$a1,$a0,$a0
	_ror	$a1,`$Sigma0[2]-$Sigma0[1]`,$a1
	xor	$t1,$t0,$t0
	and	$b,$c,$t1
	xor	$a0,$a1,$a1		; Sigma0(a)
	addl	$h,$d,$d
	xor	$t1,$t0,$t0		; Maj(a,b,c)
	`"$LDM	$SZ($Tbl),$t1" if ($i<15)`
	addl	$a1,$h,$h
	addl	$t0,$h,$h

___
}
132392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# ROUND_16_xx($i, $a,$b,$c,$d,$e,$f,$g,$h)
#
# Appends the assembly for one round with $i>=16 to $code.  The message
# word X[i] is first updated from X[i+1], X[i+9] and X[i+14] (indices
# relative to $i-16, modulo 16) and the next table word is loaded into $t1.
# When $i-16 is 15 the low 10 bits of that word are compared with
# $LAST10BITS and $Tbl is bumped by 1 to signal the end of the table.
# The round itself is then emitted by ROUND_00_15 with the original $i.
sub ROUND_16_xx {
my ($round,@regs) = @_;
my $i = $round-16;		# index of X[] slot being refreshed
my ($Xw,$Xs0,$Xadd,$Xs1) = @X[$i, ($i+1)%16, ($i+9)%16, ($i+14)%16];

$code.=<<___;
	_ror	$Xs0,$sigma0[0],$a0
	_ror	$Xs0,$sigma0[1],$a1
	addl	$Xadd,$Xw,$Xw
	_ror	$Xs1,$sigma1[0],$t0
	_ror	$Xs1,$sigma1[1],$t1
	xor	$a1,$a0,$a0
	_shr	$Xs0,$sigma0[2],$a1
	xor	$t1,$t0,$t0
	_shr	$Xs1,$sigma1[2],$t1
	xor	$a1,$a0,$a0		; sigma0(X[(i+1)&0x0f])
	xor	$t1,$t0,$t0		; sigma1(X[(i+14)&0x0f])
	$LDM	$SZ($Tbl),$t1
	addl	$a0,$Xw,$Xw
	addl	$t0,$Xw,$Xw
___

if ($i==15) {
$code.=<<___;
	extru	$t1,31,10,$a1
	comiclr,<> $LAST10BITS,$a1,%r0
	ldo	1($Tbl),$Tbl		; signal end of $Tbl
___
}

ROUND_00_15($round,@regs);
}
159392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Start the generated assembly: architecture level, code subspace and the
# 64-byte aligned L$table label that precedes the constant table.
$code=<<___;
	.LEVEL	$LEVEL
	.SPACE	\$TEXT\$
	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY

	.ALIGN	64
L\$table
___
# Constant table for the SHA512 build ($SZ==8): each 8-byte entry is stored
# as two 32-bit words, high word first.  The low 10 bits of the final word
# equal the SHA512 value of $LAST10BITS (0x017).
$code.=<<___ if ($SZ==8);
	.WORD	0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
	.WORD	0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc
	.WORD	0x3956c25b,0xf348b538,0x59f111f1,0xb605d019
	.WORD	0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118
	.WORD	0xd807aa98,0xa3030242,0x12835b01,0x45706fbe
	.WORD	0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2
	.WORD	0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1
	.WORD	0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694
	.WORD	0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3
	.WORD	0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65
	.WORD	0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483
	.WORD	0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5
	.WORD	0x983e5152,0xee66dfab,0xa831c66d,0x2db43210
	.WORD	0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4
	.WORD	0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725
	.WORD	0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70
	.WORD	0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926
	.WORD	0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df
	.WORD	0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8
	.WORD	0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b
	.WORD	0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001
	.WORD	0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30
	.WORD	0xd192e819,0xd6ef5218,0xd6990624,0x5565a910
	.WORD	0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8
	.WORD	0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53
	.WORD	0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8
	.WORD	0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb
	.WORD	0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3
	.WORD	0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60
	.WORD	0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec
	.WORD	0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9
	.WORD	0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b
	.WORD	0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207
	.WORD	0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178
	.WORD	0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6
	.WORD	0x113f9804,0xbef90dae,0x1b710b35,0x131c471b
	.WORD	0x28db77f5,0x23047d84,0x32caab7b,0x40c72493
	.WORD	0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c
	.WORD	0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a
	.WORD	0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817
___
# Constant table for the SHA256 build ($SZ==4): one 32-bit word per entry.
# The low 10 bits of the final word equal the SHA256 value of $LAST10BITS
# (0x0f2).
$code.=<<___ if ($SZ==4);
	.WORD	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	.WORD	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	.WORD	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	.WORD	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	.WORD	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	.WORD	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	.WORD	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	.WORD	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	.WORD	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	.WORD	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	.WORD	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	.WORD	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	.WORD	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	.WORD	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	.WORD	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.WORD	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
___
# Function entry for $func: save %r2 and %r3..%r18 in a new stack frame,
# scale $num by the block size (16*$SZ bytes) and add $inp so that it
# points at the end of the input, stash the three arguments in the frame,
# and compute the address of L$table relative to L$pic into $Tbl.
$code.=<<___;

	.EXPORT	$func,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
	.ALIGN	64
$func
	.PROC
	.CALLINFO	FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
	$PUSH	%r12,`-$FRAME+9*$SIZE_T`(%sp)
	$PUSH	%r13,`-$FRAME+10*$SIZE_T`(%sp)
	$PUSH	%r14,`-$FRAME+11*$SIZE_T`(%sp)
	$PUSH	%r15,`-$FRAME+12*$SIZE_T`(%sp)
	$PUSH	%r16,`-$FRAME+13*$SIZE_T`(%sp)
	$PUSH	%r17,`-$FRAME+14*$SIZE_T`(%sp)
	$PUSH	%r18,`-$FRAME+15*$SIZE_T`(%sp)

	_shl	$num,`log(16*$SZ)/log(2)`,$num
	addl	$inp,$num,$num		; $num to point at the end of $inp

	$PUSH	$num,`-$FRAME_MARKER-4*$SIZE_T`(%sp)	; save arguments
	$PUSH	$inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)
	$PUSH	$ctx,`-$FRAME_MARKER-2*$SIZE_T`(%sp)

	blr	%r0,$Tbl
	ldi	3,$t1
L\$pic
	andcm	$Tbl,$t1,$Tbl		; wipe privilege level
	ldo	L\$table-L\$pic($Tbl),$Tbl
___
# SHA512 in a 32-bit build only: emit a run-time processor probe followed
# by a branch to L$parisc1.
# NOTE(review): per the file header this is the PA-RISC 2.0 detection, with
# the branch presumably taken only on pre-2.0 processors -- confirm.
$code.=<<___ if ($SZ==8 && $SIZE_T==4);
	ldi	31,$t1
	mtctl	$t1,%cr11
	extrd,u,*= $t1,%sar,1,$t1	; executes on PA-RISC 1.0
	b	L\$parisc1
	nop
___
# Load the eight context words into $A..$H, derive the shift amount for
# unaligned input from the low bits of $inp, and open the per-block loop
# L$oop: load the first table word into $t1 and round $inp down to a
# multiple of $SZ in $t0.
$code.=<<___;
	$LD	`0*$SZ`($ctx),$A	; load context
	$LD	`1*$SZ`($ctx),$B
	$LD	`2*$SZ`($ctx),$C
	$LD	`3*$SZ`($ctx),$D
	$LD	`4*$SZ`($ctx),$E
	$LD	`5*$SZ`($ctx),$F
	$LD	`6*$SZ`($ctx),$G
	$LD	`7*$SZ`($ctx),$H

	extru	$inp,31,`log($SZ)/log(2)`,$t0
	sh3addl	$t0,%r0,$t0
	subi	`8*$SZ`,$t0,$t0
	mtctl	$t0,%cr11		; load %sar with align factor

L\$oop
	ldi	`$SZ-1`,$t0
	$LDM	$SZ($Tbl),$t1
	andcm	$inp,$t0,$t0		; align $inp
___
	# Words 0..14 of the block go into @X[0..14].
	for ($i=0;$i<15;$i++) {		# load input block
	$code.="\t$LD	`$SZ*$i`($t0),@X[$i]\n";		}
# Branch to L$aligned when $inp was already aligned; word 15 is loaded
# right after the branch and a 17th word goes into @X[16] (the $inp
# register) for use by the alignment fix-up below.
$code.=<<___;
	cmpb,*=	$inp,$t0,L\$aligned
	$LD	`$SZ*15`($t0),@X[15]
	$LD	`$SZ*16`($t0),@X[16]
___
	# Unaligned input: combine each pair of neighbouring words into one.
	for ($i=0;$i<16;$i++) {		# align data
	$code.="\t_align	@X[$i],@X[$i+1],@X[$i]\n";	}
$code.=<<___;
L\$aligned
	nop	; otherwise /usr/ccs/bin/as is confused by below .WORD
___

# Rounds 0..15, unrolled.  @V is rotated by one position after every round
# so that each round sees the working variables under their new names.
for($i=0;$i<16;$i++)	{ &ROUND_00_15($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
L\$rounds
	nop	; otherwise /usr/ccs/bin/as is confused by below .WORD
___
# Sixteen more rounds form the body of the L$rounds loop; $i continues
# from 16, the value left by the previous loop.
for(;$i<32;$i++)	{ &ROUND_16_xx($i,@V); unshift(@V,pop(@V)); }
# Repeat L$rounds until ROUND_16_xx has flagged the end of the table in
# the low bit of $Tbl.  Then restore the arguments, rewind $Tbl to the
# start of the table (undoing the flag as well), add the previous context
# to $A..$H, store the result, advance $inp by one block and loop to L$oop
# until $inp reaches $num.
$code.=<<___;
	bb,>=	$Tbl,31,L\$rounds	; end of $Tbl signalled?
	nop

	$POP	`-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx	; restore arguments
	$POP	`-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
	$POP	`-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
	ldo	`-$rounds*$SZ-1`($Tbl),$Tbl		; rewind $Tbl

	$LD	`0*$SZ`($ctx),@X[0]	; load context
	$LD	`1*$SZ`($ctx),@X[1]
	$LD	`2*$SZ`($ctx),@X[2]
	$LD	`3*$SZ`($ctx),@X[3]
	$LD	`4*$SZ`($ctx),@X[4]
	$LD	`5*$SZ`($ctx),@X[5]
	addl	@X[0],$A,$A
	$LD	`6*$SZ`($ctx),@X[6]
	addl	@X[1],$B,$B
	$LD	`7*$SZ`($ctx),@X[7]
	ldo	`16*$SZ`($inp),$inp	; advance $inp

	$ST	$A,`0*$SZ`($ctx)	; save context
	addl	@X[2],$C,$C
	$ST	$B,`1*$SZ`($ctx)
	addl	@X[3],$D,$D
	$ST	$C,`2*$SZ`($ctx)
	addl	@X[4],$E,$E
	$ST	$D,`3*$SZ`($ctx)
	addl	@X[5],$F,$F
	$ST	$E,`4*$SZ`($ctx)
	addl	@X[6],$G,$G
	$ST	$F,`5*$SZ`($ctx)
	addl	@X[7],$H,$H
	$ST	$G,`6*$SZ`($ctx)
	$ST	$H,`7*$SZ`($ctx)

	cmpb,*<>,n $inp,$num,L\$oop
	$PUSH	$inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)	; save $inp
___
# Alternative code path, generated only when $SZ==8 and $SIZE_T==4 (SHA512 on
# a 32-bit target): every 64-bit value is kept as a hi/lo pair of 32-bit
# registers.
353392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstromif ($SZ==8 && $SIZE_T==4)	# SHA512 for 32-bit PA-RISC 1.0
354392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom{{
# Emit a branch to L$done after the code generated so far, then open the
# 64-byte aligned L$parisc1 entry point.
355392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
356392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	b	L\$done
357392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	nop
358392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
359392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.ALIGN	64
360392aa7cc7d2b122614c5393c3e357da07fd07af3Brian CarlstromL\$parisc1
361392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
362392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Working variables a..h, each split into a high and a low 32-bit half and
# mapped onto %r1..%r16.  @V is rotated by the round-emitting loops further
# down, so each round receives the registers in their current roles.
363392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom@V=(  $Ahi,  $Alo,  $Bhi,  $Blo,  $Chi,  $Clo,  $Dhi,  $Dlo,
364392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom      $Ehi,  $Elo,  $Fhi,  $Flo,  $Ghi,  $Glo,  $Hhi,  $Hlo) =
365392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom   ( "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
366392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom     "%r9","%r10","%r11","%r12","%r13","%r14","%r15","%r16");
# Scratch registers used by the round code ($a0..$a3, $t0..$t3) and the
# pointer from which the K[i] constants are loaded ($Tbl).
367392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$a0 ="%r17";
368392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$a1 ="%r18";
369392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$a2 ="%r19";
370392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$a3 ="%r20";
371392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$t0 ="%r21";
372392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$t1 ="%r22";
373392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$t2 ="%r28";
374392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$t3 ="%r29";
375392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$Tbl="%r31";
376392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Two hi/lo register pairs for message words: the round subs unpack @X as
# (current hi, current lo, next hi, next lo) and rotate it after each round.
377392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom@X=("%r23","%r24","%r25","%r26");	# zaps $num,$inp,$ctx
378392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Emit the assembly for one round on the 32-bit path, working on hi/lo
# register pairs with add/addc for the 64-bit additions.
#
# Arguments: the round index $i, the sixteen register names holding a..h as
# (hi,lo) pairs, and an optional $flag.  Without $flag the emitted code also
# loads X[i+1] from the stack frame; ROUND_16_xx_pa1 passes a true $flag
# because it has already emitted that load.
# Side effects: appends to $code and rotates @X by one hi/lo pair.
379392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstromsub ROUND_00_15_pa1 {
380392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrommy ($i,$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
381392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom       $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo,$flag)=@_;
382392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrommy ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
383392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Prefetch of the next message word, only for callers that did not do it.
384392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if (!$flag);
385392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
386392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo	; load X[i+1]
387392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
# Round body: h += X[i] + K[i] + Sigma1(e) + Ch(e,f,g); d += h;
# h += Sigma0(a) + Maj(a,b,c).  Once X[i] has been added, ($Xhi,$Xlo) are
# reused to hold K[i], which is loaded through $Tbl.
388392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
389392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shd	$ehi,$elo,$Sigma1[0],$t0
390392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 add	$Xlo,$hlo,$hlo
391392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shd	$elo,$ehi,$Sigma1[0],$t1
392392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 addc	$Xhi,$hhi,$hhi		; h += X[i]
393392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shd	$ehi,$elo,$Sigma1[1],$t2
394392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 ldwm	8($Tbl),$Xhi
395392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shd	$elo,$ehi,$Sigma1[1],$t3
396392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 ldw	-4($Tbl),$Xlo		; load K[i]
397392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$t2,$t0,$t0
398392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$t3,$t1,$t1
399392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 and	$flo,$elo,$a0
400392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 and	$fhi,$ehi,$a1
401392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shd	$ehi,$elo,$Sigma1[2],$t2
402392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 andcm	$glo,$elo,$a2
403392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shd	$elo,$ehi,$Sigma1[2],$t3
404392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 andcm	$ghi,$ehi,$a3
405392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$t2,$t0,$t0
406392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$t3,$t1,$t1		; Sigma1(e)
407392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	add	$Xlo,$hlo,$hlo
408392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 xor	$a2,$a0,$a0
409392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	addc	$Xhi,$hhi,$hhi		; h += K[i]
410392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 xor	$a3,$a1,$a1		; Ch(e,f,g)
411392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
412392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 add	$t0,$hlo,$hlo
413392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shd	$ahi,$alo,$Sigma0[0],$t0
414392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 addc	$t1,$hhi,$hhi		; h += Sigma1(e)
415392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shd	$alo,$ahi,$Sigma0[0],$t1
416392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 add	$a0,$hlo,$hlo
417392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shd	$ahi,$alo,$Sigma0[1],$t2
418392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 addc	$a1,$hhi,$hhi		; h += Ch(e,f,g)
419392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shd	$alo,$ahi,$Sigma0[1],$t3
420392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
421392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$t2,$t0,$t0
422392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$t3,$t1,$t1
423392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shd	$ahi,$alo,$Sigma0[2],$t2
424392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	and	$alo,$blo,$a0
425392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shd	$alo,$ahi,$Sigma0[2],$t3
426392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	and	$ahi,$bhi,$a1
427392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$t2,$t0,$t0
428392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$t3,$t1,$t1		; Sigma0(a)
429392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
430392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	and	$alo,$clo,$a2
431392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	and	$ahi,$chi,$a3
432392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$a2,$a0,$a0
433392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 add	$hlo,$dlo,$dlo
434392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$a3,$a1,$a1
435392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 addc	$hhi,$dhi,$dhi		; d += h
436392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	and	$blo,$clo,$a2
437392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 add	$t0,$hlo,$hlo
438392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	and	$bhi,$chi,$a3
439392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 addc	$t1,$hhi,$hhi		; h += Sigma0(a)
440392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$a2,$a0,$a0
441392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 add	$a0,$hlo,$hlo
442392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$a3,$a1,$a1		; Maj(a,b,c)
443392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 addc	$a1,$hhi,$hhi		; h += Maj(a,b,c)
444392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
445392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
# Loop control, emitted only for $i==15 with $flag set: the low 10 bits of the
# K[i] low word (still in $Xlo) are compared with $LAST10BITS, and the branch
# back to L$rounds_pa1 is skipped once they are equal.
446392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($i==15 && $flag);
447392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	extru	$Xlo,31,10,$Xlo
448392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	comiclr,= $LAST10BITS,$Xlo,%r0
449392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	b	L\$rounds_pa1
450392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	nop
451392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
# Rotate @X by two entries so the "next" pair becomes the current one.
452392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrompush(@X,shift(@X)); push(@X,shift(@X));
453392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom}
454392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Emit one round for i>=16 on the 32-bit path: first the message-schedule
# update for the current word, then the regular round via ROUND_00_15_pa1.
#
# The emitted code loads X[i+1], X[i+9] and X[i+14] (slot indices taken
# mod 16) from the frame, adds X[i+9], sigma0(X[i+1]) and sigma1(X[i+14]) to
# the word held in ($Xhi,$Xlo), and stores the result back to slot i mod 16.
# NOTE(review): the emitted assembler comment on the X[i+14] term reads
# "sigma0", while the shift amounts come from @sigma1.
#
# Arguments: the round index followed by the sixteen a..h register names.
455392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstromsub ROUND_16_xx_pa1 {
456392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrommy ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
457392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrommy ($i)=shift;
# Reduce the index to 0..15 so that the frame offsets and the $i==15 test in
# ROUND_00_15_pa1 apply to this batch of rounds.
458392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$i-=16;
459392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
460392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
461392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo	; load X[i+1]
462392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`-$XOFF+8*(($i+9)%16)`(%sp),$a1
463392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`-$XOFF+8*(($i+9)%16)+4`(%sp),$a0	; load X[i+9]
464392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`-$XOFF+8*(($i+14)%16)`(%sp),$a3
465392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`-$XOFF+8*(($i+14)%16)+4`(%sp),$a2	; load X[i+14]
466392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shd	$Xnhi,$Xnlo,$sigma0[0],$t0
467392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shd	$Xnlo,$Xnhi,$sigma0[0],$t1
468392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 add	$a0,$Xlo,$Xlo
469392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shd	$Xnhi,$Xnlo,$sigma0[1],$t2
470392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 addc	$a1,$Xhi,$Xhi
471392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shd	$Xnlo,$Xnhi,$sigma0[1],$t3
472392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$t2,$t0,$t0
473392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shd	$Xnhi,$Xnlo,$sigma0[2],$t2
474392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$t3,$t1,$t1
475392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	extru	$Xnhi,`31-$sigma0[2]`,`32-$sigma0[2]`,$t3
476392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$t2,$t0,$t0
477392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 shd	$a3,$a2,$sigma1[0],$a0
478392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$t3,$t1,$t1		; sigma0(X[i+1)&0x0f])
479392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 shd	$a2,$a3,$sigma1[0],$a1
480392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	add	$t0,$Xlo,$Xlo
481392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 shd	$a3,$a2,$sigma1[1],$t2
482392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	addc	$t1,$Xhi,$Xhi
483392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	 shd	$a2,$a3,$sigma1[1],$t3
484392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$t2,$a0,$a0
485392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shd	$a3,$a2,$sigma1[2],$t2
486392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$t3,$a1,$a1
487392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	extru	$a3,`31-$sigma1[2]`,`32-$sigma1[2]`,$t3
488392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$t2,$a0,$a0
489392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$t3,$a1,$a1		; sigma0(X[i+14)&0x0f])
490392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	add	$a0,$Xlo,$Xlo
491392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	addc	$a1,$Xhi,$Xhi
492392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
493392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$Xhi,`-$XOFF+8*($i%16)`(%sp)
494392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$Xlo,`-$XOFF+8*($i%16)+4`(%sp)
495392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
# @_ now holds only the register list (the index was shifted off above); the
# trailing 1 is $flag, telling ROUND_00_15_pa1 not to reload X[i+1].
496392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom&ROUND_00_15_pa1($i,@_,1);
497392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom}
# Load the sixteen 32-bit context halves into the working registers and set
# %sar (through %cr11) to 32 - 8*(low two bits of $inp) for the unaligned
# input path.  At L$oop_pa1 the emitted code branches to L$aligned_pa1 when
# the low two bits of $inp are zero; otherwise it continues with $inp rounded
# down to a word boundary and merges neighbouring words with vshd.
498392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
499392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`0*4`($ctx),$Ahi		; load context
500392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`1*4`($ctx),$Alo
501392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`2*4`($ctx),$Bhi
502392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`3*4`($ctx),$Blo
503392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`4*4`($ctx),$Chi
504392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`5*4`($ctx),$Clo
505392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`6*4`($ctx),$Dhi
506392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`7*4`($ctx),$Dlo
507392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`8*4`($ctx),$Ehi
508392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`9*4`($ctx),$Elo
509392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`10*4`($ctx),$Fhi
510392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`11*4`($ctx),$Flo
511392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`12*4`($ctx),$Ghi
512392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`13*4`($ctx),$Glo
513392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`14*4`($ctx),$Hhi
514392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`15*4`($ctx),$Hlo
515392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
516392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	extru	$inp,31,2,$t0
517392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	sh3addl	$t0,%r0,$t0
518392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	subi	32,$t0,$t0
519392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mtctl	$t0,%cr11		; load %sar with align factor
520392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
521392aa7cc7d2b122614c5393c3e357da07fd07af3Brian CarlstromL\$oop_pa1
522392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	extru	$inp,31,2,$a3
523392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	comib,=	0,$a3,L\$aligned_pa1
524392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	sub	$inp,$a3,$inp
525392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
526392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`0*4`($inp),$X[0]
527392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`1*4`($inp),$X[1]
528392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`2*4`($inp),$t2
529392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`3*4`($inp),$t3
530392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`4*4`($inp),$a0
531392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`5*4`($inp),$a1
532392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`6*4`($inp),$a2
533392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`7*4`($inp),$a3
534392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	vshd	$X[0],$X[1],$X[0]
535392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	vshd	$X[1],$t2,$X[1]
536392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$X[0],`-$XOFF+0*4`(%sp)
537392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`8*4`($inp),$t0
538392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	vshd	$t2,$t3,$t2
539392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$X[1],`-$XOFF+1*4`(%sp)
540392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`9*4`($inp),$t1
541392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	vshd	$t3,$a0,$t3
542392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
# Unaligned input, continued: software-pipelined copy that cycles through
# eight temporaries.  The first loop stores frame word $i, loads input word
# 8+$i (up to word 32, one more than the block's 32 words) and merges the
# following pair; the second loop only stores and merges; the final store
# sits in the delay slot of the branch to L$collected_pa1.
543392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom{
544392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrommy @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
545392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstromfor ($i=2;$i<=(128/4-8);$i++) {
546392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
547392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$t[0],`-$XOFF+$i*4`(%sp)
548392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`(8+$i)*4`($inp),$t[0]
549392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	vshd	$t[1],$t[2],$t[1]
550392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
551392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrompush(@t,shift(@t));
552392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom}
553392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstromfor (;$i<(128/4-1);$i++) {
554392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
555392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$t[0],`-$XOFF+$i*4`(%sp)
556392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	vshd	$t[1],$t[2],$t[1]
557392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
558392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrompush(@t,shift(@t));
559392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom}
560392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
561392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	b	L\$collected_pa1
562392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$t[0],`-$XOFF+$i*4`(%sp)
563392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
564392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
565392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom}
# Aligned input: the same copy to the frame without any vshd merging.
566392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
567392aa7cc7d2b122614c5393c3e357da07fd07af3Brian CarlstromL\$aligned_pa1
568392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`0*4`($inp),$X[0]
569392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`1*4`($inp),$X[1]
570392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`2*4`($inp),$t2
571392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`3*4`($inp),$t3
572392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`4*4`($inp),$a0
573392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`5*4`($inp),$a1
574392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`6*4`($inp),$a2
575392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`7*4`($inp),$a3
576392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$X[0],`-$XOFF+0*4`(%sp)
577392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`8*4`($inp),$t0
578392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$X[1],`-$XOFF+1*4`(%sp)
579392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`9*4`($inp),$t1
580392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
# Store-and-load while input words remain (up to word 31), then store-only
# for the words still held in the temporaries.
581392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom{
582392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrommy @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
583392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstromfor ($i=2;$i<(128/4-8);$i++) {
584392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
585392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$t[0],`-$XOFF+$i*4`(%sp)
586392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`(8+$i)*4`($inp),$t[0]
587392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
588392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrompush(@t,shift(@t));
589392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom}
590392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstromfor (;$i<128/4;$i++) {
591392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
592392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$t[0],`-$XOFF+$i*4`(%sp)
593392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
594392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrompush(@t,shift(@t));
595392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom}
# Both input paths join at this label.
596392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.="L\$collected_pa1\n";
597392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom}
598392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Emit rounds 0..15 once, then a second batch of 16 rounds after the
# L$rounds_pa1 label.  That second batch ends with the conditional branch
# back to L$rounds_pa1 generated by ROUND_00_15_pa1, so it is re-executed at
# run time.  After every round @V is rotated by one hi/lo pair so the
# register roles shift (h becomes a, a becomes b, ...).
599392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstromfor($i=0;$i<16;$i++)	{ &ROUND_00_15_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
600392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.="L\$rounds_pa1\n";
601392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstromfor(;$i<32;$i++)	{ &ROUND_16_xx_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
602392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# After the rounds: reload the saved $ctx/$inp/$num from the frame, rewind
# $Tbl by $rounds*$SZ, add the previous context to the working variables with
# add/addc pairs and store the sums back, advance $inp by 16*$SZ, then either
# fall out to L$done when $inp equals $num or save $inp and loop to L$oop_pa1.
603392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
604392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx	; restore arguments
605392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
606392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
607392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldo	`-$rounds*$SZ`($Tbl),$Tbl		; rewind $Tbl
608392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
609392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`0*4`($ctx),$t1		; update context
610392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`1*4`($ctx),$t0
611392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`2*4`($ctx),$t3
612392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`3*4`($ctx),$t2
613392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`4*4`($ctx),$a1
614392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`5*4`($ctx),$a0
615392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`6*4`($ctx),$a3
616392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	add	$t0,$Alo,$Alo
617392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`7*4`($ctx),$a2
618392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	addc	$t1,$Ahi,$Ahi
619392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`8*4`($ctx),$t1
620392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	add	$t2,$Blo,$Blo
621392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`9*4`($ctx),$t0
622392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	addc	$t3,$Bhi,$Bhi
623392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`10*4`($ctx),$t3
624392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	add	$a0,$Clo,$Clo
625392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`11*4`($ctx),$t2
626392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	addc	$a1,$Chi,$Chi
627392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`12*4`($ctx),$a1
628392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	add	$a2,$Dlo,$Dlo
629392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`13*4`($ctx),$a0
630392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	addc	$a3,$Dhi,$Dhi
631392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`14*4`($ctx),$a3
632392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	add	$t0,$Elo,$Elo
633392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldw	`15*4`($ctx),$a2
634392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	addc	$t1,$Ehi,$Ehi
635392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$Ahi,`0*4`($ctx)
636392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	add	$t2,$Flo,$Flo
637392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$Alo,`1*4`($ctx)
638392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	addc	$t3,$Fhi,$Fhi
639392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$Bhi,`2*4`($ctx)
640392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	add	$a0,$Glo,$Glo
641392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$Blo,`3*4`($ctx)
642392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	addc	$a1,$Ghi,$Ghi
643392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$Chi,`4*4`($ctx)
644392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	add	$a2,$Hlo,$Hlo
645392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$Clo,`5*4`($ctx)
646392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	addc	$a3,$Hhi,$Hhi
647392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$Dhi,`6*4`($ctx)
648392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldo	`16*$SZ`($inp),$inp	; advance $inp
649392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$Dlo,`7*4`($ctx)
650392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$Ehi,`8*4`($ctx)
651392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$Elo,`9*4`($ctx)
652392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$Fhi,`10*4`($ctx)
653392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$Flo,`11*4`($ctx)
654392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$Ghi,`12*4`($ctx)
655392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$Glo,`13*4`($ctx)
656392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$Hhi,`14*4`($ctx)
657392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	comb,=	$inp,$num,L\$done
658392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$Hlo,`15*4`($ctx)
659392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	b	L\$oop_pa1
660392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$PUSH	$inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)	; save $inp
661392aa7cc7d2b122614c5393c3e357da07fd07af3Brian CarlstromL\$done
662392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
# End of the block opened by the $SZ==8 && $SIZE_T==4 test.
663392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom}}
# Function epilogue: reload %r2 and %r4..%r18 from the frame, return through
# %r2 and restore %r3 with $POPMB in the branch delay slot (presumably also
# releasing the frame -- confirm against the $POPMB definition), then emit the
# identification string.
664392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
665392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2		; standard epilogue
666392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
667392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
668392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
669392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
670392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
671392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
672392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
673392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
674392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME+9*$SIZE_T`(%sp),%r12
675392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME+10*$SIZE_T`(%sp),%r13
676392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME+11*$SIZE_T`(%sp),%r14
677392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME+12*$SIZE_T`(%sp),%r15
678392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME+13*$SIZE_T`(%sp),%r16
679392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME+14*$SIZE_T`(%sp),%r17
680392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME+15*$SIZE_T`(%sp),%r18
681392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	bv	(%r2)
682392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.EXIT
683392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POPMB	-$FRAME(%sp),%r3
684392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.PROCEND
685392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.STRINGZ "SHA`64*$SZ` block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
686392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
687392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
688392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# Explicitly encode PA-RISC 2.0 instructions used in this module, so
689392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# that it can be compiled with .LEVEL 1.0. It should be noted that I
690392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
691392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# directive...
692392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Encoder for the "ldd" instruction.
#   $mod  - completer text following the mnemonic (may be empty)
#   $args - operand text
# Returns a "\t.WORD\t0x........" line with the original instruction kept as
# a trailing ";" comment when the operands have the form disp(%rB),%rT;
# any other operand form is returned unchanged as "\tldd<mod>\t<args>".
693392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrommy $ldd = sub {
694392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom  my ($mod,$args) = @_;
695392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom  my $orig = "ldd$mod\t$args";
696392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
    # $1 = displacement, $2 = base register number, $3 = target register number.
697392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom    if ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 3 suffices
698392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom    {	my $opcode=(0x14<<26)|($2<<21)|($3<<16)|(($1&0x1FF8)<<1)|(($1>>13)&1);
	# A completer starting with ",m" sets bit 3; ",mb" additionally sets bit 2.
699392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$opcode|=(1<<3) if ($mod =~ /^,m/);
700392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$opcode|=(1<<2) if ($mod =~ /^,mb/);
701392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
702392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom    }
703392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom    else { "\t".$orig; }
704392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom};
705392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Encoder for the "std" instruction.
#   $mod  - completer text following the mnemonic (kept in the comment only;
#           it does not influence the encoding here)
#   $args - operand text
# Returns a "\t.WORD\t0x........" line with the original instruction as a
# trailing ";" comment when the operands have the form %rR,disp(%rB);
# any other operand form is returned unchanged as "\tstd<mod>\t<args>".
706392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrommy $std = sub {
707392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom  my ($mod,$args) = @_;
708392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom  my $orig = "std$mod\t$args";
709392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
    # $1 = source register number, $2 = displacement, $3 = base register number.
710392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom    if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices
711392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom    {	my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1);
712392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
713392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom    }
714392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom    else { "\t".$orig; }
715392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom};
716392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Encoder for the "extrd" instruction.
#   $mod  - completer text following the mnemonic
#   $args - operand text
# Two operand forms are encoded into a "\t.WORD\t0x........" line (original
# instruction kept as a trailing ";" comment):
#   %rS,pos,len,%rT   - immediate position and length
#   %rS,%sar,len,%rT  - position taken from %sar
# Any other operand form is returned unchanged as "\textrd<mod>\t<args>".
717392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrommy $extrd = sub {
718392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom  my ($mod,$args) = @_;
719392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom  my $orig = "extrd$mod\t$args";
720392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
721392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom    # I only have ",u" completer, it's implicitly encoded...
722392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom    if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/)	# format 15
723392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom    {	my $opcode=(0x36<<26)|($1<<21)|($4<<16);
	# The length is encoded as 32 minus the operand value, split into a
	# sixth bit and a low five-bit field; the position is split the same way.
724392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	my $len=32-$3;
725392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5);		# encode pos
726392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$opcode |= (($len&0x20)<<7)|($len&0x1f);		# encode len
727392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
728392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom    }
729392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom    elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/)	# format 12
730392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom    {	my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9);
731392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	my $len=32-$2;
732392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$opcode |= (($len&0x20)<<3)|($len&0x1f);		# encode len
	# A completer containing "=" (optionally preceded by "*") sets bit 13.
733392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$opcode |= (1<<13) if ($mod =~ /,\**=/);
734392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
735392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom    }
736392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom    else { "\t".$orig; }
737392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom};
738392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Encoder for the "shrpd" instruction.
#   $mod  - completer text following the mnemonic (kept in the comment only)
#   $args - operand text
# Two operand forms are encoded into a "\t.WORD\t0x........" line (original
# instruction kept as a trailing ";" comment):
#   %rA,%rB,sa,%rT    - immediate shift amount, encoded as 63 minus sa
#   %rA,%rB,%sar,%rT  - shift amount taken from %sar
# Any other operand form is returned unchanged as "\tshrpd<mod>\t<args>".
739392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrommy $shrpd = sub {
740392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom  my ($mod,$args) = @_;
741392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom  my $orig = "shrpd$mod\t$args";
742392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
743392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom    if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/)	# format 14
744392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom    {	my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4;
745392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	my $cpos=63-$3;
746392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);		# encode sa
747392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
748392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom    }
749392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom    elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/)	# format 11
750392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom    {	sprintf "\t.WORD\t0x%08x\t; %s",
751392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom		(0x34<<26)|($2<<21)|($1<<16)|(1<<9)|$3,$orig;
752392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom    }
753392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom    else { "\t".$orig; }
754392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom};
755392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Dispatch one instruction to its encoder, if one exists.
#   $mnemonic - instruction name
#   $mod      - completer text following the mnemonic
#   $args     - operand text
# Returns the encoder's result when a code reference is stored in the
# variable named after the mnemonic ($ldd, $std, $extrd, $shrpd above);
# otherwise returns the instruction text as "\t<mnemonic><mod>\t<args>".
756392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstromsub assemble {
757392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom  my ($mnemonic,$mod,$args)=@_;
  # String eval looks up the variable whose name equals the mnemonic.
758392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom  my $opcode = eval("\$$mnemonic");
759392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
760392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom    ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args";
761392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom}
762392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Post-process the generated assembly in $code one line at a time and write
# the result to STDOUT.
foreach (split("\n",$code)) {
	# replace every `...` span with the result of evaluating it as Perl
	s/\`([^\`]*)\`/eval $1/ge;

	# The substitutions below are chained with "or": the first one that
	# matches wins and the remaining ones are skipped for this line.

	# shd with a literal shift amount above 31: swap the two source
	# registers and reduce the amount by 32
	s/shd\s+(%r[0-9]+),(%r[0-9]+),([0-9]+)/
		$3>31 ? sprintf("shd\t%$2,%$1,%d",$3-32)	# rotation for >=32
		:       sprintf("shd\t%$1,%$2,%d",$3)/e			or
	# translate made up instructions: _ror, _shr, _align, _shl
	# _ror: becomes shd ($SZ==4) or shrpd with the source register repeated
	s/_ror(\s+)(%r[0-9]+),/
		($SZ==4 ? "shd" : "shrpd")."$1$2,$2,"/e			or

	# _shr: becomes extru ($SZ==4) or extrd,u with position and length
	# derived from the shift amount
	s/_shr(\s+%r[0-9]+),([0-9]+),/
		$SZ==4 ? sprintf("extru%s,%d,%d,",$1,31-$2,32-$2)
		:        sprintf("extrd,u%s,%d,%d,",$1,63-$2,64-$2)/e	or

	# _align: becomes vshd ($SZ==4) or shrpd with %sar as the shift amount
	s/_align(\s+%r[0-9]+,%r[0-9]+),/
		($SZ==4 ? "vshd$1," : "shrpd$1,%sar,")/e		or

	# _shl: becomes zdep or depd,z; note this one is selected by $SIZE_T,
	# not $SZ
	s/_shl(\s+%r[0-9]+),([0-9]+),/
		$SIZE_T==4 ? sprintf("zdep%s,%d,%d,",$1,31-$2,32-$2)
		:            sprintf("depd,z%s,%d,%d,",$1,63-$2,64-$2)/e;

	# when $SIZE_T==4, pass each instruction through assemble(), which may
	# replace it with a hand-encoded form
	s/^\s+([a-z]+)([\S]*)\s+([\S]*)/assemble($1,$2,$3)/e if ($SIZE_T==4);

	# when $SIZE_T==4, rewrite "cmpb,*" as "comb,"
	s/cmpb,\*/comb,/ if ($SIZE_T==4);

	print $_,"\n";
}

# Buffered output is only flushed at close, so a write failure (full disk,
# closed pipe) may first show up here.  Fail loudly instead of silently
# leaving truncated assembly behind.
close STDOUT or die "error closing STDOUT: $!";
792