#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# April 2010
#
# The module implements "4-bit" GCM GHASH function and underlying
# single multiplication operation in GF(2^128). "4-bit" means that it
# uses 256 bytes per-key table [+128 bytes shared table]. On PA-7100LC
# it processes one byte in 19.6 cycles, which is more than twice as
# fast as code generated by gcc 3.2. PA-RISC 2.0 loop is scheduled for
# 8 cycles, but measured performance on PA-8600 system is ~9 cycles per
# processed byte. This is ~2.2x faster than 64-bit code generated by
# vendor compiler (which used to be very hard to beat:-).
#
# Special thanks to polarhome.com for providing HP-UX account.

# Command line: <flavour> <output file>.
# $flavour selects the 32- or 64-bit parameter set below; STDOUT is
# redirected to $output.
$flavour = shift;
$output = shift;
# Fail loudly if the output file cannot be created: an unchecked open
# would leave STDOUT closed and silently discard everything printed.
# With no output argument (or "-") the existing STDOUT is kept.
if (defined($output) && $output ne "" && $output ne "-") {
	open STDOUT,">",$output or die "can't open $output: $!";
}

# ABI-dependent parameters.  A flavour containing "64" selects the
# PA-RISC 2.0W (64-bit) settings, anything else the 32-bit ones:
#   $LEVEL          argument of the .LEVEL directive
#   $SIZE_T         size in bytes of one saved register slot
#   $FRAME_MARKER   size of the frame marker area
#   $SAVED_RP       offset below %sp at which %r2 is stored
#   $PUSH/$PUSHMA   store / store-and-modify-base mnemonics
#   $POP/$POPMB     load / load-and-modify-base mnemonics
#   $NREGS          value emitted as ENTRY_GR in .CALLINFO
if ($flavour =~ /64/) {
	$LEVEL		="2.0W";
	$SIZE_T		=8;
	$FRAME_MARKER	=80;
	$SAVED_RP	=16;
	$PUSH		="std";
	$PUSHMA		="std,ma";
	$POP		="ldd";
	$POPMB		="ldd,mb";
	$NREGS		=6;
} else {
	$LEVEL		="1.0";	#"\n\t.ALLOW\t2.0";
	$SIZE_T		=4;
	$FRAME_MARKER	=48;
	$SAVED_RP	=20;
	$PUSH		="stw";
	$PUSHMA		="stwm";
	$POP		="ldw";
	$POPMB		="ldwm";
	$NREGS		=11;
}

# Ten register-sized slots are reserved regardless of $NREGS.
$FRAME=10*$SIZE_T+$FRAME_MARKER;# NREGS saved regs + frame marker
				#                 [+ argument transfer]

# Symbolic register names interpolated into the assembly templates.

################# volatile registers
$Xi="%r26";	# argument block
$Htbl="%r25";
$inp="%r24";
$len="%r23";
$Hhh=$Htbl;	# variables
$Hll="%r22";
$Zhh="%r21";
$Zll="%r20";
$cnt="%r19";
$rem_4bit="%r28";
$rem="%r29";
$mask0xf0="%r31";

################# preserved registers
$Thh="%r1";
$Tll="%r2";
$nlo="%r3";
$nhi="%r4";
$byte="%r5";
# The extra 32-bit halves are only named (and only saved by the
# prologues) in the 32-bit build.
if ($SIZE_T==4) {
	$Zhl="%r6";
	$Zlh="%r7";
	$Hhl="%r8";
	$Hlh="%r9";
	$Thl="%r10";
	$Tlh="%r11";
}
# Shares %r6 with $Zhl; $Zhl appears only in the PA-RISC 1.0 templates.
$rem2="%r6";	# used in PA-RISC 2.0 code

# gcm_gmult_4bit: section directives, export, procedure header and
# prologue.  %r2 is stored at -$SAVED_RP(%sp), the frame is allocated by
# the $PUSHMA of %r3, and %r4-%r6 are saved in the new frame.
$code.=<<___;
	.LEVEL	$LEVEL
	.SPACE	\$TEXT\$
	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY

	.EXPORT	gcm_gmult_4bit,ENTRY,ARGW0=GR,ARGW1=GR
	.ALIGN	64
gcm_gmult_4bit
	.PROC
	.CALLINFO	FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
___
# The 32-bit build additionally saves %r7-%r11.
$code.=<<___ if ($SIZE_T==4);
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
___
# Position-independent lookup of the L$rem_4bit table: blr leaves a code
# address in $rem_4bit, andcm clears its two low bits (mask 3 in $rem),
# and ldo adds the L$rem_4bit-L$pic_gmult distance.  $mask0xf0 is loaded
# with 0xf0 for extracting the high nibble of each byte.
$code.=<<___;
	blr	%r0,$rem_4bit
	ldi	3,$rem
L\$pic_gmult
	andcm	$rem_4bit,$rem,$rem_4bit
	addl	$inp,$len,$len
	ldo	L\$rem_4bit-L\$pic_gmult($rem_4bit),$rem_4bit
	ldi	0xf0,$mask0xf0
___
# 32-bit build only: run-time selection between the PA-RISC 2.0 code that
# follows and the PA-RISC 1.0 code at L$parisc1_gmult.  Per the inline
# comment the extrd instruction executes on PA-RISC 1.0; presumably on a
# 2.0 CPU its condition nullifies the branch -- confirm against the
# architecture manual.
$code.=<<___ if ($SIZE_T==4);
	ldi	31,$rem
	mtctl	$rem,%cr11
	extrd,u,*= $rem,%sar,1,$rem	; executes on PA-RISC 1.0
	b	L\$parisc1_gmult
	nop
___

# gcm_gmult_4bit, PA-RISC 2.0 path (64-bit ldd/shrpd/depd/extrd).
# Bytes 15 and 14 of Xi are loaded up front; the loop then fetches the
# remaining bytes with ldbx, starting at index $cnt=13 and counting down
# via addib.  Each byte is split into a low and a high nibble that index
# the table at $Htbl ($Hhh) and $Htbl+8 ($Hll); the accumulator
# $Zhh:$Zll is shifted right by four bits per nibble and corrected with
# an entry from the $rem_4bit table.  The result is stored to Xi.
$code.=<<___;
	ldb	15($Xi),$nlo
	ldo	8($Htbl),$Hll

	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo

	ldd	$nlo($Hll),$Zll
	ldd	$nlo($Hhh),$Zhh

	depd,z	$Zll,60,4,$rem
	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldb	14($Xi),$nlo

	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh
	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem
	b	L\$oop_gmult_pa2
	ldi	13,$cnt

	.ALIGN	8
L\$oop_gmult_pa2
	xor	$rem,$Zhh,$Zhh		; moved here to work around gas bug
	depd,z	$Zll,60,4,$rem

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nlo($Hll),$Tll
	ldd	$nlo($Hhh),$Thh

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem

	xor	$rem,$Zhh,$Zhh
	depd,z	$Zll,60,4,$rem
	ldbx	$cnt($Xi),$nlo

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh

	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo
	ldd	$rem($rem_4bit),$rem

	xor	$Tll,$Zll,$Zll
	addib,uv -1,$cnt,L\$oop_gmult_pa2
	xor	$Thh,$Zhh,$Zhh

	xor	$rem,$Zhh,$Zhh
	depd,z	$Zll,60,4,$rem

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nlo($Hll),$Tll
	ldd	$nlo($Hhh),$Thh

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem

	xor	$rem,$Zhh,$Zhh
	depd,z	$Zll,60,4,$rem

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem

	xor	$rem,$Zhh,$Zhh
	std	$Zll,8($Xi)
	std	$Zhh,0($Xi)
___

# gcm_gmult_4bit, PA-RISC 1.0 path; emitted for the 32-bit build only.
# The leading "b L$done_gmult" lets the PA-RISC 2.0 code above skip this
# section.  The 128-bit accumulator is kept in four 32-bit registers
# ($Zhh,$Zhl,$Zlh,$Zll) and the table is read as words at offsets 0, 4,
# 8 and 12 from $Htbl.  Bytes of Xi are consumed from index 15 down to 0
# (15 and 14 explicitly, the rest via ldbx with $cnt starting at 13), and
# the result is stored to Xi word by word.
$code.=<<___ if ($SIZE_T==4);
	b	L\$done_gmult
	nop

L\$parisc1_gmult
	ldb	15($Xi),$nlo
	ldo	12($Htbl),$Hll
	ldo	8($Htbl),$Hlh
	ldo	4($Htbl),$Hhl

	and	$mask0xf0,$nlo,$nhi
	zdep	$nlo,27,4,$nlo

	ldwx	$nlo($Hll),$Zll
	ldwx	$nlo($Hlh),$Zlh
	ldwx	$nlo($Hhl),$Zhl
	ldwx	$nlo($Hhh),$Zhh
	zdep	$Zll,28,4,$rem
	ldb	14($Xi),$nlo
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhl,$Zlh,4,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	shrpw	$Zhh,$Zhl,4,$Zhl
	ldwx	$nhi($Hhl),$Thl
	extru	$Zhh,27,28,$Zhh
	ldwx	$nhi($Hhh),$Thh
	xor	$rem,$Zhh,$Zhh
	and	$mask0xf0,$nlo,$nhi
	zdep	$nlo,27,4,$nlo

	xor	$Tll,$Zll,$Zll
	ldwx	$nlo($Hll),$Tll
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nlo($Hlh),$Tlh
	xor	$Thl,$Zhl,$Zhl
	b	L\$oop_gmult_pa1
	ldi	13,$cnt

	.ALIGN	8
L\$oop_gmult_pa1
	zdep	$Zll,28,4,$rem
	ldwx	$nlo($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nlo($Hhh),$Thh
	shrpw	$Zhl,$Zlh,4,$Zlh
	ldbx	$cnt($Xi),$nlo
	xor	$Tll,$Zll,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhh,$Zhl,4,$Zhl
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	extru	$Zhh,27,28,$Zhh
	xor	$Thl,$Zhl,$Zhl
	ldwx	$nhi($Hhl),$Thl
	xor	$rem,$Zhh,$Zhh
	zdep	$Zll,28,4,$rem
	xor	$Thh,$Zhh,$Zhh
	ldwx	$nhi($Hhh),$Thh
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zhl,$Zlh,4,$Zlh
	shrpw	$Zhh,$Zhl,4,$Zhl
	and	$mask0xf0,$nlo,$nhi
	extru	$Zhh,27,28,$Zhh
	zdep	$nlo,27,4,$nlo
	xor	$Tll,$Zll,$Zll
	ldwx	$nlo($Hll),$Tll
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nlo($Hlh),$Tlh
	xor	$rem,$Zhh,$Zhh
	addib,uv -1,$cnt,L\$oop_gmult_pa1
	xor	$Thl,$Zhl,$Zhl

	zdep	$Zll,28,4,$rem
	ldwx	$nlo($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nlo($Hhh),$Thh
	shrpw	$Zhl,$Zlh,4,$Zlh
	xor	$Tll,$Zll,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhh,$Zhl,4,$Zhl
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	extru	$Zhh,27,28,$Zhh
	xor	$rem,$Zhh,$Zhh
	xor	$Thl,$Zhl,$Zhl
	ldwx	$nhi($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$nhi($Hhh),$Thh
	zdep	$Zll,28,4,$rem
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	shrpw	$Zhl,$Zlh,4,$Zlh
	shrpw	$Zhh,$Zhl,4,$Zhl
	extru	$Zhh,27,28,$Zhh
	xor	$Tll,$Zll,$Zll
	xor	$Tlh,$Zlh,$Zlh
	xor	$rem,$Zhh,$Zhh
	stw	$Zll,12($Xi)
	xor	$Thl,$Zhl,$Zhl
	stw	$Zlh,8($Xi)
	xor	$Thh,$Zhh,$Zhh
	stw	$Zhl,4($Xi)
	stw	$Zhh,0($Xi)
___
# gcm_gmult_4bit epilogue: reload the return pointer and the callee-saved
# registers stored by the prologue (%r4-%r6 always, %r7-%r11 in the
# 32-bit build).  %r3 is restored by the frame-popping load that follows.
$code.=<<___;
L\$done_gmult
	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2		; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
___
$code.=<<___ if ($SIZE_T==4);
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
___
# Return from gcm_gmult_4bit (the $POPMB after the bv restores %r3 and
# releases the frame), then the entry and prologue of gcm_ghash_4bit,
# which takes four register arguments.  ENTRY_GR uses $NREGS, as in
# gcm_gmult_4bit, so the unwind descriptor matches the registers the
# prologue actually saves in each build.
$code.=<<___;
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND

	.EXPORT	gcm_ghash_4bit,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
	.ALIGN	64
gcm_ghash_4bit
	.PROC
	.CALLINFO	FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
___
# The 32-bit build additionally saves %r7-%r11.
$code.=<<___ if ($SIZE_T==4);
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
___
# gcm_ghash_4bit set-up: position-independent lookup of the L$rem_4bit
# table (blr leaves a code address in $rem_4bit, andcm clears its two low
# bits, ldo adds the L$rem_4bit-L$pic_ghash distance).  $len is turned
# into $inp+$len and $mask0xf0 is loaded with 0xf0.
$code.=<<___;
	blr	%r0,$rem_4bit
	ldi	3,$rem
L\$pic_ghash
	andcm	$rem_4bit,$rem,$rem_4bit
	addl	$inp,$len,$len
	ldo	L\$rem_4bit-L\$pic_ghash($rem_4bit),$rem_4bit
	ldi	0xf0,$mask0xf0
___
# 32-bit build only: run-time selection between the PA-RISC 2.0 code that
# follows and the PA-RISC 1.0 code at L$parisc1_ghash.  Per the inline
# comment the extrd instruction executes on PA-RISC 1.0; presumably on a
# 2.0 CPU its condition nullifies the branch -- confirm against the
# architecture manual.
$code.=<<___ if ($SIZE_T==4);
	ldi	31,$rem
	mtctl	$rem,%cr11
	extrd,u,*= $rem,%sar,1,$rem	; executes on PA-RISC 1.0
	b	L\$parisc1_ghash
	nop
___

376392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
377392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldb	15($Xi),$nlo
378392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldo	8($Htbl),$Hll
379392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
380392aa7cc7d2b122614c5393c3e357da07fd07af3Brian CarlstromL\$outer_ghash_pa2
381392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldb	15($inp),$nhi
382392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$nhi,$nlo,$nlo
383392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	and	$mask0xf0,$nlo,$nhi
384392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	depd,z	$nlo,59,4,$nlo
385392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
386392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldd	$nlo($Hll),$Zll
387392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldd	$nlo($Hhh),$Zhh
388392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
389392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	depd,z	$Zll,60,4,$rem
390392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shrpd	$Zhh,$Zll,4,$Zll
391392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	extrd,u	$Zhh,59,60,$Zhh
392392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldb	14($Xi),$nlo
393392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldb	14($inp),$byte
394392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
395392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldd	$nhi($Hll),$Tll
396392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldd	$nhi($Hhh),$Thh
397392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$byte,$nlo,$nlo
398392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	and	$mask0xf0,$nlo,$nhi
399392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	depd,z	$nlo,59,4,$nlo
400392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
401392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Tll,$Zll,$Zll
402392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Thh,$Zhh,$Zhh
403392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldd	$rem($rem_4bit),$rem
404392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	b	L\$oop_ghash_pa2
405392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldi	13,$cnt
406392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
407392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.ALIGN	8
408392aa7cc7d2b122614c5393c3e357da07fd07af3Brian CarlstromL\$oop_ghash_pa2
409392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$rem,$Zhh,$Zhh		; moved here to work around gas bug
410392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	depd,z	$Zll,60,4,$rem2
411392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
412392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shrpd	$Zhh,$Zll,4,$Zll
413392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	extrd,u	$Zhh,59,60,$Zhh
414392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldd	$nlo($Hll),$Tll
415392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldd	$nlo($Hhh),$Thh
416392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
417392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Tll,$Zll,$Zll
418392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Thh,$Zhh,$Zhh
419392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldbx	$cnt($Xi),$nlo
420392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldbx	$cnt($inp),$byte
421392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
422392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	depd,z	$Zll,60,4,$rem
423392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shrpd	$Zhh,$Zll,4,$Zll
424392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldd	$rem2($rem_4bit),$rem2
425392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
426392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$rem2,$Zhh,$Zhh
427392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$byte,$nlo,$nlo
428392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldd	$nhi($Hll),$Tll
429392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldd	$nhi($Hhh),$Thh
430392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
431392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	and	$mask0xf0,$nlo,$nhi
432392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	depd,z	$nlo,59,4,$nlo
433392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
434392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	extrd,u	$Zhh,59,60,$Zhh
435392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Tll,$Zll,$Zll
436392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
437392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldd	$rem($rem_4bit),$rem
438392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	addib,uv -1,$cnt,L\$oop_ghash_pa2
439392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Thh,$Zhh,$Zhh
440392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
441392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$rem,$Zhh,$Zhh
442392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	depd,z	$Zll,60,4,$rem2
443392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
444392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shrpd	$Zhh,$Zll,4,$Zll
445392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	extrd,u	$Zhh,59,60,$Zhh
446392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldd	$nlo($Hll),$Tll
447392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldd	$nlo($Hhh),$Thh
448392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
449392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Tll,$Zll,$Zll
450392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Thh,$Zhh,$Zhh
451392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
452392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	depd,z	$Zll,60,4,$rem
453392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shrpd	$Zhh,$Zll,4,$Zll
454392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldd	$rem2($rem_4bit),$rem2
455392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
456392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$rem2,$Zhh,$Zhh
457392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldd	$nhi($Hll),$Tll
458392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldd	$nhi($Hhh),$Thh
459392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
460392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	extrd,u	$Zhh,59,60,$Zhh
461392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Tll,$Zll,$Zll
462392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Thh,$Zhh,$Zhh
463392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldd	$rem($rem_4bit),$rem
464392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
465392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$rem,$Zhh,$Zhh
466392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	std	$Zll,8($Xi)
467392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldo	16($inp),$inp
468392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	std	$Zhh,0($Xi)
469392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	cmpb,*<> $inp,$len,L\$outer_ghash_pa2
470392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	copy	$Zll,$nlo
471392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
472392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Appended only when $SIZE_T==4.  The fragment first branches to
# L$done_ghash (ending whatever code was emitted before it) and then defines
# the L$parisc1_ghash path, which uses 32-bit word loads and stores
# (ldwx/stw) and keeps the accumulator in four registers
# ($Zhh, $Zhl, $Zlh, $Zll).  Each L$outer_ghash_pa1 pass reads input bytes
# 15 and 14 up front, enters L$oop_ghash_pa1 with $cnt = 13 (used as the
# byte index for the ldbx loads and stepped by "addib,uv -1"), stores the
# four result words to 0..12($Xi), advances $inp by 16 and repeats while
# $inp differs from $len.
# NOTE(review): how control reaches L$parisc1_ghash, and the setup of
# $Hhh, $mask0xf0 and $rem_4bit, happen outside this fragment -- confirm
# against the code that precedes it.
473392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($SIZE_T==4);
474392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	b	L\$done_ghash
475392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	nop
476392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
477392aa7cc7d2b122614c5393c3e357da07fd07af3Brian CarlstromL\$parisc1_ghash
478392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldb	15($Xi),$nlo
479392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldo	12($Htbl),$Hll
480392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldo	8($Htbl),$Hlh
481392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldo	4($Htbl),$Hhl
482392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
483392aa7cc7d2b122614c5393c3e357da07fd07af3Brian CarlstromL\$outer_ghash_pa1
484392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldb	15($inp),$byte
485392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$byte,$nlo,$nlo
486392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	and	$mask0xf0,$nlo,$nhi
487392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	zdep	$nlo,27,4,$nlo
488392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
489392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$nlo($Hll),$Zll
490392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$nlo($Hlh),$Zlh
491392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$nlo($Hhl),$Zhl
492392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$nlo($Hhh),$Zhh
493392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	zdep	$Zll,28,4,$rem
494392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldb	14($Xi),$nlo
495392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldb	14($inp),$byte
496392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$rem($rem_4bit),$rem
497392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shrpw	$Zlh,$Zll,4,$Zll
498392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$nhi($Hll),$Tll
499392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shrpw	$Zhl,$Zlh,4,$Zlh
500392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$nhi($Hlh),$Tlh
501392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shrpw	$Zhh,$Zhl,4,$Zhl
502392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$nhi($Hhl),$Thl
503392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	extru	$Zhh,27,28,$Zhh
504392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$nhi($Hhh),$Thh
505392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$byte,$nlo,$nlo
506392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$rem,$Zhh,$Zhh
507392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	and	$mask0xf0,$nlo,$nhi
508392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	zdep	$nlo,27,4,$nlo
509392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
510392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Tll,$Zll,$Zll
511392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$nlo($Hll),$Tll
512392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Tlh,$Zlh,$Zlh
513392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$nlo($Hlh),$Tlh
514392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Thl,$Zhl,$Zhl
515392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	b	L\$oop_ghash_pa1
516392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldi	13,$cnt
517392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
518392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.ALIGN	8
519392aa7cc7d2b122614c5393c3e357da07fd07af3Brian CarlstromL\$oop_ghash_pa1
520392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	zdep	$Zll,28,4,$rem
521392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$nlo($Hhl),$Thl
522392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Thh,$Zhh,$Zhh
523392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$rem($rem_4bit),$rem
524392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shrpw	$Zlh,$Zll,4,$Zll
525392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$nlo($Hhh),$Thh
526392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shrpw	$Zhl,$Zlh,4,$Zlh
527392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldbx	$cnt($Xi),$nlo
528392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Tll,$Zll,$Zll
529392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$nhi($Hll),$Tll
530392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shrpw	$Zhh,$Zhl,4,$Zhl
531392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldbx	$cnt($inp),$byte
532392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Tlh,$Zlh,$Zlh
533392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$nhi($Hlh),$Tlh
534392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	extru	$Zhh,27,28,$Zhh
535392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Thl,$Zhl,$Zhl
536392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$nhi($Hhl),$Thl
537392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$rem,$Zhh,$Zhh
538392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	zdep	$Zll,28,4,$rem
539392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Thh,$Zhh,$Zhh
540392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$nhi($Hhh),$Thh
541392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shrpw	$Zlh,$Zll,4,$Zll
542392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$rem($rem_4bit),$rem
543392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shrpw	$Zhl,$Zlh,4,$Zlh
544392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$byte,$nlo,$nlo
545392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shrpw	$Zhh,$Zhl,4,$Zhl
546392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	and	$mask0xf0,$nlo,$nhi
547392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	extru	$Zhh,27,28,$Zhh
548392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	zdep	$nlo,27,4,$nlo
549392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Tll,$Zll,$Zll
550392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$nlo($Hll),$Tll
551392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Tlh,$Zlh,$Zlh
552392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$nlo($Hlh),$Tlh
553392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$rem,$Zhh,$Zhh
554392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	addib,uv -1,$cnt,L\$oop_ghash_pa1
555392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Thl,$Zhl,$Zhl
556392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
557392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	zdep	$Zll,28,4,$rem
558392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$nlo($Hhl),$Thl
559392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Thh,$Zhh,$Zhh
560392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$rem($rem_4bit),$rem
561392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shrpw	$Zlh,$Zll,4,$Zll
562392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$nlo($Hhh),$Thh
563392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shrpw	$Zhl,$Zlh,4,$Zlh
564392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Tll,$Zll,$Zll
565392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$nhi($Hll),$Tll
566392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shrpw	$Zhh,$Zhl,4,$Zhl
567392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Tlh,$Zlh,$Zlh
568392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$nhi($Hlh),$Tlh
569392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	extru	$Zhh,27,28,$Zhh
570392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$rem,$Zhh,$Zhh
571392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Thl,$Zhl,$Zhl
572392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$nhi($Hhl),$Thl
573392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Thh,$Zhh,$Zhh
574392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$nhi($Hhh),$Thh
575392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	zdep	$Zll,28,4,$rem
576392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldwx	$rem($rem_4bit),$rem
577392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shrpw	$Zlh,$Zll,4,$Zll
578392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shrpw	$Zhl,$Zlh,4,$Zlh
579392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shrpw	$Zhh,$Zhl,4,$Zhl
580392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	extru	$Zhh,27,28,$Zhh
581392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Tll,$Zll,$Zll
582392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Tlh,$Zlh,$Zlh
583392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$rem,$Zhh,$Zhh
584392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$Zll,12($Xi)
585392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Thl,$Zhl,$Zhl
586392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$Zlh,8($Xi)
587392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	$Thh,$Zhh,$Zhh
588392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$Zhl,4($Xi)
589392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ldo	16($inp),$inp
590392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	stw	$Zhh,0($Xi)
591392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	comb,<>	$inp,$len,L\$outer_ghash_pa1
592392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	copy	$Zll,$nlo
593392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
# Shared exit label L$done_ghash: reload %r2 and %r4-%r6 from their frame
# slots.  The back-ticked offsets are Perl expressions that the output loop
# at the end of this file evaluates before printing.
# NOTE(review): $POP, $FRAME and $SAVED_RP are defined outside this
# fragment; presumably $POP is the load mnemonic matching $SIZE_T -- confirm.
594392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
595392aa7cc7d2b122614c5393c3e357da07fd07af3Brian CarlstromL\$done_ghash
596392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2		; standard epilogue
597392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
598392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
599392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
600392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
# %r7-%r11 are reloaded only when $SIZE_T==4.
601392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($SIZE_T==4);
602392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
603392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
604392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
605392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
606392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
607392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
# Return sequence, the L$rem_4bit table and the module identification
# string.
#
# The table holds sixteen two-word entries; the first word of each entry is
# a 16-bit constant shifted left by 16 (the back-ticked expressions are
# evaluated by the output loop at the end of this file) and the second word
# is zero.  The GHASH loops read it through the $rem_4bit register with
# "ldd $rem(...)" / "ldwx $rem(...)".
# NOTE(review): $POPMB and $FRAME are defined outside this fragment;
# $POPMB presumably restores %r3 and releases the frame while the "bv"
# completes -- confirm.
#
# The identification string names the project as CRYPTOGAMS, matching the
# license header at the top of the file (it used to read "GRYPTOGAMS").
$code.=<<___;
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND

	.ALIGN	64
L\$rem_4bit
	.WORD	`0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0
	.WORD	`0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0
	.WORD	`0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0
	.WORD	`0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0
	.STRINGZ "GHASH for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
	.ALIGN	64
___
623392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
624392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# Explicitly encode PA-RISC 2.0 instructions used in this module, so
625392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# that it can be compiled with .LEVEL 1.0. It should be noted that I
626392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
627392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# directive...
628392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Hand-encode the PA-RISC 2.0 "ldd" (load doubleword) instruction as a raw
# .WORD so that the generated file can be assembled at .LEVEL 1.0.
#
#   $mod  - completer text that followed the mnemonic ("" or e.g. ",ma", ",mb")
#   $args - operand text as it appears in the generated assembly
#
# Returns one line of assembler text: either "\t.WORD\t0x........" with the
# original instruction kept as a trailing comment, or the instruction itself
# (tab-prefixed) when the operands cannot be encoded here.
#
# Two operand shapes are recognised:
#   format 4:  %rX(%rB),%rT    indexed load
#   format 5:  disp(%rB),%rT   short displacement; the field is 5 bits wide
#                              (sign included), so only -16..15 fit.  A
#                              displacement outside that range is passed
#                              through unencoded instead of being silently
#                              truncated to a different offset.
my $ldd = sub {
  my ($mod,$args) = @_;
  my $orig = "ldd$mod\t$args";

    if (my ($index,$base,$target) =
	    $args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/)	# format 4
    {	my $opcode=(0x03<<26)|($base<<21)|($index<<16)|(3<<6)|$target;
	return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }

    if (my ($disp,$base,$target) =
	    $args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/)	# format 5
    {	if ($disp >= -16 && $disp <= 15)
	{   my $opcode=(0x03<<26)|($base<<21)|(1<<12)|(3<<6)|$target;
	    # low four bits of the offset, then its sign bit
	    $opcode|=(($disp&0xF)<<17)|(($disp&0x10)<<12);
	    $opcode|=(1<<5)  if ($mod =~ /^,m/);	# base modification
	    $opcode|=(1<<13) if ($mod =~ /^,mb/);	# ...before the access
	    return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
	}
    }

    return "\t".$orig;
};
646392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Hand-encode the PA-RISC 2.0 "std" (store doubleword) instruction as a raw
# .WORD so that the generated file can be assembled at .LEVEL 1.0.
#
#   $mod  - completer text that followed the mnemonic (not encoded here)
#   $args - operand text of the form "%rS,disp(%rB)"
#
# Returns "\t.WORD\t0x........" with the original instruction as a trailing
# comment, or the tab-prefixed instruction itself when it cannot be encoded.
#
# The encoding used below keeps displacement bits 3..12 plus a sign bit, so
# only multiples of 8 in -8192..8184 are representable.  Anything else is
# passed through unencoded rather than being silently rounded or wrapped to
# a different address.
my $std = sub {
  my ($mod,$args) = @_;
  my $orig = "std$mod\t$args";

    if (my ($src,$disp,$base) =
	    $args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/)	# format 3 suffices
    {	if ($disp % 8 == 0 && $disp >= -8192 && $disp <= 8184)
	{   my $opcode=(0x1c<<26)|($base<<21)|($src<<16)
			|(($disp&0x1FF8)<<1)|(($disp>>13)&1);
	    return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
	}
    }

    return "\t".$orig;
};
657392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Hand-encode the PA-RISC 2.0 "extrd" (extract doubleword) instruction as a
# raw .WORD.
#
#   $mod  - completer text that followed the mnemonic
#   $args - operand text; either "%rS,pos,len,%rT" (fixed position) or
#           "%rS,%sar,len,%rT" (position taken from %sar)
#
# Returns "\t.WORD\t0x........" with the original instruction as a trailing
# comment, or the tab-prefixed instruction itself when neither operand shape
# matches.  Only the ",u" completer is expected, so it is encoded implicitly
# and never parsed out of $mod.
my $extrd = sub {
    my ($mod, $args) = @_;
    my $insn = "extrd$mod\t$args";

    # format 15: fixed bit position
    if (my ($src, $pos, $width, $dst) =
            $args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {
        my $clen = 32 - $width;
        my $word = (0x36<<26) | ($src<<21) | ($dst<<16);
        $word |= (($pos&0x20)<<6)  | (($pos&0x1f)<<5);     # position field
        $word |= (($clen&0x20)<<7) | ($clen&0x1f);         # length field
        return sprintf "\t.WORD\t0x%08x\t; %s", $word, $insn;
    }

    # format 12: bit position supplied by %sar
    if (my ($src, $width, $dst) =
            $args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) {
        my $clen = 32 - $width;
        my $word = (0x34<<26) | ($src<<21) | ($dst<<16) | (2<<11) | (1<<9);
        $word |= (($clen&0x20)<<3) | ($clen&0x1f);         # length field
        if ($mod =~ /,\**=/) {                             # "=" condition
            $word |= (1<<13);
        }
        return sprintf "\t.WORD\t0x%08x\t; %s", $word, $insn;
    }

    return "\t" . $insn;
};
679392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Hand-encode the PA-RISC 2.0 "shrpd" (shift right pair doubleword)
# instruction as a raw .WORD.
#
#   $mod  - completer text that followed the mnemonic (not encoded here)
#   $args - operand text; either "%rA,%rB,sa,%rT" (immediate shift amount)
#           or "%rA,%rB,%sar,%rT" (shift amount taken from %sar)
#
# Returns "\t.WORD\t0x........" with the original instruction as a trailing
# comment, or the tab-prefixed instruction itself when neither operand shape
# matches.
my $shrpd = sub {
    my ($mod, $args) = @_;
    my $insn = "shrpd$mod\t$args";

    # format 14: immediate shift amount, stored as 63 minus the amount
    if (my ($hi, $lo, $shift, $dst) =
            $args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) {
        my $cpos = 63 - $shift;
        my $word = (0x34<<26) | ($lo<<21) | ($hi<<16) | (1<<10) | $dst;
        $word |= (($cpos&0x20)<<6) | (($cpos&0x1f)<<5);    # shift amount
        return sprintf "\t.WORD\t0x%08x\t; %s", $word, $insn;
    }

    # format 11: shift amount supplied by %sar
    if (my ($hi, $lo, $dst) =
            $args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) {
        my $word = (0x34<<26) | ($lo<<21) | ($hi<<16) | (1<<9) | $dst;
        return sprintf "\t.WORD\t0x%08x\t; %s", $word, $insn;
    }

    return "\t" . $insn;
};
696392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Hand-encode the PA-RISC 2.0 "depd" (deposit doubleword) instruction as a
# raw .WORD.
#
#   $mod  - completer text that followed the mnemonic
#   $args - operand text of the form "%rS,pos,len,%rT"
#
# Returns "\t.WORD\t0x........" with the original instruction as a trailing
# comment, or the tab-prefixed instruction itself when the operands do not
# match.  Only the ",z" completer is expected, so it is encoded implicitly
# and never parsed out of $mod.
my $depd = sub {
    my ($mod, $args) = @_;
    my $insn = "depd$mod\t$args";

    # format 16 is the only shape handled; everything else passes through
    my ($src, $pos, $width, $dst) =
        $args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/
        or return "\t" . $insn;

    my $cpos = 63 - $pos;
    my $clen = 32 - $width;
    my $word = (0x3c<<26) | ($dst<<21) | ($src<<16);
    $word |= (($cpos&0x20)<<6) | (($cpos&0x1f)<<5);        # position field
    $word |= (($clen&0x20)<<7) | ($clen&0x1f);             # length field

    return sprintf "\t.WORD\t0x%08x\t; %s", $word, $insn;
};
712392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Mnemonics that have to be emitted as raw .WORDs, mapped to the closure
# that encodes them.  An explicit table replaces the former string eval of
# "\$$mnemonic", which looked up whatever scalar happened to share the
# mnemonic's name.
my %encoder_for = (
    ldd   => $ldd,
    std   => $std,
    extrd => $extrd,
    shrpd => $shrpd,
    depd  => $depd,
);

# Turn one parsed instruction back into a line of assembler text.
#
#   $mnemonic - instruction name (lower-case letters only)
#   $mod      - completer text that directly followed the mnemonic, or ""
#   $args     - operand text
#
# Returns the encoder's output for mnemonics listed in %encoder_for, and
# "\t<mnemonic><mod>\t<args>" for everything else.
sub assemble {
  my ($mnemonic,$mod,$args)=@_;
  my $encoder = $encoder_for{$mnemonic};

    ref($encoder) eq 'CODE' ? $encoder->($mod,$args) : "\t$mnemonic$mod\t$args";
}
719392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Post-process the accumulated assembly one line at a time and print it.
# $_ is kept as the loop variable because the back-ticked snippets are
# evaluated with eval inside this scope.
foreach (split("\n",$code)) {
	# Replace each `...` span with the value of the Perl expression
	# it encloses.
	s/\`([^\`]*)\`/eval $1/ge;

	if ($SIZE_T==4) {
		# Let assemble() hand-encode the instructions it knows about,
		# then rewrite "cmpb,*" to "comb," and strip the first
		# remaining "*" that follows a comma.
		s/^\s+([a-z]+)([\S]*)\s+([\S]*)/assemble($1,$2,$3)/e;
		s/cmpb,\*/comb,/;
		s/,\*/,/;
	}
	print $_,"\n";
}

# Buffered write errors only surface when the handle is closed; fail loudly
# instead of leaving a truncated assembly file behind with a zero exit code.
close STDOUT or die "error closing STDOUT: $!";
731