#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# September 2010.
#
# The module implements "4-bit" GCM GHASH function and underlying
# single multiplication operation in GF(2^128). "4-bit" means that it
# uses 256 bytes per-key table [+128 bytes shared table]. Performance
# was measured to be ~18 cycles per processed byte on z10, which is
# almost 40% better than gcc-generated code. It should be noted that
# 18 cycles is a worse result than expected: the loop is scheduled for
# 12 and the result should be close to 12. In the absence of
# instruction-level profiling data it's impossible to tell why...

# November 2010.
#
# Adapt for -m31 build. If the kernel supports what's called the
# "highgprs" feature on Linux [see /proc/cpuinfo], it's possible to use
# 64-bit instructions and achieve "64-bit" performance even in 31-bit
# legacy application context. The feature is not specific to any
# particular processor, as long as it's a "z-CPU". The latter implies
# that the code remains z/Architecture specific. On z990 it was measured
# to perform 2.8x better than 32-bit code generated by gcc 4.3.

# March 2011.
#
# Support for hardware KIMD-GHASH is verified to produce correct
# result and therefore is engaged. On z196 it was measured to process
# 8KB buffer ~7x faster than software implementation. It's not as
# impressive for smaller buffer sizes, and for the smallest 16-byte
# buffer it's actually almost 2 times slower, which is the reason why
# KIMD-GHASH is not used in gcm_gmult_4bit.

# The first command-line argument selects the target ABI flavour.
$flavour = shift;

# Flavours matching 31/32 use 4-byte pointers and the plain stm/lm
# mnemonics; everything else is treated as 64-bit and gets the "g"
# suffix (stmg/lmg) in the generated code.
if ($flavour =~ /3[12]/) {
	$SIZE_T=4;
	$g="";
} else {
	$SIZE_T=8;
	$g="g";
}

# Consume the remaining arguments until one looks like a bare file name
# ("name.ext"); $output is left false when no such argument is present.
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}

# Redirect STDOUT only when an output file was actually named, and fail
# loudly if it cannot be created.  Previously a failed open was ignored and
# the assembly was silently written to the inherited STDOUT instead.
if ($output) {
	open(STDOUT,">",$output) or die "can't open $output: $!";
}

# Set to a true value to leave the hardware (KIMD) path out of
# gcm_ghash_4bit and generate the software implementation only.
$softonly=0;

# Symbolic names for the general registers used by the generated code.
# The result is kept in two 64-bit halves, high and low.
($Zhi,$Zlo)=("%r0","%r1");

# argument block
($Xi,$Htbl,$inp,$len)=map("%r$_",(2..5));

# variables
($rem0,$rem1,$nlo,$nhi,$xi,$cnt,$tmp,$x78,$rem_4bit)=map("%r$_",(6..14));

$sp="%r15";

# Open the text section and emit the global symbol, alignment and entry
# label of gcm_gmult_4bit.
$code.=<<___;
.text

.globl	gcm_gmult_4bit
.align	32
gcm_gmult_4bit:
___
# Hardware (KIMD) variant of gcm_gmult_4bit.  The trailing "&& 0" makes the
# condition always false, so this text is never appended to $code; it is
# kept for reference only (see the remark on the statement itself).
$code.=<<___ if(!$softonly && 0);	# hardware is slow for single block...
	larl	%r1,OPENSSL_s390xcap_P
	lg	%r0,0(%r1)
	tmhl	%r0,0x4000	# check for message-security-assist
	jz	.Lsoft_gmult
	lghi	%r0,0
	la	%r1,16($sp)
	.long	0xb93e0004	# kimd %r0,%r4
	lg	%r1,24($sp)
	tmhh	%r1,0x4000	# check for function 65
	jz	.Lsoft_gmult
	stg	%r0,16($sp)	# arrange 16 bytes of zero input
	stg	%r0,24($sp)
	lghi	%r0,65		# function 65
	la	%r1,0($Xi)	# H lies right after Xi in gcm128_context
	la	$inp,16($sp)
	lghi	$len,16
	.long	0xb93e0004	# kimd %r0,$inp
	brc	1,.-4		# pay attention to "partial completion"
	br	%r14
.align	32
.Lsoft_gmult:
___
# Software path of gcm_gmult_4bit: save %r6-%r14, bias $Xi by -1 (the later
# accesses compensate with a +1 displacement), set $len to 1, load the
# $x78 mask and the rem_4bit table address, load the low half of Xi and
# jump to .Lgmult_shortcut, a label emitted further down as part of
# gcm_ghash_4bit.  The same here-document also opens gcm_ghash_4bit
# (global symbol, alignment and entry label).
$code.=<<___;
	stm${g}	%r6,%r14,6*$SIZE_T($sp)

	aghi	$Xi,-1
	lghi	$len,1
	lghi	$x78,`0xf<<3`
	larl	$rem_4bit,rem_4bit

	lg	$Zlo,8+1($Xi)		# Xi
	j	.Lgmult_shortcut
.type	gcm_gmult_4bit,\@function
.size	gcm_gmult_4bit,(.-gcm_gmult_4bit)

.globl	gcm_ghash_4bit
.align	32
gcm_ghash_4bit:
___
# Hardware path of gcm_ghash_4bit, emitted unless $softonly is set.  The
# generated code tests a bit of the doubleword at OPENSSL_s390xcap_P,
# issues KIMD with %r0=0 and %r1 pointing at 16($sp), then tests a bit of
# the doubleword at 24($sp) for function 65.  If either test fails it
# branches to .Lsoft_ghash; otherwise it runs KIMD with %r0=65 and %r1
# pointing at Xi, repeats it while the condition code requests it, and
# returns.
$code.=<<___ if(!$softonly);
	larl	%r1,OPENSSL_s390xcap_P
	lg	%r0,0(%r1)
	tmhl	%r0,0x4000	# check for message-security-assist
	jz	.Lsoft_ghash
	lghi	%r0,0
	la	%r1,16($sp)
	.long	0xb93e0004	# kimd %r0,%r4
	lg	%r1,24($sp)
	tmhh	%r1,0x4000	# check for function 65
	jz	.Lsoft_ghash
	lghi	%r0,65		# function 65
	la	%r1,0($Xi)	# H lies right after Xi in gcm128_context
	.long	0xb93e0004	# kimd %r0,$inp
	brc	1,.-4		# pay attention to "partial completion"
	br	%r14
.align	32
.Lsoft_ghash:
___
# 31/32-bit flavours only: zero-extend the 32-bit $len to 64 bits, since
# the code below operates on it with 64-bit instructions.
$code.=<<___ if ($flavour =~ /3[12]/);
	llgfr	$len,$len
___
# Software GHASH body, followed by the rem_4bit constant table and the
# identification string.  $len is converted from a byte count to a count of
# 16-byte blocks (srlg by 4); each .Louter iteration XORs one input block
# into Xi, stores it back and processes it, and brctg loops until $len
# reaches zero.  With $Xi biased by -1, the 1-based counter $cnt (14 down
# to 1) can be used directly as the byte index in .Lghash_inner.
# gcm_gmult_4bit enters at .Lgmult_shortcut with $len set to 1.
# Expressions in backticks are evaluated when $code is post-processed at
# the end of the script.
$code.=<<___;
	stm${g}	%r6,%r14,6*$SIZE_T($sp)

	aghi	$Xi,-1
	srlg	$len,$len,4
	lghi	$x78,`0xf<<3`
	larl	$rem_4bit,rem_4bit

	lg	$Zlo,8+1($Xi)		# Xi
	lg	$Zhi,0+1($Xi)
	lghi	$tmp,0
.Louter:
	xg	$Zhi,0($inp)		# Xi ^= inp
	xg	$Zlo,8($inp)
	xgr	$Zhi,$tmp
	stg	$Zlo,8+1($Xi)
	stg	$Zhi,0+1($Xi)

.Lgmult_shortcut:
	lghi	$tmp,0xf0
	sllg	$nlo,$Zlo,4
	srlg	$xi,$Zlo,8		# extract second byte
	ngr	$nlo,$tmp
	lgr	$nhi,$Zlo
	lghi	$cnt,14
	ngr	$nhi,$tmp

	lg	$Zlo,8($nlo,$Htbl)
	lg	$Zhi,0($nlo,$Htbl)

	sllg	$nlo,$xi,4
	sllg	$rem0,$Zlo,3
	ngr	$nlo,$tmp
	ngr	$rem0,$x78
	ngr	$xi,$tmp

	sllg	$tmp,$Zhi,60
	srlg	$Zlo,$Zlo,4
	srlg	$Zhi,$Zhi,4
	xg	$Zlo,8($nhi,$Htbl)
	xg	$Zhi,0($nhi,$Htbl)
	lgr	$nhi,$xi
	sllg	$rem1,$Zlo,3
	xgr	$Zlo,$tmp
	ngr	$rem1,$x78
	j	.Lghash_inner
.align	16
.Lghash_inner:
	srlg	$Zlo,$Zlo,4
	sllg	$tmp,$Zhi,60
	xg	$Zlo,8($nlo,$Htbl)
	srlg	$Zhi,$Zhi,4
	llgc	$xi,0($cnt,$Xi)
	xg	$Zhi,0($nlo,$Htbl)
	sllg	$nlo,$xi,4
	xg	$Zhi,0($rem0,$rem_4bit)
	nill	$nlo,0xf0
	sllg	$rem0,$Zlo,3
	xgr	$Zlo,$tmp
	ngr	$rem0,$x78
	nill	$xi,0xf0

	sllg	$tmp,$Zhi,60
	srlg	$Zlo,$Zlo,4
	srlg	$Zhi,$Zhi,4
	xg	$Zlo,8($nhi,$Htbl)
	xg	$Zhi,0($nhi,$Htbl)
	lgr	$nhi,$xi
	xg	$Zhi,0($rem1,$rem_4bit)
	sllg	$rem1,$Zlo,3
	xgr	$Zlo,$tmp
	ngr	$rem1,$x78
	brct	$cnt,.Lghash_inner

	sllg	$tmp,$Zhi,60
	srlg	$Zlo,$Zlo,4
	srlg	$Zhi,$Zhi,4
	xg	$Zlo,8($nlo,$Htbl)
	xg	$Zhi,0($nlo,$Htbl)
	sllg	$xi,$Zlo,3
	xg	$Zhi,0($rem0,$rem_4bit)
	xgr	$Zlo,$tmp
	ngr	$xi,$x78

	sllg	$tmp,$Zhi,60
	srlg	$Zlo,$Zlo,4
	srlg	$Zhi,$Zhi,4
	xg	$Zlo,8($nhi,$Htbl)
	xg	$Zhi,0($nhi,$Htbl)
	xgr	$Zlo,$tmp
	xg	$Zhi,0($rem1,$rem_4bit)

	lg	$tmp,0($xi,$rem_4bit)
	la	$inp,16($inp)
	sllg	$tmp,$tmp,4		# correct last rem_4bit[rem]
	brctg	$len,.Louter

	xgr	$Zhi,$tmp
	stg	$Zlo,8+1($Xi)
	stg	$Zhi,0+1($Xi)
	lm${g}	%r6,%r14,6*$SIZE_T($sp)
	br	%r14
.type	gcm_ghash_4bit,\@function
.size	gcm_ghash_4bit,(.-gcm_ghash_4bit)

.align	64
rem_4bit:
	.long	`0x0000<<12`,0,`0x1C20<<12`,0,`0x3840<<12`,0,`0x2460<<12`,0
	.long	`0x7080<<12`,0,`0x6CA0<<12`,0,`0x48C0<<12`,0,`0x54E0<<12`,0
	.long	`0xE100<<12`,0,`0xFD20<<12`,0,`0xD940<<12`,0,`0xC560<<12`,0
	.long	`0x9180<<12`,0,`0x8DA0<<12`,0,`0xA9C0<<12`,0,`0xB5E0<<12`,0
.type	rem_4bit,\@object
.size	rem_4bit,(.-rem_4bit)
.string	"GHASH for s390x, CRYPTOGAMS by <appro\@openssl.org>"
___

# Replace every `...` span in the generated text with the value of the
# enclosed Perl expression (constant arithmetic such as 0xf<<3).
$code =~ s/\`([^\`]*)\`/eval $1/gem;

# Write the assembly out.  The close is checked so that a failed flush makes
# the script exit non-zero instead of leaving a truncated output file behind
# while reporting success.
print $code;
close STDOUT or die "error closing STDOUT: $!";
263