1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# February 2009
11#
12# Performance is 2x of gcc 3.4.6 on z10. Coding "secret" is to
13# "cluster" Address Generation Interlocks, so that one pipeline stall
14# resolves several dependencies.
15
# Registers with a fixed role in every routine generated below:
# $rp is branched to on return, $sp is the base of the register save area.
($rp,$sp)=("%r14","%r15");

# All assembly text is accumulated in $code and printed at the very end.
# Output starts in the text section, followed by one blank line.
$code=".text\n\n";
22
# void RC4(RC4_KEY *key,size_t len,const void *inp,void *out)
#
# Generates the RC4 stream routine. As accessed below, RC4_KEY is laid
# out as: byte 0 = x index, byte 1 = y index, state table S from offset 2.
# Input is consumed 8 bytes per pass of .Loop8 (8 unrolled steps whose
# keystream bytes are gathered in $acc and XORed with one 64-bit load);
# the remaining len%8 bytes go through the byte-at-a-time .Loop1.
{
$acc="%r0";		# keystream accumulator / scratch
$cnt="%r1";		# number of 8-byte chunks, len>>3
$key="%r2";		# 1st argument
$len="%r3";		# 2nd argument
$inp="%r4";		# 3rd argument
$out="%r5";		# 4th argument

@XX=("%r6","%r7");	# x index of the current and of the next step
@TX=("%r8","%r9");	# S[x] of the current and of the next step
$YY="%r10";		# y index
$TY="%r11";		# S[y], later the index of the keystream byte

# Prologue: save %r6-%r11, load x and y, pre-increment x modulo 256,
# preload S[x] and skip the unrolled loop if there are fewer than 8 bytes.
$code.=<<___;
.globl	RC4
.type	RC4,\@function
.align	64
RC4:
	stmg	%r6,%r11,48($sp)
	llgc	$XX[0],0($key)
	llgc	$YY,1($key)
	la	$XX[0],1($XX[0])
	nill	$XX[0],0xff
	srlg	$cnt,$len,3
	ltgr	$cnt,$cnt
	llgc	$TX[0],2($XX[0],$key)
	jz	.Lshort
	j	.Loop8

.align	64
.Loop8:
___
# Eight unrolled RC4 steps. The keystream byte selected by step N is
# fetched at the top of step N+1 (after the loop for the last one), so
# step 0 fetches nothing, step 1 starts $acc and later steps shift it
# left by 8 bits and insert the next byte.
for my $i (0..7) {
$code.=<<___;
	la	$YY,0($YY,$TX[0])	# $i
	nill	$YY,255
	la	$XX[1],1($XX[0])
	nill	$XX[1],255
___
$code.=<<___ if ($i==1);
	llgc	$acc,2($TY,$key)
___
$code.=<<___ if ($i>1);
	sllg	$acc,$acc,8
	ic	$acc,2($TY,$key)
___
# Swap S[x] and S[y] and preload S[x+1]; when x+1 equals y that value
# is the byte just written to S[y], so it is taken from the register.
$code.=<<___;
	llgc	$TY,2($YY,$key)
	stc	$TX[0],2($YY,$key)
	llgc	$TX[1],2($XX[1],$key)
	stc	$TY,2($XX[0],$key)
	cr	$XX[1],$YY
	jne	.Lcmov$i
	la	$TX[1],0($TX[0])
.Lcmov$i:
	la	$TY,0($TY,$TX[0])
	nill	$TY,255
___
push(@TX,shift(@TX)); push(@XX,shift(@XX));     # "rotate" registers
}

# Finish the 8th keystream byte, XOR with 8 input bytes, store, loop.
# $cnt is a full 64-bit quantity (srlg/ltgr above), therefore the loop
# must be closed with the 64-bit brctg: the 32-bit brct decrements and
# tests only the low word and would miscount once len reaches 2^35.
$code.=<<___;
	lg	$TX[1],0($inp)
	sllg	$acc,$acc,8
	la	$inp,8($inp)
	ic	$acc,2($TY,$key)
	xgr	$acc,$TX[1]
	stg	$acc,0($out)
	la	$out,8($out)
	brctg	$cnt,.Loop8

___
# Tail: len is reduced to len&7, so the 32-bit brct is sufficient here.
# One byte per iteration; x is advanced and S[x] reloaded for the next one.
$code.=<<___;
.Lshort:
	lghi	$acc,7
	ngr	$len,$acc
	jz	.Lexit
	j	.Loop1

.align	16
.Loop1:
	la	$YY,0($YY,$TX[0])
	nill	$YY,255
	llgc	$TY,2($YY,$key)
	stc	$TX[0],2($YY,$key)
	stc	$TY,2($XX[0],$key)
	ar	$TY,$TX[0]
	ahi	$XX[0],1
	nill	$TY,255
	nill	$XX[0],255
	llgc	$acc,0($inp)
	la	$inp,1($inp)
	llgc	$TY,2($TY,$key)
	llgc	$TX[0],2($XX[0],$key)
	xr	$acc,$TY
	stc	$acc,0($out)
	la	$out,1($out)
	brct	$len,.Loop1

___
# Epilogue: x was kept pre-incremented, so step it back before writing
# x and y to the key structure; then restore %r6-%r11 and return.
$code.=<<___;
.Lexit:
	ahi	$XX[0],-1
	stc	$XX[0],0($key)
	stc	$YY,1($key)
	lmg	%r6,%r11,48($sp)
	br	$rp
.size	RC4,.-RC4
.string	"RC4 for s390x, CRYPTOGAMS by <appro\@openssl.org>"

___
}
132
# void RC4_set_key(RC4_KEY *key,unsigned int len,const void *inp)
#
# Generates the key-schedule routine. It clears the two index bytes at
# offset 0 of the key structure, fills the 256-byte table at offset 2
# with 0..255 and then makes 256 swaps driven by the key bytes at inp,
# restarting at inp[0] every len bytes.
# NOTE(review): len==0 is not special-cased; the first brct on $cnt then
# wraps instead of reloading, so key bytes are read at inp[0..255].
{
# The roles map onto %r0..%r8 in exactly this order:
#   $cnt  loop counter            $idx  table index being accumulated
#   $key, $len, $inp              the three arguments
#   $acc  S[i]                    $dat  key byte, then S[idx]
#   $ikey i-256 (runs -256..0)    $iinp offset into the key bytes
($cnt,$idx,$key,$len,$inp,$acc,$dat,$ikey,$iinp)=map("%r$_",0..8);

# Prologue: save %r6-%r8, zero both index bytes with one halfword store.
$code.=<<___;
.globl	RC4_set_key
.type	RC4_set_key,\@function
.align	64
RC4_set_key:
	stmg	%r6,%r8,48($sp)
	lhi	$cnt,256
	la	$idx,0(%r0)
	sth	$idx,0($key)
___
# First loop: S[i]=i for i=0..255.
$code.=<<___;
.align	4
.L1stloop:
	stc	$idx,2($idx,$key)
	la	$idx,1($idx)
	brct	$cnt,.L1stloop

___
# Second loop set-up. $ikey counts from -256 up to 0, which is why the
# table is addressed with a +256 displacement.
$code.=<<___;
	lghi	$ikey,-256
	lr	$cnt,$len
	la	$iinp,0(%r0)
	la	$idx,0(%r0)
___
# Second loop: idx=(idx+S[i]+keybyte)&255, then swap S[i] and S[idx].
# $ikey is already incremented when S[i] is written back, hence the -1
# in that displacement. tml sets the condition code tested by jz (low
# byte of $ikey zero, i.e. 256 iterations done); when $cnt runs out the
# key offset and counter are reset so the key bytes repeat.
$code.=<<___;
.align	16
.L2ndloop:
	llgc	$acc,2+256($ikey,$key)
	llgc	$dat,0($iinp,$inp)
	la	$idx,0($idx,$acc)
	la	$ikey,1($ikey)
	la	$idx,0($idx,$dat)
	nill	$idx,255
	la	$iinp,1($iinp)
	tml	$ikey,255
	llgc	$dat,2($idx,$key)
	stc	$dat,2+256-1($ikey,$key)
	stc	$acc,2($idx,$key)
	jz	.Ldone
	brct	$cnt,.L2ndloop
	lr	$cnt,$len
	la	$iinp,0(%r0)
	j	.L2ndloop
___
# Epilogue: restore %r6-%r8 and return.
$code.=<<___;
.Ldone:
	lmg	%r6,%r8,48($sp)
	br	$rp
.size	RC4_set_key,.-RC4_set_key

___
}
189
# const char *RC4_options()
#
# Generates a routine that loads the address of a constant description
# string into %r2 and returns through %r14. The string is placed in
# .rodata.
#
# The .align directive has to come before the label: a label binds to
# the location counter at the point where it appears, so with the label
# first any padding inserted by .align would end up between .Loptions
# and the string it is supposed to name.
$code.=<<___;
.globl	RC4_options
.type	RC4_options,\@function
.align	16
RC4_options:
	larl	%r2,.Loptions
	br	%r14
.size	RC4_options,.-RC4_options
.section	.rodata
.align	8
.Loptions:
.string	"rc4(8x,char)"
___

# Write the complete generated assembly to standard output.
print $code;
206