#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# February 2009
#
# Performance is 2x of gcc 3.4.6 on z10. Coding "secret" is to
# "cluster" Address Generation Interlocks, so that one pipeline stall
# resolves several dependencies.
#
# Usage: perl <this script> > rc4-s390x.s
#
# The script builds the assembly text for RC4, RC4_set_key and
# RC4_options in $code and prints it to STDOUT.  As accessed by the
# code below, the key structure holds the x index in byte 0, the y
# index in byte 1 and the 256 one-byte S-box entries from offset 2.

use strict;
use warnings;

my $rp="%r14";		# register branched through on return
my $sp="%r15";		# base of the area used by stmg/lmg to save registers
my $code=<<___;
.text

___

# void RC4(RC4_KEY *key,size_t len,const void *inp,void *out)
{
my $acc="%r0";		# collects key-stream bytes / holds the output byte
my $cnt="%r1";		# number of 8-byte chunks (len>>3)
my $key="%r2";
my $len="%r3";
my $inp="%r4";
my $out="%r5";

# Two copies each of the x index and of S[x]; the roles of the two
# registers are swapped after every unrolled step so that the "next"
# values prepared in one step become the "current" ones in the
# following step.
my @XX=("%r6","%r7");
my @TX=("%r8","%r9");
my $YY="%r10";		# y index
my $TY="%r11";		# S[y], then (S[x]+S[y])&255 used as key-stream index

# Prologue: save %r6-%r11, load x and y, advance x by one (mod 256),
# preload S[x] and branch to .Lshort when there is no full 8-byte chunk.
$code.=<<___;
.globl	RC4
.type	RC4,\@function
.align	64
RC4:
	stmg	%r6,%r11,48($sp)
	llgc	$XX[0],0($key)
	llgc	$YY,1($key)
	la	$XX[0],1($XX[0])
	nill	$XX[0],0xff
	srlg	$cnt,$len,3
	ltgr	$cnt,$cnt
	llgc	$TX[0],2($XX[0],$key)
	jz	.Lshort
	j	.Loop8

.align	64
.Loop8:
___

# Main loop body, unrolled 8 times.  Each step updates y, computes the
# next x, swaps S[x] and S[y] and leaves (S[x]+S[y])&255 in $TY.  The
# key-stream byte selected by $TY is fetched one step later: step 1
# starts $acc with llgc, later steps shift $acc left by 8 and insert
# the byte with ic.
for my $i (0 .. 7) {
$code.=<<___;
	la	$YY,0($YY,$TX[0])	# $i
	nill	$YY,255
	la	$XX[1],1($XX[0])
	nill	$XX[1],255
___
$code.=<<___ if ($i==1);
	llgc	$acc,2($TY,$key)
___
$code.=<<___ if ($i>1);
	sllg	$acc,$acc,8
	ic	$acc,2($TY,$key)
___
# When the next x index equals y, the next S[x] is taken from the
# register that was just stored at S[y] (the la after "jne .Lcmov$i").
$code.=<<___;
	llgc	$TY,2($YY,$key)
	stc	$TX[0],2($YY,$key)
	llgc	$TX[1],2($XX[1],$key)
	stc	$TY,2($XX[0],$key)
	cr	$XX[1],$YY
	jne	.Lcmov$i
	la	$TX[1],0($TX[0])
.Lcmov$i:
	la	$TY,0($TY,$TX[0])
	nill	$TY,255
___
push(@TX,shift(@TX)); push(@XX,shift(@XX));	# "rotate" registers
}

# Loop tail: insert the eighth key-stream byte, XOR the 8 accumulated
# bytes with 8 input bytes and store them.  The chunk counter was
# produced by 64-bit srlg/ltgr, so the loop is closed with the 64-bit
# brctg; the 32-bit brct would only decrement the low word of $cnt.
#
# .Lshort/.Loop1 handle the remaining len&7 bytes one at a time.
#
# .Lexit undoes the initial increment of x and stores x and y back
# into bytes 0 and 1 of the key before restoring %r6-%r11.
$code.=<<___;
	lg	$TX[1],0($inp)
	sllg	$acc,$acc,8
	la	$inp,8($inp)
	ic	$acc,2($TY,$key)
	xgr	$acc,$TX[1]
	stg	$acc,0($out)
	la	$out,8($out)
	brctg	$cnt,.Loop8

.Lshort:
	lghi	$acc,7
	ngr	$len,$acc
	jz	.Lexit
	j	.Loop1

.align	16
.Loop1:
	la	$YY,0($YY,$TX[0])
	nill	$YY,255
	llgc	$TY,2($YY,$key)
	stc	$TX[0],2($YY,$key)
	stc	$TY,2($XX[0],$key)
	ar	$TY,$TX[0]
	ahi	$XX[0],1
	nill	$TY,255
	nill	$XX[0],255
	llgc	$acc,0($inp)
	la	$inp,1($inp)
	llgc	$TY,2($TY,$key)
	llgc	$TX[0],2($XX[0],$key)
	xr	$acc,$TY
	stc	$acc,0($out)
	la	$out,1($out)
	brct	$len,.Loop1

.Lexit:
	ahi	$XX[0],-1
	stc	$XX[0],0($key)
	stc	$YY,1($key)
	lmg	%r6,%r11,48($sp)
	br	$rp
.size	RC4,.-RC4
.string	"RC4 for s390x, CRYPTOGAMS by <appro\@openssl.org>"

___
}

# void RC4_set_key(RC4_KEY *key,unsigned int len,const void *inp)
{
my $cnt="%r0";		# loop counter (256, then remaining key-input bytes)
my $idx="%r1";		# fill index in the first loop, swap index in the second
my $key="%r2";
my $len="%r3";
my $inp="%r4";
my $acc="%r5";		# S[i]
my $dat="%r6";		# key-input byte, then S[idx]
my $ikey="%r7";		# runs from -256 up to 0; S[i] is at 2+256($ikey,$key)
my $iinp="%r8";		# offset into the key input, reset to 0 after len bytes

# The generated code
#   - stores a zero halfword at offset 0 (x and y),
#   - .L1stloop: writes S[i]=i for i=0..255,
#   - .L2ndloop: for each i, idx=(idx+S[i]+inp[iinp])&255 and S[i] is
#     swapped with S[idx]; the input offset wraps to 0 every len bytes.
#     The loop ends when the low 8 bits of $ikey are zero, which is
#     tested by tml and acted on by the jz after the swap.
$code.=<<___;
.globl	RC4_set_key
.type	RC4_set_key,\@function
.align	64
RC4_set_key:
	stmg	%r6,%r8,48($sp)
	lhi	$cnt,256
	la	$idx,0(%r0)
	sth	$idx,0($key)
.align	4
.L1stloop:
	stc	$idx,2($idx,$key)
	la	$idx,1($idx)
	brct	$cnt,.L1stloop

	lghi	$ikey,-256
	lr	$cnt,$len
	la	$iinp,0(%r0)
	la	$idx,0(%r0)
.align	16
.L2ndloop:
	llgc	$acc,2+256($ikey,$key)
	llgc	$dat,0($iinp,$inp)
	la	$idx,0($idx,$acc)
	la	$ikey,1($ikey)
	la	$idx,0($idx,$dat)
	nill	$idx,255
	la	$iinp,1($iinp)
	tml	$ikey,255
	llgc	$dat,2($idx,$key)
	stc	$dat,2+256-1($ikey,$key)
	stc	$acc,2($idx,$key)
	jz	.Ldone
	brct	$cnt,.L2ndloop
	lr	$cnt,$len
	la	$iinp,0(%r0)
	j	.L2ndloop
.Ldone:
	lmg	%r6,%r8,48($sp)
	br	$rp
.size	RC4_set_key,.-RC4_set_key

___
}

# const char *RC4_options()
# Loads the address of the constant string "rc4(8x,char)" into %r2
# and returns.
$code.=<<___;
.globl	RC4_options
.type	RC4_options,\@function
.align	16
RC4_options:
	larl	%r2,.Loptions
	br	%r14
.size	RC4_options,.-RC4_options
.section	.rodata
.Loptions:
.align	8
.string	"rc4(8x,char)"
___

# Emit the module.  Output is buffered, so a write failure may only be
# reported when STDOUT is closed; check both so that a truncated
# assembly file makes the script exit with an error.
print $code or die "error writing to STDOUT: $!";
close STDOUT or die "error closing STDOUT: $!";