#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# September 2010.
#
# The module implements "4-bit" GCM GHASH function and underlying
# single multiplication operation in GF(2^128). "4-bit" means that it
# uses 256 bytes per-key table [+128 bytes shared table]. Performance
# was measured to be ~18 cycles per processed byte on z10, which is
# almost 40% better than gcc-generated code. It should be noted that
# 18 cycles is worse result than expected: loop is scheduled for 12
# and the result should be close to 12. In the lack of instruction-
# level profiling data it's impossible to tell why...

# November 2010.
#
# Adapt for -m31 build. If kernel supports what's called "highgprs"
# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
# instructions and achieve "64-bit" performance even in 31-bit legacy
# application context. The feature is not specific to any particular
# processor, as long as it's "z-CPU". Latter implies that the code
# remains z/Architecture specific. On z990 it was measured to perform
# 2.8x better than 32-bit code generated by gcc 4.3.

# March 2011.
#
# Support for hardware KIMD-GHASH is verified to produce correct
# result and therefore is engaged. On z196 it was measured to process
# 8KB buffer ~7 times faster than software implementation. It's not as
# impressive for smaller buffer sizes and for smallest 16-bytes buffer
# it's actually almost 2 times slower. Which is the reason why
# KIMD-GHASH is not used in gcm_gmult_4bit.

# Usage: perl <this script> <flavour> [ignored args...] [<output file>]
#
# <flavour>      a value matching /3[12]/ selects the 31/32-bit ABI
#                (4-byte size_t, stm/lm); anything else selects the
#                64-bit ABI (8-byte size_t, stmg/lmg).
# <output file>  the first remaining argument that looks like a plain
#                file name with an extension. If none is supplied the
#                generated assembly is written to the inherited STDOUT.

use strict;
use warnings;

my $flavour = shift;
$flavour = "" unless defined $flavour;	# avoid matching against undef

my ($SIZE_T, $g);
if ($flavour =~ /3[12]/) {
	$SIZE_T=4;
	$g="";
} else {
	$SIZE_T=8;
	$g="g";
}

# Discard arguments until one looks like "name.ext"; that one is the
# output file. $output ends up false if no such argument exists.
my $output;
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
if ($output) {
	# 3-arg open: the file name can never be parsed as a mode, and a
	# failure is reported instead of silently leaving STDOUT closed.
	open STDOUT, '>', $output or die "can't open $output: $!";
}

my $softonly=0;		# set to 1 to omit the hardware KIMD-GHASH path

my $Zhi="%r0";
my $Zlo="%r1";

my $Xi="%r2";		# argument block
my $Htbl="%r3";
my $inp="%r4";
my $len="%r5";

my $rem0="%r6";		# variables
my $rem1="%r7";
my $nlo="%r8";
my $nhi="%r9";
my $xi="%r10";
my $cnt="%r11";
my $tmp="%r12";
my $x78="%r13";
my $rem_4bit="%r14";

my $sp="%r15";

my $code="";

$code.=<<___;
.text

.globl	gcm_gmult_4bit
.align	32
gcm_gmult_4bit:
___
# This fragment is deliberately disabled by the constant "&& 0".
$code.=<<___ if(!$softonly && 0);	# hardware is slow for single block...
	larl	%r1,OPENSSL_s390xcap_P
	lg	%r0,0(%r1)
	tmhl	%r0,0x4000	# check for message-security-assist
	jz	.Lsoft_gmult
	lghi	%r0,0
	la	%r1,16($sp)
	.long	0xb93e0004	# kimd %r0,%r4
	lg	%r1,24($sp)
	tmhh	%r1,0x4000	# check for function 65
	jz	.Lsoft_gmult
	stg	%r0,16($sp)	# arrange 16 bytes of zero input
	stg	%r0,24($sp)
	lghi	%r0,65		# function 65
	la	%r1,0($Xi)	# H lies right after Xi in gcm128_context
	la	$inp,16($sp)
	lghi	$len,16
	.long	0xb93e0004	# kimd %r0,$inp
	brc	1,.-4		# pay attention to "partial completion"
	br	%r14
.align	32
.Lsoft_gmult:
___
# gcm_gmult_4bit sets up a single-block run ($len=1) and jumps into the
# shared multiplication code inside gcm_ghash_4bit.
$code.=<<___;
	stm${g}	%r6,%r14,6*$SIZE_T($sp)

	aghi	$Xi,-1
	lghi	$len,1
	lghi	$x78,`0xf<<3`
	larl	$rem_4bit,rem_4bit

	lg	$Zlo,8+1($Xi)		# Xi
	j	.Lgmult_shortcut
.type	gcm_gmult_4bit,\@function
.size	gcm_gmult_4bit,(.-gcm_gmult_4bit)

.globl	gcm_ghash_4bit
.align	32
gcm_ghash_4bit:
___
$code.=<<___ if(!$softonly);
	larl	%r1,OPENSSL_s390xcap_P
	lg	%r0,0(%r1)
	tmhl	%r0,0x4000	# check for message-security-assist
	jz	.Lsoft_ghash
	lghi	%r0,0
	la	%r1,16($sp)
	.long	0xb93e0004	# kimd %r0,%r4
	lg	%r1,24($sp)
	tmhh	%r1,0x4000	# check for function 65
	jz	.Lsoft_ghash
	lghi	%r0,65		# function 65
	la	%r1,0($Xi)	# H lies right after Xi in gcm128_context
	.long	0xb93e0004	# kimd %r0,$inp
	brc	1,.-4		# pay attention to "partial completion"
	br	%r14
.align	32
.Lsoft_ghash:
___
# In the 31/32-bit flavour the length argument is zero-extended to 64
# bits before the 64-bit shift/count instructions below use it.
$code.=<<___ if ($flavour =~ /3[12]/);
	llgfr	$len,$len
___
$code.=<<___;
	stm${g}	%r6,%r14,6*$SIZE_T($sp)

	aghi	$Xi,-1
	srlg	$len,$len,4
	lghi	$x78,`0xf<<3`
	larl	$rem_4bit,rem_4bit

	lg	$Zlo,8+1($Xi)		# Xi
	lg	$Zhi,0+1($Xi)
	lghi	$tmp,0
.Louter:
	xg	$Zhi,0($inp)		# Xi ^= inp 
	xg	$Zlo,8($inp)
	xgr	$Zhi,$tmp
	stg	$Zlo,8+1($Xi)
	stg	$Zhi,0+1($Xi)

.Lgmult_shortcut:
	lghi	$tmp,0xf0
	sllg	$nlo,$Zlo,4
	srlg	$xi,$Zlo,8		# extract second byte
	ngr	$nlo,$tmp
	lgr	$nhi,$Zlo
	lghi	$cnt,14
	ngr	$nhi,$tmp

	lg	$Zlo,8($nlo,$Htbl)
	lg	$Zhi,0($nlo,$Htbl)

	sllg	$nlo,$xi,4
	sllg	$rem0,$Zlo,3
	ngr	$nlo,$tmp
	ngr	$rem0,$x78
	ngr	$xi,$tmp

	sllg	$tmp,$Zhi,60
	srlg	$Zlo,$Zlo,4
	srlg	$Zhi,$Zhi,4
	xg	$Zlo,8($nhi,$Htbl)
	xg	$Zhi,0($nhi,$Htbl)
	lgr	$nhi,$xi
	sllg	$rem1,$Zlo,3
	xgr	$Zlo,$tmp
	ngr	$rem1,$x78
	j	.Lghash_inner
.align	16
.Lghash_inner:
	srlg	$Zlo,$Zlo,4
	sllg	$tmp,$Zhi,60
	xg	$Zlo,8($nlo,$Htbl)
	srlg	$Zhi,$Zhi,4
	llgc	$xi,0($cnt,$Xi)
	xg	$Zhi,0($nlo,$Htbl)
	sllg	$nlo,$xi,4
	xg	$Zhi,0($rem0,$rem_4bit)
	nill	$nlo,0xf0
	sllg	$rem0,$Zlo,3
	xgr	$Zlo,$tmp
	ngr	$rem0,$x78
	nill	$xi,0xf0

	sllg	$tmp,$Zhi,60
	srlg	$Zlo,$Zlo,4
	srlg	$Zhi,$Zhi,4
	xg	$Zlo,8($nhi,$Htbl)
	xg	$Zhi,0($nhi,$Htbl)
	lgr	$nhi,$xi
	xg	$Zhi,0($rem1,$rem_4bit)
	sllg	$rem1,$Zlo,3
	xgr	$Zlo,$tmp
	ngr	$rem1,$x78
	brct	$cnt,.Lghash_inner

	sllg	$tmp,$Zhi,60
	srlg	$Zlo,$Zlo,4
	srlg	$Zhi,$Zhi,4
	xg	$Zlo,8($nlo,$Htbl)
	xg	$Zhi,0($nlo,$Htbl)
	sllg	$xi,$Zlo,3
	xg	$Zhi,0($rem0,$rem_4bit)
	xgr	$Zlo,$tmp
	ngr	$xi,$x78

	sllg	$tmp,$Zhi,60
	srlg	$Zlo,$Zlo,4
	srlg	$Zhi,$Zhi,4
	xg	$Zlo,8($nhi,$Htbl)
	xg	$Zhi,0($nhi,$Htbl)
	xgr	$Zlo,$tmp
	xg	$Zhi,0($rem1,$rem_4bit)

	lg	$tmp,0($xi,$rem_4bit)
	la	$inp,16($inp)
	sllg	$tmp,$tmp,4		# correct last rem_4bit[rem]
	brctg	$len,.Louter

	xgr	$Zhi,$tmp
	stg	$Zlo,8+1($Xi)
	stg	$Zhi,0+1($Xi)
	lm${g}	%r6,%r14,6*$SIZE_T($sp)
	br	%r14
.type	gcm_ghash_4bit,\@function
.size	gcm_ghash_4bit,(.-gcm_ghash_4bit)

.align	64
rem_4bit:
	.long	`0x0000<<12`,0,`0x1C20<<12`,0,`0x3840<<12`,0,`0x2460<<12`,0
	.long	`0x7080<<12`,0,`0x6CA0<<12`,0,`0x48C0<<12`,0,`0x54E0<<12`,0
	.long	`0xE100<<12`,0,`0xFD20<<12`,0,`0xD940<<12`,0,`0xC560<<12`,0
	.long	`0x9180<<12`,0,`0x8DA0<<12`,0,`0xA9C0<<12`,0,`0xB5E0<<12`,0
.type	rem_4bit,\@object
.size	rem_4bit,(.-rem_4bit)
.string	"GHASH for s390x, CRYPTOGAMS by <appro\@openssl.org>"
___

# Replace every `expr` in the template with the value of the Perl
# expression. The expressions come only from the templates above, never
# from external input, so the string eval is safe here.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
# Buffered write errors (e.g. disk full) only surface at close time.
close STDOUT or die "error closing STDOUT: $!";