#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# March, June 2010
#
# The module implements "4-bit" GCM GHASH function and underlying
# single multiplication operation in GF(2^128). "4-bit" means that
# it uses 256 bytes per-key table [+128 bytes shared table]. GHASH
# function features so called "528B" variant utilizing additional
# 256+16 bytes of per-key storage [+512 bytes shared table].
# Performance results are for this streamed GHASH subroutine and are
# expressed in cycles per processed byte, less is better:
#
#               gcc 3.4.x(*)    assembler
#
# P4            28.6            14.0            +100%
# Opteron       19.3            7.7             +150%
# Core2         17.8            8.1(**)         +120%
#
# (*)   comparison is not completely fair, because C results are
#       for vanilla "256B" implementation, while assembler results
#       are for "528B";-)
# (**)  it's a mystery [to me] why the Core2 result is not the same
#       as for Opteron;

# May 2010
#
# Add PCLMULQDQ version performing at 2.02 cycles per processed byte.
# See ghash-x86.pl for background information and details about coding
# techniques.
#
# Special thanks to David Woodhouse <dwmw2@infradead.org> for
# providing access to a Westmere-based system on behalf of Intel
# Open Source Technology Centre.

# Command line is "[flavour] output".  When the first argument contains
# a dot it is taken to be the output file name and no flavour is set.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 argument order is selected by flavour or by an .asm output name.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for the translator next to this script, then in ../../perlasm/.
# $1 is only read when the match succeeded; with no directory component
# in $0 the prefix is simply empty.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the
# translator.  The interpreter path is quoted so that a path containing
# spaces survives the shell, and a failure to start the pipe is fatal
# instead of being silently ignored.
open STDOUT,"| \"$^X\" $xlate $flavour $output"
    or die "can't call $xlate: $!";

# common register layout
$nlo="%rax";
$nhi="%rbx";
$Zlo="%r8";
$Zhi="%r9";
$tmp="%r10";
$rem_4bit = "%r11";

$Xi="%rdi";
$Htbl="%rsi";

# per-function register layout
$cnt="%rcx";
$rem="%rdx";

# LB(REG) - return the name of the byte-sized sub-register of REG.
#   %rax/%eax .. %rdx/%edx   -> %al .. %dl
#   %rsi/%esi, %rdi/%edi     -> %sil, %dil
#   %rbp/%ebp                -> %bpl
#   %rN / %rNd               -> %rNb
# A name matching none of the patterns is returned unchanged.
# Always invoked as &LB(...), including from `...` spans inside the
# heredocs below.
sub LB {
    my $r = shift;

    $r =~ s/%[er]([a-d])x/%${1}l/   or
    $r =~ s/%[er]([sd]i)/%${1}l/    or
    $r =~ s/%[er](bp)/%${1}l/       or
    $r =~ s/%(r[0-9]+)[d]?/%${1}b/;

    return $r;
}

# thunk [simplified] 32-bit style perlasm
#
# Any call to an undefined sub, e.g. &mov($dst,$src), lands here and
# appends one instruction line to $code.  The sub name becomes the
# opcode and the operands are emitted in reverse order (last argument
# first).  A last argument that is a plain number is prefixed with '$'
# to form an immediate.
sub AUTOLOAD {
    my $opcode = $AUTOLOAD;
    $opcode =~ s/.*:://;                # strip the package qualifier

    my $arg = pop;
    $arg = "\$$arg" if ($arg*1 eq $arg);

    $code .= "\t$opcode\t".join(',',$arg,reverse @_)."\n";
}

# loop(INP) - append the "4-bit" multiplication loop to $code.
# INP names the register that the loop indexes with $cnt to fetch input
# bytes.  $N is private to this sub and incremented on every call, so
# the .Loop<N>/.Lbreak<N> labels are unique per emitted copy.
{ my $N;
  sub loop {
    my $inp = shift;

    $N++;
$code.=<<___;
 xor $nlo,$nlo
 xor $nhi,$nhi
 mov `&LB("$Zlo")`,`&LB("$nlo")`
 mov `&LB("$Zlo")`,`&LB("$nhi")`
 shl \$4,`&LB("$nlo")`
 mov \$14,$cnt
 mov 8($Htbl,$nlo),$Zlo
 mov ($Htbl,$nlo),$Zhi
 and \$0xf0,`&LB("$nhi")`
 mov $Zlo,$rem
 jmp .Loop$N

.align 16
.Loop$N:
 shr \$4,$Zlo
 and \$0xf,$rem
 mov $Zhi,$tmp
 mov ($inp,$cnt),`&LB("$nlo")`
 shr \$4,$Zhi
 xor 8($Htbl,$nhi),$Zlo
 shl \$60,$tmp
 xor ($Htbl,$nhi),$Zhi
 mov `&LB("$nlo")`,`&LB("$nhi")`
 xor ($rem_4bit,$rem,8),$Zhi
 mov $Zlo,$rem
 shl \$4,`&LB("$nlo")`
 xor $tmp,$Zlo
 dec $cnt
 js .Lbreak$N

 shr \$4,$Zlo
 and \$0xf,$rem
 mov $Zhi,$tmp
 shr \$4,$Zhi
 xor 8($Htbl,$nlo),$Zlo
 shl \$60,$tmp
 xor ($Htbl,$nlo),$Zhi
 and \$0xf0,`&LB("$nhi")`
 xor ($rem_4bit,$rem,8),$Zhi
 mov $Zlo,$rem
 xor $tmp,$Zlo
 jmp .Loop$N

.align 16
.Lbreak$N:
 shr \$4,$Zlo
 and \$0xf,$rem
 mov $Zhi,$tmp
 shr \$4,$Zhi
 xor 8($Htbl,$nlo),$Zlo
 shl \$60,$tmp
 xor ($Htbl,$nlo),$Zhi
 and \$0xf0,`&LB("$nhi")`
 xor ($rem_4bit,$rem,8),$Zhi
 mov $Zlo,$rem
 xor $tmp,$Zlo

 shr \$4,$Zlo
 and \$0xf,$rem
 mov $Zhi,$tmp
 shr \$4,$Zhi
 xor 8($Htbl,$nhi),$Zlo
 shl \$60,$tmp
 xor ($Htbl,$nhi),$Zhi
 xor $tmp,$Zlo
 xor ($rem_4bit,$rem,8),$Zhi

 bswap $Zlo
 bswap $Zhi
___
}}

# gcm_gmult_4bit: two-argument function ($Xi, $Htbl).  Three registers
# are pushed in the prologue; only %rbx is reloaded (from 16(%rsp))
# before the 24 bytes are released.  Note that $code is assigned here,
# not appended to, so this starts the generated output.
$code=<<___;
.text

.globl gcm_gmult_4bit
.type gcm_gmult_4bit,\@function,2
.align 16
gcm_gmult_4bit:
 push %rbx
 push %rbp # %rbp and %r12 are pushed exclusively in
 push %r12 # order to reuse Win64 exception handler...
.Lgmult_prologue:

 movzb 15($Xi),$Zlo
 lea .Lrem_4bit(%rip),$rem_4bit
___
    &loop ($Xi);
$code.=<<___;
 mov $Zlo,8($Xi)
 mov $Zhi,($Xi)

 mov 16(%rsp),%rbx
 lea 24(%rsp),%rsp
.Lgmult_epilogue:
 ret
.size gcm_gmult_4bit,.-gcm_gmult_4bit
___

# per-function register layout
$inp="%rdx";
$len="%rcx";
$rem_8bit=$rem_4bit;            # same register, different table

# gcm_ghash_4bit: four-argument function.  The prologue pushes six
# registers and reserves 280 bytes of stack; the third and fourth
# arguments are moved to %r14/%r15 because %rdx and %rcx are reused
# below ($dat is %edx, @nhi includes %ecx).
$code.=<<___;
.globl gcm_ghash_4bit
.type gcm_ghash_4bit,\@function,4
.align 16
gcm_ghash_4bit:
 push %rbx
 push %rbp
 push %r12
 push %r13
 push %r14
 push %r15
 sub \$280,%rsp
.Lghash_prologue:
 mov $inp,%r14 # reassign couple of args
 mov $len,%r15
___
{ my $inp="%r14";
  my $dat="%edx";
  my $len="%r15";
  my @nhi=("%ebx","%ecx");
  my @rem=("%r12","%r13");
  my $Hshr4="%rbp";

    &sub    ($Htbl,-128);               # size optimization
    &lea    ($Hshr4,"16+128(%rsp)");

    # Walk the 16 Htbl entries and fill the on-stack tables: for each
    # entry a byte (low byte of its second quadword, shifted left by 4)
    # is stored at $j(%rsp), and the entry shifted right by 4 bits is
    # stored as two quadwords at 8*$j($Hshr4) and 8*$j-128($Hshr4).
    # The statements are software-pipelined across iterations, hence
    # the per-statement guards and the two-register rotation of
    # @lo/@hi at the bottom of the loop.
    { my @lo =($nlo,$nhi);
      my @hi =($Zlo,$Zhi);

      &xor  ($dat,$dat);
      for ($i=0,$j=-2;$i<18;$i++,$j++) {
        &mov    ("$j(%rsp)",&LB($dat))          if ($i>1);
        &or     ($lo[0],$tmp)                   if ($i>1);
        &mov    (&LB($dat),&LB($lo[1]))         if ($i>0 && $i<17);
        &shr    ($lo[1],4)                      if ($i>0 && $i<17);
        &mov    ($tmp,$hi[1])                   if ($i>0 && $i<17);
        &shr    ($hi[1],4)                      if ($i>0 && $i<17);
        &mov    ("8*$j($Hshr4)",$hi[0])         if ($i>1);
        &mov    ($hi[0],"16*$i+0-128($Htbl)")   if ($i<16);
        &shl    (&LB($dat),4)                   if ($i>0 && $i<17);
        &mov    ("8*$j-128($Hshr4)",$lo[0])     if ($i>1);
        &mov    ($lo[0],"16*$i+8-128($Htbl)")   if ($i<16);
        &shl    ($tmp,60)                       if ($i>0 && $i<17);

        push    (@lo,shift(@lo));
        push    (@hi,shift(@hi));
      }
    }
    &add    ($Htbl,-128);               # undo the +128 bias applied above
    &mov    ($Zlo,"8($Xi)");
    &mov    ($Zhi,"0($Xi)");
    &add    ($len,$inp);                # pointer to the end of data
    &lea    ($rem_8bit,".Lrem_8bit(%rip)");
    &jmp    (".Louter_loop");

    # One pass of .Louter_loop consumes 16 input bytes: they are xor-ed
    # into the running value, the result is written back to ($Xi) and
    # then re-read 32 bits at a time through $dat.
$code.=".align 16\n.Louter_loop:\n";
    &xor    ($Zhi,"($inp)");
    &mov    ("%rdx","8($inp)");
    &lea    ($inp,"16($inp)");
    &xor    ("%rdx",$Zlo);
    &mov    ("($Xi)",$Zhi);
    &mov    ("8($Xi)","%rdx");
    &shr    ("%rdx",32);

    &xor    ($nlo,$nlo);
    &rol    ($dat,8);
    &mov    (&LB($nlo),&LB($dat));
    &movz   ($nhi[0],&LB($dat));
    &shl    (&LB($nlo),4);
    &shr    ($nhi[0],4);

    # 15 unrolled steps; $j counts down from 11 and a fresh dword is
    # loaded from $j($Xi) whenever the decremented $j is a multiple
    # of 4.  @nhi and @rem alternate between two registers each.
    for ($j=11,$i=0;$i<15;$i++) {
        &rol    ($dat,8);
        &xor    ($Zlo,"8($Htbl,$nlo)")                  if ($i>0);
        &xor    ($Zhi,"($Htbl,$nlo)")                   if ($i>0);
        &mov    ($Zlo,"8($Htbl,$nlo)")                  if ($i==0);
        &mov    ($Zhi,"($Htbl,$nlo)")                   if ($i==0);

        &mov    (&LB($nlo),&LB($dat));
        &xor    ($Zlo,$tmp)                             if ($i>0);
        &movzw  ($rem[1],"($rem_8bit,$rem[1],2)")       if ($i>0);

        &movz   ($nhi[1],&LB($dat));
        &shl    (&LB($nlo),4);
        &movzb  ($rem[0],"(%rsp,$nhi[0])");

        &shr    ($nhi[1],4)                             if ($i<14);
        &and    ($nhi[1],0xf0)                          if ($i==14);
        &shl    ($rem[1],48)                            if ($i>0);
        &xor    ($rem[0],$Zlo);

        &mov    ($tmp,$Zhi);
        &xor    ($Zhi,$rem[1])                          if ($i>0);
        &shr    ($Zlo,8);

        &movz   ($rem[0],&LB($rem[0]));
        &mov    ($dat,"$j($Xi)")                        if (--$j%4==0);
        &shr    ($Zhi,8);

        &xor    ($Zlo,"-128($Hshr4,$nhi[0],8)");
        &shl    ($tmp,56);
        &xor    ($Zhi,"($Hshr4,$nhi[0],8)");

        unshift (@nhi,pop(@nhi));       # "rotate" registers
        unshift (@rem,pop(@rem));
    }
    &movzw  ($rem[1],"($rem_8bit,$rem[1],2)");
    &xor    ($Zlo,"8($Htbl,$nlo)");
    &xor    ($Zhi,"($Htbl,$nlo)");

    &shl    ($rem[1],48);
    &xor    ($Zlo,$tmp);

    &xor    ($Zhi,$rem[1]);
    &movz   ($rem[0],&LB($Zlo));
    &shr    ($Zlo,4);

    &mov    ($tmp,$Zhi);
    &shl    (&LB($rem[0]),4);
    &shr    ($Zhi,4);

    &xor    ($Zlo,"8($Htbl,$nhi[0])");
    &movzw  ($rem[0],"($rem_8bit,$rem[0],2)");
    &shl    ($tmp,60);

    &xor    ($Zhi,"($Htbl,$nhi[0])");
    &xor    ($Zlo,$tmp);
    &shl    ($rem[0],48);

    &bswap  ($Zlo);
    &xor    ($Zhi,$rem[0]);

    &bswap  ($Zhi);
    &cmp    ($inp,$len);
    &jb     (".Louter_loop");
}
# Epilogue: store the result, then reload the six saved registers from
# just above the 280-byte frame and release the stack.
$code.=<<___;
 mov $Zlo,8($Xi)
 mov $Zhi,($Xi)

 lea 280(%rsp),%rsi
 mov 0(%rsi),%r15
 mov 8(%rsi),%r14
 mov 16(%rsi),%r13
 mov 24(%rsi),%r12
 mov 32(%rsi),%rbp
 mov 40(%rsi),%rbx
 lea 48(%rsi),%rsp
.Lghash_epilogue:
 ret
.size gcm_ghash_4bit,.-gcm_ghash_4bit
___

######################################################################
# PCLMULQDQ version.

@_4args=$win64? ("%rcx","%rdx","%r8", "%r9") :  # Win64 order
                ("%rdi","%rsi","%rdx","%rcx");  # Unix order

# From here on $Xi names an XMM register, not the %rdi pointer used by
# the 4-bit code above.
($Xi,$Xhi)=("%xmm0","%xmm1");   $Hkey="%xmm2";
($T1,$T2,$T3)=("%xmm3","%xmm4","%xmm5");

# clmul64x64_T2(XHI, XI, HKEY [, MODULO])
# Append a carry-less multiplication of XI by HKEY to $code; the
# emitted code leaves its result in XHI:XI and uses $T1/$T2 as scratch.
# When MODULO is defined the preparatory movdqa/pshufd/pxor sequence is
# not emitted.
sub clmul64x64_T2 {     # minimal register pressure
my ($Xhi,$Xi,$Hkey,$modulo)=@_;

$code.=<<___ if (!defined($modulo));
 movdqa $Xi,$Xhi #
 pshufd \$0b01001110,$Xi,$T1
 pshufd \$0b01001110,$Hkey,$T2
 pxor $Xi,$T1 #
 pxor $Hkey,$T2
___
$code.=<<___;
 pclmulqdq \$0x00,$Hkey,$Xi #######
 pclmulqdq \$0x11,$Hkey,$Xhi #######
 pclmulqdq \$0x00,$T2,$T1 #######
 pxor $Xi,$T1 #
 pxor $Xhi,$T1 #

 movdqa $T1,$T2 #
 psrldq \$8,$T1
 pslldq \$8,$T2 #
 pxor $T1,$Xhi
 pxor $T2,$Xi #
___
}

# reduction_alg9(XHI, XI)
# Append the two-phase shift-and-xor reduction of XHI:XI to $code; the
# emitted code leaves its result in XI and uses $T1/$T2 as scratch.
sub reduction_alg9 {    # 17/13 times faster than Intel version
my ($Xhi,$Xi) = @_;

$code.=<<___;
 # 1st phase
 movdqa $Xi,$T1 #
 psllq \$1,$Xi
 pxor $T1,$Xi #
 psllq \$5,$Xi #
 pxor $T1,$Xi #
 psllq \$57,$Xi #
 movdqa $Xi,$T2 #
 pslldq \$8,$Xi
 psrldq \$8,$T2 #
 pxor $T1,$Xi
 pxor $T2,$Xhi #

 # 2nd phase
 movdqa $Xi,$T2
 psrlq \$5,$Xi
 pxor $T2,$Xi #
 psrlq \$1,$Xi #
 pxor $T2,$Xi #
 pxor $Xhi,$T2
 psrlq \$1,$Xi #
 pxor $T2,$Xi #
___
}

# gcm_init_clmul: loads 16 bytes from the second argument, applies the
# "<<1 twist", and stores H at ($Htbl) and H^2 at 16($Htbl).
{ my ($Htbl,$Xip)=@_4args;

$code.=<<___;
.globl gcm_init_clmul
.type gcm_init_clmul,\@abi-omnipotent
.align 16
gcm_init_clmul:
 movdqu ($Xip),$Hkey
 pshufd \$0b01001110,$Hkey,$Hkey # dword swap

 # <<1 twist
 pshufd \$0b11111111,$Hkey,$T2 # broadcast uppermost dword
 movdqa $Hkey,$T1
 psllq \$1,$Hkey
 pxor $T3,$T3 #
 psrlq \$63,$T1
 pcmpgtd $T2,$T3 # broadcast carry bit
 pslldq \$8,$T1
 por $T1,$Hkey # H<<=1

 # magic reduction
 pand .L0x1c2_polynomial(%rip),$T3
 pxor $T3,$Hkey # if(carry) H^=0x1c2_polynomial

 # calculate H^2
 movdqa $Hkey,$Xi
___
    &clmul64x64_T2  ($Xhi,$Xi,$Hkey);
    &reduction_alg9 ($Xhi,$Xi);
$code.=<<___;
 movdqu $Hkey,($Htbl) # save H
 movdqu $Xi,16($Htbl) # save H^2
 ret
.size gcm_init_clmul,.-gcm_init_clmul
___
}

# gcm_gmult_clmul: byte-swaps the value at ($Xip), multiplies it by the
# key loaded from ($Htbl), reduces, swaps back and stores it to ($Xip).
{ my ($Xip,$Htbl)=@_4args;

$code.=<<___;
.globl gcm_gmult_clmul
.type gcm_gmult_clmul,\@abi-omnipotent
.align 16
gcm_gmult_clmul:
 movdqu ($Xip),$Xi
 movdqa .Lbswap_mask(%rip),$T3
 movdqu ($Htbl),$Hkey
 pshufb $T3,$Xi
___
    &clmul64x64_T2  ($Xhi,$Xi,$Hkey);
    &reduction_alg9 ($Xhi,$Xi);
$code.=<<___;
 pshufb $T3,$Xi
 movdqu $Xi,($Xip)
 ret
.size gcm_gmult_clmul,.-gcm_gmult_clmul
___
}

# NOTE(review): the block opened here continues past this point in the
# file; only its first two declarations are reproduced, unchanged.
{ my ($Xip,$Htbl,$inp,$len)=@_4args;
  my $Xn="%xmm6";
468392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom my $Xhn="%xmm7"; 469392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom my $Hkey2="%xmm8"; 470392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom my $T1n="%xmm9"; 471392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom my $T2n="%xmm10"; 472392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 473392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 474392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.globl gcm_ghash_clmul 475392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type gcm_ghash_clmul,\@abi-omnipotent 476392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 477392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstromgcm_ghash_clmul: 478392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 479392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($win64); 480392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.LSEH_begin_gcm_ghash_clmul: 481392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # I can't trust assembler to use specific encoding:-( 482392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .byte 0x48,0x83,0xec,0x58 #sub \$0x58,%rsp 483392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .byte 0x0f,0x29,0x34,0x24 #movaps %xmm6,(%rsp) 484392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .byte 0x0f,0x29,0x7c,0x24,0x10 #movdqa %xmm7,0x10(%rsp) 485392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .byte 0x44,0x0f,0x29,0x44,0x24,0x20 #movaps %xmm8,0x20(%rsp) 486392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .byte 0x44,0x0f,0x29,0x4c,0x24,0x30 #movaps %xmm9,0x30(%rsp) 487392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .byte 0x44,0x0f,0x29,0x54,0x24,0x40 #movaps %xmm10,0x40(%rsp) 488392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 489392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 490392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa 
.Lbswap_mask(%rip),$T3 491392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 492392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu ($Xip),$Xi 493392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu ($Htbl),$Hkey 494392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb $T3,$Xi 495392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 496392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$0x10,$len 497392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jz .Lodd_tail 498392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 499392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 16($Htbl),$Hkey2 500392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom ####### 501392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # Xi+2 =[H*(Ii+1 + Xi+1)] mod P = 502392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # [(H*Ii+1) + (H*Xi+1)] mod P = 503392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # [(H*Ii+1) + H^2*(Ii+Xi)] mod P 504392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # 505392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu ($inp),$T1 # Ii 506392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 16($inp),$Xn # Ii+1 507392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb $T3,$T1 508392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb $T3,$Xn 509392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $T1,$Xi # Ii+Xi 510392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 511392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom &clmul64x64_T2 ($Xhn,$Xn,$Hkey); # H*Ii+1 512392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 513392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa $Xi,$Xhi # 514392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0b01001110,$Xi,$T1 515392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0b01001110,$Hkey2,$T2 516392aa7cc7d2b122614c5393c3e357da07fd07af3Brian 
Carlstrom pxor $Xi,$T1 # 517392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $Hkey2,$T2 518392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 519392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 32($inp),$inp # i+=2 520392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$0x20,$len 521392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jbe .Leven_tail 522392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 523392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lmod_loop: 524392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 525392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom &clmul64x64_T2 ($Xhi,$Xi,$Hkey2,1); # H^2*(Ii+Xi) 526392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 527392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu ($inp),$T1 # Ii 528392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $Xn,$Xi # (H*Ii+1) + H^2*(Ii+Xi) 529392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $Xhn,$Xhi 530392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 531392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 16($inp),$Xn # Ii+1 532392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb $T3,$T1 533392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb $T3,$Xn 534392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 535392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa $Xn,$Xhn # 536392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0b01001110,$Xn,$T1n 537392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0b01001110,$Hkey,$T2n 538392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $Xn,$T1n # 539392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $Hkey,$T2n 540392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $T1,$Xhi # "Ii+Xi", consume early 541392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 542392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa 
$Xi,$T1 # 1st phase 543392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom psllq \$1,$Xi 544392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $T1,$Xi # 545392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom psllq \$5,$Xi # 546392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $T1,$Xi # 547392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pclmulqdq \$0x00,$Hkey,$Xn ####### 548392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom psllq \$57,$Xi # 549392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa $Xi,$T2 # 550392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pslldq \$8,$Xi 551392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom psrldq \$8,$T2 # 552392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $T1,$Xi 553392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $T2,$Xhi # 554392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 555392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pclmulqdq \$0x11,$Hkey,$Xhn ####### 556392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa $Xi,$T2 # 2nd phase 557392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom psrlq \$5,$Xi 558392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $T2,$Xi # 559392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom psrlq \$1,$Xi # 560392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $T2,$Xi # 561392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $Xhi,$T2 562392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom psrlq \$1,$Xi # 563392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $T2,$Xi # 564392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 565392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pclmulqdq \$0x00,$T2n,$T1n ####### 566392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa $Xi,$Xhi # 567392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0b01001110,$Xi,$T1 568392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd 
\$0b01001110,$Hkey2,$T2 569392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $Xi,$T1 # 570392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $Hkey2,$T2 571392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 572392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $Xn,$T1n # 573392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $Xhn,$T1n # 574392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa $T1n,$T2n # 575392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom psrldq \$8,$T1n 576392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pslldq \$8,$T2n # 577392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $T1n,$Xhn 578392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $T2n,$Xn # 579392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 580392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 32($inp),$inp 581392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$0x20,$len 582392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom ja .Lmod_loop 583392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 584392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Leven_tail: 585392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 586392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom &clmul64x64_T2 ($Xhi,$Xi,$Hkey2,1); # H^2*(Ii+Xi) 587392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 588392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $Xn,$Xi # (H*Ii+1) + H^2*(Ii+Xi) 589392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $Xhn,$Xhi 590392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 591392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom &reduction_alg9 ($Xhi,$Xi); 592392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 593392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom test $len,$len 594392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jnz .Ldone 
595392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 596392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lodd_tail: 597392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu ($inp),$T1 # Ii 598392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb $T3,$T1 599392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $T1,$Xi # Ii+Xi 600392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 601392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom &clmul64x64_T2 ($Xhi,$Xi,$Hkey); # H*(Ii+Xi) 602392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom &reduction_alg9 ($Xhi,$Xi); 603392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 604392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Ldone: 605392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb $T3,$Xi 606392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu $Xi,($Xip) 607392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 608392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($win64); 609392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps (%rsp),%xmm6 610392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x10(%rsp),%xmm7 611392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x20(%rsp),%xmm8 612392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x30(%rsp),%xmm9 613392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x40(%rsp),%xmm10 614392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom add \$0x58,%rsp 615392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 616392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 617392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom ret 618392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.LSEH_end_gcm_ghash_clmul: 619392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size gcm_ghash_clmul,.-gcm_ghash_clmul 620392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 
621392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom} 622392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 623392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 624392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 64 625392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lbswap_mask: 626392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 627392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.L0x1c2_polynomial: 628392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 629392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 64 630392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type .Lrem_4bit,\@object 631392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lrem_4bit: 632392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .long 0,`0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16` 633392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .long 0,`0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16` 634392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .long 0,`0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16` 635392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .long 0,`0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16` 636392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type .Lrem_8bit,\@object 637392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lrem_8bit: 638392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E 639392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E 640392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E 641392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E 
642392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E 643392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E 644392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E 645392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E 646392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE 647392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE 648392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE 649392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE 650392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E 651392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E 652392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE 653392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE 654392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E 655392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E 656392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E 657392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E 
658392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E 659392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E 660392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E 661392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E 662392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE 663392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE 664392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE 665392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE 666392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E 667392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E 668392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE 669392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE 670392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 671392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.asciz "GHASH for x86_64, CRYPTOGAMS by <appro\@openssl.org>" 672392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 64 673392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 674392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 675392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, 
676392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# CONTEXT *context,DISPATCHER_CONTEXT *disp) 677392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstromif ($win64) { 678392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$rec="%rcx"; 679392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$frame="%rdx"; 680392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$context="%r8"; 681392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$disp="%r9"; 682392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 683392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 684392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.extern __imp_RtlVirtualUnwind 685392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type se_handler,\@abi-omnipotent 686392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 687392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstromse_handler: 688392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %rsi 689392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %rdi 690392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %rbx 691392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %rbp 692392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %r12 693392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %r13 694392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %r14 695392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %r15 696392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pushfq 697392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$64,%rsp 698392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 699392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 120($context),%rax # pull context->Rax 700392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 248($context),%rbx # pull context->Rip 701392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 702392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 
8($disp),%rsi # disp->ImageBase 703392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 56($disp),%r11 # disp->HandlerData 704392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 705392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 0(%r11),%r10d # HandlerData[0] 706392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea (%rsi,%r10),%r10 # prologue label 707392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp %r10,%rbx # context->Rip<prologue label 708392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jb .Lin_prologue 709392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 710392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 152($context),%rax # pull context->Rsp 711392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 712392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 4(%r11),%r10d # HandlerData[1] 713392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea (%rsi,%r10),%r10 # epilogue label 714392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp %r10,%rbx # context->Rip>=epilogue label 715392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jae .Lin_prologue 716392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 717392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 24(%rax),%rax # adjust "rsp" 718392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 719392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov -8(%rax),%rbx 720392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov -16(%rax),%rbp 721392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov -24(%rax),%r12 722392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov %rbx,144($context) # restore context->Rbx 723392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov %rbp,160($context) # restore context->Rbp 724392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov %r12,216($context) # restore context->R12 725392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 
726392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lin_prologue: 727392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 8(%rax),%rdi 728392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 16(%rax),%rsi 729392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov %rax,152($context) # restore context->Rsp 730392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov %rsi,168($context) # restore context->Rsi 731392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov %rdi,176($context) # restore context->Rdi 732392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 733392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 40($disp),%rdi # disp->ContextRecord 734392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $context,%rsi # context 735392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov \$`1232/8`,%ecx # sizeof(CONTEXT) 736392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .long 0xa548f3fc # cld; rep movsq 737392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 738392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $disp,%rsi 739392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER 740392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 8(%rsi),%rdx # arg2, disp->ImageBase 741392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 0(%rsi),%r8 # arg3, disp->ControlPc 742392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 16(%rsi),%r9 # arg4, disp->FunctionEntry 743392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 40(%rsi),%r10 # disp->ContextRecord 744392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 56(%rsi),%r11 # &disp->HandlerData 745392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 24(%rsi),%r12 # &disp->EstablisherFrame 746392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov %r10,32(%rsp) # arg5 747392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov %r11,40(%rsp) # arg6 
748392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov %r12,48(%rsp) # arg7 749392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov %rcx,56(%rsp) # arg8, (NULL) 750392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call *__imp_RtlVirtualUnwind(%rip) 751392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 752392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov \$1,%eax # ExceptionContinueSearch 753392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom add \$64,%rsp 754392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom popfq 755392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pop %r15 756392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pop %r14 757392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pop %r13 758392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pop %r12 759392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pop %rbp 760392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pop %rbx 761392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pop %rdi 762392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pop %rsi 763392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom ret 764392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size se_handler,.-se_handler 765392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 766392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.section .pdata 767392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 4 768392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .rva .LSEH_begin_gcm_gmult_4bit 769392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .rva .LSEH_end_gcm_gmult_4bit 770392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .rva .LSEH_info_gcm_gmult_4bit 771392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 772392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .rva .LSEH_begin_gcm_ghash_4bit 773392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .rva .LSEH_end_gcm_ghash_4bit 
774392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .rva .LSEH_info_gcm_ghash_4bit 775392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 776392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .rva .LSEH_begin_gcm_ghash_clmul 777392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .rva .LSEH_end_gcm_ghash_clmul 778392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .rva .LSEH_info_gcm_ghash_clmul 779392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 780392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.section .xdata 781392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 8 782392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.LSEH_info_gcm_gmult_4bit: 783392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .byte 9,0,0,0 784392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .rva se_handler 785392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .rva .Lgmult_prologue,.Lgmult_epilogue # HandlerData 786392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.LSEH_info_gcm_ghash_4bit: 787392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .byte 9,0,0,0 788392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .rva se_handler 789392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .rva .Lghash_prologue,.Lghash_epilogue # HandlerData 790392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.LSEH_info_gcm_ghash_clmul: 791392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .byte 0x01,0x1f,0x0b,0x00 792392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .byte 0x1f,0xa8,0x04,0x00 #movaps 0x40(rsp),xmm10 793392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .byte 0x19,0x98,0x03,0x00 #movaps 0x30(rsp),xmm9 794392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .byte 0x13,0x88,0x02,0x00 #movaps 0x20(rsp),xmm8 795392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .byte 0x0d,0x78,0x01,0x00 #movaps 0x10(rsp),xmm7 796392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .byte 
0x08,0x68,0x00,0x00 #movaps (rsp),xmm6 797392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .byte 0x04,0xa2,0x00,0x00 #sub rsp,0x58 798392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 799392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom} 800392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 801392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code =~ s/\`([^\`]*)\`/eval($1)/gem; 802392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 803392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstromprint $code; 804392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 805392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstromclose STDOUT; 806