#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# June 2011
#
# This is RC4+MD5 "stitch" implementation. The idea, as spelled in
# http://download.intel.com/design/intarch/papers/323686.pdf, is that
# since both algorithms exhibit instruction-level parallelism, ILP,
# below theoretical maximum, interleaving them would allow to utilize
# processor resources better and achieve better performance. RC4
# instruction sequence is virtually identical to rc4-x86_64.pl, which
# is heavily based on submission by Maxim Perminov, Maxim Locktyukhin
# and Jim Guilford of Intel. MD5 is fresh implementation aiming to
# minimize register usage, which was used as "main thread" with RC4
# weaved into it, one RC4 round per one MD5 round. In addition to the
# stitched subroutine the script can generate standalone replacement
# md5_block_asm_data_order and RC4. Below are performance numbers in
# cycles per processed byte, less is better, for these the standalone
# subroutines, sum of them, and stitched one:
#
#               RC4     MD5     RC4+MD5 stitch  gain
# Opteron       6.5(*)  5.4     11.9    7.0     +70%(*)
# Core2         6.5     5.8     12.3    7.7     +60%
# Westmere      4.3     5.2     9.5     7.0     +36%
# Sandy Bridge  4.2     5.5     9.7     6.8     +43%
# Atom          9.3     6.5     15.8    11.1    +42%
#
# (*)   rc4-x86_64.pl delivers 5.3 on Opteron, so real improvement
#       is +53%...

my ($rc4,$md5)=(1,1);	# what to generate?
# Declared unconditionally: "my $x = ... if COND" has undefined
# behaviour per perlsyn. An empty string interpolates and tests exactly
# like the undefined value the conditional form left behind.
my $D=$md5?"":"#";	# if set to "#", MD5 is stitched into RC4(),
			# but its result is discarded. Idea here is
			# to be able to use 'openssl speed rc4' for
			# benchmarking the stitched subroutine...

# Command line: [flavour] output. A lone argument containing a dot is
# taken to be the output file name.
my $flavour = shift;
my $output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

my $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for the translator next to this script first, then in
# ../../perlasm relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; my $dir=$1; my $xlate;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the
# translator. Fail loudly if the pipe cannot be set up instead of
# silently generating nothing.
open OUT,"| \"$^X\" \"$xlate\" $flavour $output"
	or die "can't call $xlate: $!";
*STDOUT=*OUT;

my ($dat,$in0,$out,$ctx,$inp,$len, $func,$nargs);

# Map function arguments to registers according to what is generated:
# standalone RC4, standalone MD5 block function, or the stitched one.
if ($rc4 && !$md5) {
  ($dat,$len,$in0,$out) = ("%rdi","%rsi","%rdx","%rcx");
  $func="RC4";				$nargs=4;
} elsif ($md5 && !$rc4) {
  ($ctx,$inp,$len) = ("%rdi","%rsi","%rdx");
  $func="md5_block_asm_data_order";	$nargs=3;
} else {
  ($dat,$in0,$out,$ctx,$inp,$len) = ("%rdi","%rsi","%rdx","%rcx","%r8","%r9");
  $func="rc4_md5_enc";			$nargs=6;
  # void rc4_md5_enc(
  #		RC4_KEY *key,		#
  #		const void *in0,	# RC4 input
  #		void *out,		# RC4 output
  #		MD5_CTX *ctx,		#
  #		const void *inp,	# MD5 input
  #		size_t len);		# number of 64-byte blocks
}

# MD5 additive constants, 16 per round, indexed by step number 0..63.
my @K=(	0xd76aa478,0xe8c7b756,0x242070db,0xc1bdceee,
	0xf57c0faf,0x4787c62a,0xa8304613,0xfd469501,
	0x698098d8,0x8b44f7af,0xffff5bb1,0x895cd7be,
	0x6b901122,0xfd987193,0xa679438e,0x49b40821,

	0xf61e2562,0xc040b340,0x265e5a51,0xe9b6c7aa,
	0xd62f105d,0x02441453,0xd8a1e681,0xe7d3fbc8,
	0x21e1cde6,0xc33707d6,0xf4d50d87,0x455a14ed,
	0xa9e3e905,0xfcefa3f8,0x676f02d9,0x8d2a4c8a,

	0xfffa3942,0x8771f681,0x6d9d6122,0xfde5380c,
	0xa4beea44,0x4bdecfa9,0xf6bb4b60,0xbebfbc70,
	0x289b7ec6,0xeaa127fa,0xd4ef3085,0x04881d05,
	0xd9d4d039,0xe6db99e5,0x1fa27cf8,0xc4ac5665,

	0xf4292244,0x432aff97,0xab9423a7,0xfc93a039,
	0x655b59c3,0x8f0ccc92,0xffeff47d,0x85845dd1,
	0x6fa87e4f,0xfe2ce6e0,0xa3014314,0x4e0811a1,
	0xf7537e82,0xbd3af235,0x2ad7d2bb,0xeb86d391	);

my @V=("%r8d","%r9d","%r10d","%r11d");	# MD5 registers
my $tmp="%r12d";

my @XX=("%rbp","%rsi");			# RC4 registers
my @TX=("%rax","%rbx");
my $YY="%rcx";
my $TY="%rdx";

my $MOD=32;				# 16, 32 or 64

# Prologue. After the six pushes and "sub $40,%rsp" the generated code
# uses the frame as follows:
#	 0..15(%rsp)	hash value put aside at the top of .Loop
#	16(%rsp)	pointer to the end of input
#	24(%rsp)	pointer to MD5_CTX
#	32(%rsp)	original $len (RC4-only flavour)
#	40..87(%rsp)	saved %r15,%r14,%r13,%r12,%rbp,%rbx
$code.=<<___;
.text
.align 16

.globl	$func
.type	$func,\@function,$nargs
$func:
	cmp	\$0,$len
	je	.Labort
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	sub	\$40,%rsp
.Lbody:
___
if ($rc4) {
$code.=<<___;
$D#md5#	mov	$ctx,%r11		# reassign arguments
	mov	$len,%r12
	mov	$in0,%r13
	mov	$out,%r14
$D#md5#	mov	$inp,%r15
___
    # Keep the Perl-side register names in step with the moves above.
    $ctx="%r11"	if ($md5);		# reassign arguments
    $len="%r12";
    $in0="%r13";
    $out="%r14";
    $inp="%r15"	if ($md5);
    $inp=$in0	if (!$md5);
$code.=<<___;
	xor	$XX[0],$XX[0]
	xor	$YY,$YY

	lea	8($dat),$dat
	mov	-8($dat),$XX[0]#b
	mov	-4($dat),$YY#b

	inc	$XX[0]#b
	sub	$in0,$out
	movl	($dat,$XX[0],4),$TX[0]#d
___
# RC4-only flavour: $len counts bytes, so process single bytes until
# the RC4 index is a multiple of $MOD before entering the block loop.
$code.=<<___ if (!$md5);
	xor	$TX[1],$TX[1]
	test	\$-128,$len
	jz	.Loop1
	sub	$XX[0],$TX[1]
	and	\$`$MOD-1`,$TX[1]
	jz	.Loop${MOD}_is_hot
	sub	$TX[1],$len
.Loop${MOD}_warmup:
	add	$TX[0]#b,$YY#b
	movl	($dat,$YY,4),$TY#d
	movl	$TX[0]#d,($dat,$YY,4)
	movl	$TY#d,($dat,$XX[0],4)
	add	$TY#b,$TX[0]#b
	inc	$XX[0]#b
	movl	($dat,$TX[0],4),$TY#d
	movl	($dat,$XX[0],4),$TX[0]#d
	xorb	($in0),$TY#b
	movb	$TY#b,($out,$in0)
	lea	1($in0),$in0
	dec	$TX[1]
	jnz	.Loop${MOD}_warmup

	mov	$YY,$TX[1]
	xor	$YY,$YY
	mov	$TX[1]#b,$YY#b

.Loop${MOD}_is_hot:
	mov	$len,32(%rsp)		# save original $len
	shr	\$6,$len		# number of 64-byte blocks
___
    if ($D && !$md5) {			# stitch in dummy MD5
	$md5=1;
	$ctx="%r11";
	$inp="%r15";
	$code.=<<___;
	mov	%rsp,$ctx
	mov	$in0,$inp
___
    }
}
$code.=<<___;
#rc4#	add	$TX[0]#b,$YY#b
#rc4#	lea	($dat,$XX[0],4),$XX[1]
	shl	\$6,$len
	add	$inp,$len		# pointer to the end of input
	mov	$len,16(%rsp)

#md5#	mov	$ctx,24(%rsp)		# save pointer to MD5_CTX
#md5#	mov	0*4($ctx),$V[0]		# load current hash value from MD5_CTX
#md5#	mov	1*4($ctx),$V[1]
#md5#	mov	2*4($ctx),$V[2]
#md5#	mov	3*4($ctx),$V[3]
	jmp	.Loop

.align	16
.Loop:
#md5#	mov	$V[0],0*4(%rsp)		# put aside current hash value
#md5#	mov	$V[1],1*4(%rsp)
#md5#	mov	$V[2],2*4(%rsp)
#md5#	mov	$V[3],$tmp		# forward reference
#md5#	mov	$V[3],3*4(%rsp)
___

# R0..R3 each append one MD5 step interleaved with one RC4 step to
# $code. Arguments are the step number $i (0..63) and the four MD5
# working registers in their current (a,b,c,d) order.
#
# MD5 side: every step expects $tmp to be preloaded by the previous
# step's "forward reference" move, so the boolean function costs three
# instructions. RC4 side: key-stream bytes are gathered with pinsrw
# into %xmm0 (even steps) and %xmm1 (odd steps); at the 16th step of a
# round %xmm1 is shifted left by 8 bits and both are xor-ed into the
# 16 input bytes loaded into %xmm2..%xmm5.

# Round 1: $tmp enters as d; computes ((c^d)&b)^d; message word $j.
sub R0 {
  my ($i,$a,$b,$c,$d)=@_;
  my @rot0=(7,12,17,22);
  my $j=$i%16;
  my $k=$i%$MOD;
  my $xmm="%xmm".($j&1);
    $code.="	movdqu	($in0),%xmm2\n"		if ($rc4 && $j==15);
    $code.="	add	\$$MOD,$XX[0]#b\n"	if ($rc4 && $j==15 && $k==$MOD-1);
    $code.="	pxor	$xmm,$xmm\n"		if ($rc4 && $j<=1);
    $code.=<<___;
#rc4#	movl	($dat,$YY,4),$TY#d
#md5#	xor	$c,$tmp
#rc4#	movl	$TX[0]#d,($dat,$YY,4)
#md5#	and	$b,$tmp
#md5#	add	4*`$j`($inp),$a
#rc4#	add	$TY#b,$TX[0]#b
#rc4#	movl	`4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
#md5#	add	\$$K[$i],$a
#md5#	xor	$d,$tmp
#rc4#	movz	$TX[0]#b,$TX[0]#d
#rc4#	movl	$TY#d,4*$k($XX[1])
#md5#	add	$tmp,$a
#rc4#	add	$TX[1]#b,$YY#b
#md5#	rol	\$$rot0[$j%4],$a
#md5#	mov	`$j==15?"$b":"$c"`,$tmp	# forward reference
#rc4#	pinsrw	\$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
#md5#	add	$b,$a
___
    $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1);
	mov	$YY,$XX[1]
	xor	$YY,$YY		# keyword to partial register
	mov	$XX[1]#b,$YY#b
	lea	($dat,$XX[0],4),$XX[1]
___
    $code.=<<___ if ($rc4 && $j==15);
	psllq	\$8,%xmm1
	pxor	%xmm0,%xmm2
	pxor	%xmm1,%xmm2
___
}
# Round 2: $tmp enters as c; computes ((b^c)&d)^c; message word
# (1+5*$j)%16.
sub R1 {
  my ($i,$a,$b,$c,$d)=@_;
  my @rot1=(5,9,14,20);
  my $j=$i%16;
  my $k=$i%$MOD;
  my $xmm="%xmm".($j&1);
    $code.="	movdqu	16($in0),%xmm3\n"	if ($rc4 && $j==15);
    $code.="	add	\$$MOD,$XX[0]#b\n"	if ($rc4 && $j==15 && $k==$MOD-1);
    $code.="	pxor	$xmm,$xmm\n"		if ($rc4 && $j<=1);
    $code.=<<___;
#rc4#	movl	($dat,$YY,4),$TY#d
#md5#	xor	$b,$tmp
#rc4#	movl	$TX[0]#d,($dat,$YY,4)
#md5#	and	$d,$tmp
#md5#	add	4*`((1+5*$j)%16)`($inp),$a
#rc4#	add	$TY#b,$TX[0]#b
#rc4#	movl	`4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
#md5#	add	\$$K[$i],$a
#md5#	xor	$c,$tmp
#rc4#	movz	$TX[0]#b,$TX[0]#d
#rc4#	movl	$TY#d,4*$k($XX[1])
#md5#	add	$tmp,$a
#rc4#	add	$TX[1]#b,$YY#b
#md5#	rol	\$$rot1[$j%4],$a
#md5#	mov	`$j==15?"$c":"$b"`,$tmp	# forward reference
#rc4#	pinsrw	\$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
#md5#	add	$b,$a
___
    $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1);
	mov	$YY,$XX[1]
	xor	$YY,$YY		# keyword to partial register
	mov	$XX[1]#b,$YY#b
	lea	($dat,$XX[0],4),$XX[1]
___
    $code.=<<___ if ($rc4 && $j==15);
	psllq	\$8,%xmm1
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3
___
}
# Round 3: $tmp enters as d; computes b^c^d; message word (5+3*$j)%16.
# The last step preloads $tmp with -1 for round 4.
sub R2 {
  my ($i,$a,$b,$c,$d)=@_;
  my @rot2=(4,11,16,23);
  my $j=$i%16;
  my $k=$i%$MOD;
  my $xmm="%xmm".($j&1);
    $code.="	movdqu	32($in0),%xmm4\n"	if ($rc4 && $j==15);
    $code.="	add	\$$MOD,$XX[0]#b\n"	if ($rc4 && $j==15 && $k==$MOD-1);
    $code.="	pxor	$xmm,$xmm\n"		if ($rc4 && $j<=1);
    $code.=<<___;
#rc4#	movl	($dat,$YY,4),$TY#d
#md5#	xor	$c,$tmp
#rc4#	movl	$TX[0]#d,($dat,$YY,4)
#md5#	xor	$b,$tmp
#md5#	add	4*`((5+3*$j)%16)`($inp),$a
#rc4#	add	$TY#b,$TX[0]#b
#rc4#	movl	`4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
#md5#	add	\$$K[$i],$a
#rc4#	movz	$TX[0]#b,$TX[0]#d
#md5#	add	$tmp,$a
#rc4#	movl	$TY#d,4*$k($XX[1])
#rc4#	add	$TX[1]#b,$YY#b
#md5#	rol	\$$rot2[$j%4],$a
#md5#	mov	`$j==15?"\\\$-1":"$c"`,$tmp	# forward reference
#rc4#	pinsrw	\$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
#md5#	add	$b,$a
___
    $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1);
	mov	$YY,$XX[1]
	xor	$YY,$YY		# keyword to partial register
	mov	$XX[1]#b,$YY#b
	lea	($dat,$XX[0],4),$XX[1]
___
    $code.=<<___ if ($rc4 && $j==15);
	psllq	\$8,%xmm1
	pxor	%xmm0,%xmm4
	pxor	%xmm1,%xmm4
___
}
# Round 4: $tmp enters as -1; computes (b|~d)^c; message word
# (7*$j)%16. The last step also re-normalises both RC4 indices.
sub R3 {
  my ($i,$a,$b,$c,$d)=@_;
  my @rot3=(6,10,15,21);
  my $j=$i%16;
  my $k=$i%$MOD;
  my $xmm="%xmm".($j&1);
    $code.="	movdqu	48($in0),%xmm5\n"	if ($rc4 && $j==15);
    $code.="	add	\$$MOD,$XX[0]#b\n"	if ($rc4 && $j==15 && $k==$MOD-1);
    $code.="	pxor	$xmm,$xmm\n"		if ($rc4 && $j<=1);
    $code.=<<___;
#rc4#	movl	($dat,$YY,4),$TY#d
#md5#	xor	$d,$tmp
#rc4#	movl	$TX[0]#d,($dat,$YY,4)
#md5#	or	$b,$tmp
#md5#	add	4*`((7*$j)%16)`($inp),$a
#rc4#	add	$TY#b,$TX[0]#b
#rc4#	movl	`4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
#md5#	add	\$$K[$i],$a
#rc4#	movz	$TX[0]#b,$TX[0]#d
#md5#	xor	$c,$tmp
#rc4#	movl	$TY#d,4*$k($XX[1])
#md5#	add	$tmp,$a
#rc4#	add	$TX[1]#b,$YY#b
#md5#	rol	\$$rot3[$j%4],$a
#md5#	mov	\$-1,$tmp			# forward reference
#rc4#	pinsrw	\$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
#md5#	add	$b,$a
___
    $code.=<<___ if ($rc4 && $j==15);
	mov	$XX[0],$XX[1]
	xor	$XX[0],$XX[0]	# keyword to partial register
	mov	$XX[1]#b,$XX[0]#b
	mov	$YY,$XX[1]
	xor	$YY,$YY		# keyword to partial register
	mov	$XX[1]#b,$YY#b
	lea	($dat,$XX[0],4),$XX[1]
	psllq	\$8,%xmm1
	pxor	%xmm0,%xmm5
	pxor	%xmm1,%xmm5
___
}

# Emit the 64 stitched steps. After every step the MD5 registers are
# rotated so the next step sees them as (a,b,c,d), and the two RC4
# temporaries swap roles.
my $i=0;
for(;$i<16;$i++) { R0($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); }
for(;$i<32;$i++) { R1($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); }
for(;$i<48;$i++) { R2($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); }
for(;$i<64;$i++) { R3($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); }

$code.=<<___;
#md5#	add	0*4(%rsp),$V[0]		# accumulate hash value
#md5#	add	1*4(%rsp),$V[1]
#md5#	add	2*4(%rsp),$V[2]
#md5#	add	3*4(%rsp),$V[3]

#rc4#	movdqu	%xmm2,($out,$in0)	# write RC4 output
#rc4#	movdqu	%xmm3,16($out,$in0)
#rc4#	movdqu	%xmm4,32($out,$in0)
#rc4#	movdqu	%xmm5,48($out,$in0)
#md5#	lea	64($inp),$inp
#rc4#	lea	64($in0),$in0
	cmp	16(%rsp),$inp		# are we done?
	jb	.Loop

#md5#	mov	24(%rsp),$len		# restore pointer to MD5_CTX
#rc4#	sub	$TX[0]#b,$YY#b		# correct $YY
#md5#	mov	$V[0],0*4($len)		# write MD5_CTX
#md5#	mov	$V[1],1*4($len)
#md5#	mov	$V[2],2*4($len)
#md5#	mov	$V[3],3*4($len)
___
# RC4-only flavour: handle the bytes left over after the last full
# 64-byte block one at a time.
$code.=<<___ if ($rc4 && (!$md5 || $D));
	mov	32(%rsp),$len		# restore original $len
	and	\$63,$len		# remaining bytes
	jnz	.Loop1
	jmp	.Ldone

.align	16
.Loop1:
	add	$TX[0]#b,$YY#b
	movl	($dat,$YY,4),$TY#d
	movl	$TX[0]#d,($dat,$YY,4)
	movl	$TY#d,($dat,$XX[0],4)
	add	$TY#b,$TX[0]#b
	inc	$XX[0]#b
	movl	($dat,$TX[0],4),$TY#d
	movl	($dat,$XX[0],4),$TX[0]#d
	xorb	($in0),$TY#b
	movb	$TY#b,($out,$in0)
	lea	1($in0),$in0
	dec	$len
	jnz	.Loop1

.Ldone:
___
# Epilogue: store the RC4 indices back in front of the key table and
# restore the callee-saved registers pushed in the prologue.
$code.=<<___;
#rc4#	sub	\$1,$XX[0]#b
#rc4#	movl	$XX[0]#d,-8($dat)
#rc4#	movl	$YY#d,-4($dat)

	mov	40(%rsp),%r15
	mov	48(%rsp),%r14
	mov	56(%rsp),%r13
	mov	64(%rsp),%r12
	mov	72(%rsp),%rbp
	mov	80(%rsp),%rbx
	lea	88(%rsp),%rsp
.Lepilogue:
.Labort:
	ret
.size $func,.-$func
___

if ($rc4 && $D) {	# sole purpose of this section is to provide
			# option to use the generated module as drop-in
			# replacement for rc4-x86_64.pl for debugging
			# and testing purposes...
my ($idx,$ido)=("%r8","%r9");
my ($dat,$len,$inp)=("%rdi","%rsi","%rdx");

$code.=<<___;
.globl	RC4_set_key
.type	RC4_set_key,\@function,3
.align	16
RC4_set_key:
	lea	8($dat),$dat
	lea	($inp,$len),$inp
	neg	$len
	mov	$len,%rcx
	xor	%eax,%eax
	xor	$ido,$ido
	xor	%r10,%r10
	xor	%r11,%r11
	jmp	.Lw1stloop

.align	16
.Lw1stloop:
470d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov %eax,($dat,%rax,4) 471d9e397b599b13d642138480a28c14db7a136bf0Adam Langley add \$1,%al 472d9e397b599b13d642138480a28c14db7a136bf0Adam Langley jnc .Lw1stloop 473d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 474d9e397b599b13d642138480a28c14db7a136bf0Adam Langley xor $ido,$ido 475d9e397b599b13d642138480a28c14db7a136bf0Adam Langley xor $idx,$idx 476d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.align 16 477d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.Lw2ndloop: 478d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov ($dat,$ido,4),%r10d 479d9e397b599b13d642138480a28c14db7a136bf0Adam Langley add ($inp,$len,1),$idx#b 480d9e397b599b13d642138480a28c14db7a136bf0Adam Langley add %r10b,$idx#b 481d9e397b599b13d642138480a28c14db7a136bf0Adam Langley add \$1,$len 482d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov ($dat,$idx,4),%r11d 483d9e397b599b13d642138480a28c14db7a136bf0Adam Langley cmovz %rcx,$len 484d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov %r10d,($dat,$idx,4) 485d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov %r11d,($dat,$ido,4) 486d9e397b599b13d642138480a28c14db7a136bf0Adam Langley add \$1,$ido#b 487d9e397b599b13d642138480a28c14db7a136bf0Adam Langley jnc .Lw2ndloop 488d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 489d9e397b599b13d642138480a28c14db7a136bf0Adam Langley xor %eax,%eax 490d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov %eax,-8($dat) 491d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov %eax,-4($dat) 492d9e397b599b13d642138480a28c14db7a136bf0Adam Langley ret 493d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.size RC4_set_key,.-RC4_set_key 494d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 495d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.globl RC4_options 496d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.type RC4_options,\@abi-omnipotent 497d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.align 16 
498d9e397b599b13d642138480a28c14db7a136bf0Adam LangleyRC4_options: 499d9e397b599b13d642138480a28c14db7a136bf0Adam Langley lea .Lopts(%rip),%rax 500d9e397b599b13d642138480a28c14db7a136bf0Adam Langley ret 501d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.align 64 502d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.Lopts: 503d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.asciz "rc4(64x,int)" 504d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.align 64 505d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.size RC4_options,.-RC4_options 506d9e397b599b13d642138480a28c14db7a136bf0Adam Langley___ 507d9e397b599b13d642138480a28c14db7a136bf0Adam Langley} 508d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, 509d9e397b599b13d642138480a28c14db7a136bf0Adam Langley# CONTEXT *context,DISPATCHER_CONTEXT *disp) 510d9e397b599b13d642138480a28c14db7a136bf0Adam Langleyif ($win64) { 511d9e397b599b13d642138480a28c14db7a136bf0Adam Langleymy $rec="%rcx"; 512d9e397b599b13d642138480a28c14db7a136bf0Adam Langleymy $frame="%rdx"; 513d9e397b599b13d642138480a28c14db7a136bf0Adam Langleymy $context="%r8"; 514d9e397b599b13d642138480a28c14db7a136bf0Adam Langleymy $disp="%r9"; 515d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 516d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$code.=<<___; 517d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.extern __imp_RtlVirtualUnwind 518d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.type se_handler,\@abi-omnipotent 519d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.align 16 520d9e397b599b13d642138480a28c14db7a136bf0Adam Langleyse_handler: 521d9e397b599b13d642138480a28c14db7a136bf0Adam Langley push %rsi 522d9e397b599b13d642138480a28c14db7a136bf0Adam Langley push %rdi 523d9e397b599b13d642138480a28c14db7a136bf0Adam Langley push %rbx 524d9e397b599b13d642138480a28c14db7a136bf0Adam Langley push %rbp 525d9e397b599b13d642138480a28c14db7a136bf0Adam Langley push %r12 
526d9e397b599b13d642138480a28c14db7a136bf0Adam Langley push %r13 527d9e397b599b13d642138480a28c14db7a136bf0Adam Langley push %r14 528d9e397b599b13d642138480a28c14db7a136bf0Adam Langley push %r15 529d9e397b599b13d642138480a28c14db7a136bf0Adam Langley pushfq 530d9e397b599b13d642138480a28c14db7a136bf0Adam Langley sub \$64,%rsp 531d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 532d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov 120($context),%rax # pull context->Rax 533d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov 248($context),%rbx # pull context->Rip 534d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 535d9e397b599b13d642138480a28c14db7a136bf0Adam Langley lea .Lbody(%rip),%r10 536d9e397b599b13d642138480a28c14db7a136bf0Adam Langley cmp %r10,%rbx # context->Rip<.Lbody 537d9e397b599b13d642138480a28c14db7a136bf0Adam Langley jb .Lin_prologue 538d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 539d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov 152($context),%rax # pull context->Rsp 540d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 541d9e397b599b13d642138480a28c14db7a136bf0Adam Langley lea .Lepilogue(%rip),%r10 542d9e397b599b13d642138480a28c14db7a136bf0Adam Langley cmp %r10,%rbx # context->Rip>=.Lepilogue 543d9e397b599b13d642138480a28c14db7a136bf0Adam Langley jae .Lin_prologue 544d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 545d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov 40(%rax),%r15 546d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov 48(%rax),%r14 547d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov 56(%rax),%r13 548d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov 64(%rax),%r12 549d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov 72(%rax),%rbp 550d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov 80(%rax),%rbx 551d9e397b599b13d642138480a28c14db7a136bf0Adam Langley lea 88(%rax),%rax 552d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 553d9e397b599b13d642138480a28c14db7a136bf0Adam 
Langley mov %rbx,144($context) # restore context->Rbx 554d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov %rbp,160($context) # restore context->Rbp 555d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov %r12,216($context) # restore context->R12 556d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov %r13,224($context) # restore context->R12 557d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov %r14,232($context) # restore context->R14 558d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov %r15,240($context) # restore context->R15 559d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 560d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.Lin_prologue: 561d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov 8(%rax),%rdi 562d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov 16(%rax),%rsi 563d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov %rax,152($context) # restore context->Rsp 564d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov %rsi,168($context) # restore context->Rsi 565d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov %rdi,176($context) # restore context->Rdi 566d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 567d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov 40($disp),%rdi # disp->ContextRecord 568d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov $context,%rsi # context 569d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov \$154,%ecx # sizeof(CONTEXT) 570d9e397b599b13d642138480a28c14db7a136bf0Adam Langley .long 0xa548f3fc # cld; rep movsq 571d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 572d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov $disp,%rsi 573d9e397b599b13d642138480a28c14db7a136bf0Adam Langley xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER 574d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov 8(%rsi),%rdx # arg2, disp->ImageBase 575d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov 0(%rsi),%r8 # arg3, disp->ControlPc 576d9e397b599b13d642138480a28c14db7a136bf0Adam 
Langley mov 16(%rsi),%r9 # arg4, disp->FunctionEntry 577d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov 40(%rsi),%r10 # disp->ContextRecord 578d9e397b599b13d642138480a28c14db7a136bf0Adam Langley lea 56(%rsi),%r11 # &disp->HandlerData 579d9e397b599b13d642138480a28c14db7a136bf0Adam Langley lea 24(%rsi),%r12 # &disp->EstablisherFrame 580d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov %r10,32(%rsp) # arg5 581d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov %r11,40(%rsp) # arg6 582d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov %r12,48(%rsp) # arg7 583d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov %rcx,56(%rsp) # arg8, (NULL) 584d9e397b599b13d642138480a28c14db7a136bf0Adam Langley call *__imp_RtlVirtualUnwind(%rip) 585d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 586d9e397b599b13d642138480a28c14db7a136bf0Adam Langley mov \$1,%eax # ExceptionContinueSearch 587d9e397b599b13d642138480a28c14db7a136bf0Adam Langley add \$64,%rsp 588d9e397b599b13d642138480a28c14db7a136bf0Adam Langley popfq 589d9e397b599b13d642138480a28c14db7a136bf0Adam Langley pop %r15 590d9e397b599b13d642138480a28c14db7a136bf0Adam Langley pop %r14 591d9e397b599b13d642138480a28c14db7a136bf0Adam Langley pop %r13 592d9e397b599b13d642138480a28c14db7a136bf0Adam Langley pop %r12 593d9e397b599b13d642138480a28c14db7a136bf0Adam Langley pop %rbp 594d9e397b599b13d642138480a28c14db7a136bf0Adam Langley pop %rbx 595d9e397b599b13d642138480a28c14db7a136bf0Adam Langley pop %rdi 596d9e397b599b13d642138480a28c14db7a136bf0Adam Langley pop %rsi 597d9e397b599b13d642138480a28c14db7a136bf0Adam Langley ret 598d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.size se_handler,.-se_handler 599d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 600d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.section .pdata 601d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.align 4 602d9e397b599b13d642138480a28c14db7a136bf0Adam Langley .rva .LSEH_begin_$func 
603d9e397b599b13d642138480a28c14db7a136bf0Adam Langley .rva .LSEH_end_$func 604d9e397b599b13d642138480a28c14db7a136bf0Adam Langley .rva .LSEH_info_$func 605d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 606d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.section .xdata 607d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.align 8 608d9e397b599b13d642138480a28c14db7a136bf0Adam Langley.LSEH_info_$func: 609d9e397b599b13d642138480a28c14db7a136bf0Adam Langley .byte 9,0,0,0 610d9e397b599b13d642138480a28c14db7a136bf0Adam Langley .rva se_handler 611d9e397b599b13d642138480a28c14db7a136bf0Adam Langley___ 612d9e397b599b13d642138480a28c14db7a136bf0Adam Langley} 613d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 614d9e397b599b13d642138480a28c14db7a136bf0Adam Langleysub reg_part { 615d9e397b599b13d642138480a28c14db7a136bf0Adam Langleymy ($reg,$conv)=@_; 616d9e397b599b13d642138480a28c14db7a136bf0Adam Langley if ($reg =~ /%r[0-9]+/) { $reg .= $conv; } 617d9e397b599b13d642138480a28c14db7a136bf0Adam Langley elsif ($conv eq "b") { $reg =~ s/%[er]([^x]+)x?/%$1l/; } 618d9e397b599b13d642138480a28c14db7a136bf0Adam Langley elsif ($conv eq "w") { $reg =~ s/%[er](.+)/%$1/; } 619d9e397b599b13d642138480a28c14db7a136bf0Adam Langley elsif ($conv eq "d") { $reg =~ s/%[er](.+)/%e$1/; } 620d9e397b599b13d642138480a28c14db7a136bf0Adam Langley return $reg; 621d9e397b599b13d642138480a28c14db7a136bf0Adam Langley} 622d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 623d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$code =~ s/(%[a-z0-9]+)#([bwd])/reg_part($1,$2)/gem; 624d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$code =~ s/\`([^\`]*)\`/eval $1/gem; 625d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$code =~ s/pinsrw\s+\$0,/movd /gm; 626d9e397b599b13d642138480a28c14db7a136bf0Adam Langley 627d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$code =~ s/#md5#//gm if ($md5); 628d9e397b599b13d642138480a28c14db7a136bf0Adam Langley$code =~ s/#rc4#//gm if ($rc4); 
# Write the generated assembly to STDOUT.  Output is buffered, so a write
# failure may only be reported when the handle is closed; check close so a
# truncated result is not silently treated as success.
print $code;

close STDOUT or die "error closing STDOUT: $!";