#!/usr/bin/env perl
# x86_64-gf2m.pl revision 04ef91b390dfcc6125913e2f2af502d23d7a5112
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# May 2011
#
# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
# the time being... Except that it has two code paths: code suitable
# for any x86_64 CPU and PCLMULQDQ one suitable for Westmere and
# later. Improvement varies from one benchmark and micro-architecture
# to another.
# Vanilla code path is at most 20% faster than compiler-generated code
# [not very impressive], while PCLMULQDQ - whole 85%-160% better on
# 163- and 571-bit ECDH benchmarks on Intel CPUs. Keep in mind that
# these coefficients are not ones for bn_GF2m_mul_2x2 itself, as not
# all CPU time is burnt in it...

# Command line: [flavour] output. If the first argument contains a dot
# it is taken to be the output file name and no flavour is passed on.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 calling convention is selected by flavour or by an .asm output.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for the translator next to this script, then in ../../perlasm/.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the
# translator. Fail loudly if the child process cannot be started
# instead of silently producing no output.
open STDOUT,"| \"$^X\" $xlate $flavour $output"
	or die "can't call $xlate: $!";

# 128-bit result of _mul_1x1 is returned in $hi:$lo; the first input
# operand $a shares its register with $lo.
($lo,$hi)=("%rax","%rdx");	$a=$lo;
# Register allocation for _mul_1x1. $b is the second 64-bit input
# polynomial and $mask the nibble mask applied to it; $i0/$i1 are table
# indices and $t0/$t1 scratch. $a1..$a48 hold multiples of the first
# operand used to fill the lookup table. Note that the map yields seven
# register names for six variables, so the last one (%r15) is dropped
# and is not used by the code generated here.
($i0,$i1)=("%rsi","%rdi");
($t0,$t1)=("%rbx","%rcx");
($b,$mask)=("%rbp","%r8");
($a1,$a2,$a4,$a8,$a12,$a48)=map("%r$_",(9..15));
($R,$Tx)=("%xmm0","%xmm1");

# _mul_1x1: 64x64-bit carry-less multiplication, result in $hi:$lo.
#
# The three top bits of $a are handled first: each is broadcast to a
# full-width mask, and-ed with $b and shifted into $lo/$hi. The
# remaining 61 bits ($a1) are expanded into a 16-entry table on the
# stack (tab[i] = xor of a1,a2,a4,a8 selected by the bits of i), which
# is then indexed by successive 4-bit nibbles of $b.
$code.=<<___;
.text

.type	_mul_1x1,\@abi-omnipotent
.align	16
_mul_1x1:
	sub	\$128+8,%rsp
	mov	\$-1,$a1
	lea	($a,$a),$i0
	shr	\$3,$a1
	lea	(,$a,4),$i1
	and	$a,$a1			# a1=a&0x1fffffffffffffff
	lea	(,$a,8),$a8
	sar	\$63,$a			# broadcast 63rd bit
	lea	($a1,$a1),$a2
	sar	\$63,$i0		# broadcast 62nd bit
	lea	(,$a1,4),$a4
	and	$b,$a
	sar	\$63,$i1		# broadcast 61st bit
	mov	$a,$hi			# $a is $lo
	shl	\$63,$lo
	and	$b,$i0
	shr	\$1,$hi
	mov	$i0,$t1
	shl	\$62,$i0
	and	$b,$i1
	shr	\$2,$t1
	xor	$i0,$lo
	mov	$i1,$t0
	shl	\$61,$i1
	xor	$t1,$hi
	shr	\$3,$t0
	xor	$i1,$lo
	xor	$t0,$hi

	mov	$a1,$a12
	movq	\$0,0(%rsp)		# tab[0]=0
	xor	$a2,$a12		# a1^a2
	mov	$a1,8(%rsp)		# tab[1]=a1
	mov	$a4,$a48
	mov	$a2,16(%rsp)		# tab[2]=a2
	xor	$a8,$a48		# a4^a8
	mov	$a12,24(%rsp)		# tab[3]=a1^a2

	xor	$a4,$a1
	mov	$a4,32(%rsp)		# tab[4]=a4
	xor	$a4,$a2
	mov	$a1,40(%rsp)		# tab[5]=a1^a4
	xor	$a4,$a12
	mov	$a2,48(%rsp)		# tab[6]=a2^a4
	xor	$a48,$a1		# a1^a4^a4^a8=a1^a8
	mov	$a12,56(%rsp)		# tab[7]=a1^a2^a4
	xor	$a48,$a2		# a2^a4^a4^a8=a2^a8

	mov	$a8,64(%rsp)		# tab[8]=a8
	xor	$a48,$a12		# a1^a2^a4^a4^a8=a1^a2^a8
	mov	$a1,72(%rsp)		# tab[9]=a1^a8
	xor	$a4,$a1			# a1^a8^a4
	mov	$a2,80(%rsp)		# tab[10]=a2^a8
	xor	$a4,$a2			# a2^a8^a4
	mov	$a12,88(%rsp)		# tab[11]=a1^a2^a8

	xor	$a4,$a12		# a1^a2^a8^a4
	mov	$a48,96(%rsp)		# tab[12]=a4^a8
	mov	$mask,$i0
	mov	$a1,104(%rsp)		# tab[13]=a1^a4^a8
	and	$b,$i0
	mov	$a2,112(%rsp)		# tab[14]=a2^a4^a8
	shr	\$4,$b
	mov	$a12,120(%rsp)		# tab[15]=a1^a2^a4^a8
	mov	$mask,$i1
	and	$b,$i1
	shr	\$4,$b

	movq	(%rsp,$i0,8),$R		# half of calculations is done in SSE2
	mov	$mask,$i0
	and	$b,$i0
	shr	\$4,$b
___
    # Each iteration consumes two nibbles of $b: one is looked up and
    # shifted by 8*$n-4 bits in general-purpose registers, the other is
    # looked up into $Tx and shifted by $n bytes in the SSE register.
    for ($n=1;$n<8;$n++) {
	$code.=<<___;
	mov	(%rsp,$i1,8),$t1
	mov	$mask,$i1
	mov	$t1,$t0
	shl	\$`8*$n-4`,$t1
	and	$b,$i1
	movq	(%rsp,$i0,8),$Tx
	shr	\$`64-(8*$n-4)`,$t0
	xor	$t1,$lo
	pslldq	\$$n,$Tx
	mov	$mask,$i0
	shr	\$4,$b
	xor	$t0,$hi
	and	$b,$i0
	shr	\$4,$b
	pxor	$Tx,$R
___
    }
# $n is 8 here, so the last nibble is shifted by 60 bits; afterwards the
# two halves accumulated in $R are folded into $lo and $hi.
$code.=<<___;
	mov	(%rsp,$i1,8),$t1
	mov	$t1,$t0
	shl	\$`8*$n-4`,$t1
	movq	$R,$i0
	shr	\$`64-(8*$n-4)`,$t0
	xor	$t1,$lo
	psrldq	\$8,$R
	xor	$t0,$hi
	movq	$R,$i1
	xor	$i0,$lo
	xor	$i1,$hi

	add	\$128+8,%rsp
	ret
.Lend_mul_1x1:
.size	_mul_1x1,.-_mul_1x1
___

# Argument registers of bn_GF2m_mul_2x2(rp,a1,a0,b1,b0). On Win64 the
# fifth argument lives on the stack; %r10 is only the register it gets
# loaded into by the vanilla code path.
($rp,$a1,$a0,$b1,$b0) = $win64?	("%rcx","%rdx","%r8", "%r9","%r10") :	# Win64 order
				("%rdi","%rsi","%rdx","%rcx","%r8");	# Unix order

# Entry point. Bit 33 of OPENSSL_ia32cap_P selects the PCLMULQDQ code
# path; otherwise control is transferred to .Lvanilla_mul_2x2. Both
# paths form the 256-bit product from three 64x64 multiplications:
# a1*b1, a0*b0 and (a0+a1)*(b0+b1).
$code.=<<___;
.extern	OPENSSL_ia32cap_P
.globl	bn_GF2m_mul_2x2
.type	bn_GF2m_mul_2x2,\@abi-omnipotent
.align	16
bn_GF2m_mul_2x2:
	mov	OPENSSL_ia32cap_P(%rip),%rax
	bt	\$33,%rax
	jnc	.Lvanilla_mul_2x2

	movq	$a1,%xmm0
	movq	$b1,%xmm1
	movq	$a0,%xmm2
___
$code.=<<___ if ($win64);
	movq	40(%rsp),%xmm3
___
$code.=<<___ if (!$win64);
	movq	$b0,%xmm3
___
$code.=<<___;
	movdqa	%xmm0,%xmm4
	movdqa	%xmm1,%xmm5
	pclmulqdq	\$0,%xmm1,%xmm0	# a1*b1
	pxor	%xmm2,%xmm4
	pxor	%xmm3,%xmm5
	pclmulqdq	\$0,%xmm3,%xmm2	# a0*b0
	pclmulqdq	\$0,%xmm5,%xmm4	# (a0+a1)*(b0+b1)
	xorps	%xmm0,%xmm4
	xorps	%xmm2,%xmm4		# (a0+a1)*(b0+b1)-a0*b0-a1*b1
	movdqa	%xmm4,%xmm5
	pslldq	\$8,%xmm4
	psrldq	\$8,%xmm5
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm0
	movdqu	%xmm2,0($rp)
	movdqu	%xmm0,16($rp)
	ret

.align	16
.Lvanilla_mul_2x2:
	lea	-8*17(%rsp),%rsp
___
$code.=<<___ if ($win64);
	mov	`8*17+40`(%rsp),$b0
	mov	%rdi,8*15(%rsp)
	mov	%rsi,8*16(%rsp)
___
$code.=<<___;
	mov	%r14,8*10(%rsp)
	mov	%r13,8*11(%rsp)
	mov	%r12,8*12(%rsp)
	mov	%rbp,8*13(%rsp)
	mov	%rbx,8*14(%rsp)
.Lbody_mul_2x2:
	mov	$rp,32(%rsp)		# save the arguments
	mov	$a1,40(%rsp)
	mov	$a0,48(%rsp)
	mov	$b1,56(%rsp)
	mov	$b0,64(%rsp)

	mov	\$0xf,$mask
	mov	$a1,$a
	mov	$b1,$b
	call	_mul_1x1		# a1*b1
	mov	$lo,16(%rsp)
	mov	$hi,24(%rsp)

	mov	48(%rsp),$a
	mov	64(%rsp),$b
	call	_mul_1x1		# a0*b0
	mov	$lo,0(%rsp)
	mov	$hi,8(%rsp)

	mov	40(%rsp),$a
	mov	56(%rsp),$b
	xor	48(%rsp),$a
	xor	64(%rsp),$b
	call	_mul_1x1		# (a0+a1)*(b0+b1)
___
	# Registers receiving the four words of a0*b0 (0,8) and a1*b1
	# (16,24) saved on the stack above.
	@r=("%rbx","%rcx","%rdi","%rsi");
$code.=<<___;
	mov	0(%rsp),@r[0]
	mov	8(%rsp),@r[1]
	mov	16(%rsp),@r[2]
	mov	24(%rsp),@r[3]
	mov	32(%rsp),%rbp

	xor	$hi,$lo
	xor	@r[1],$hi
	xor	@r[0],$lo
	mov	@r[0],0(%rbp)
	xor	@r[2],$hi
	mov	@r[3],24(%rbp)
	xor	@r[3],$lo
	xor	@r[3],$hi
	xor	$hi,$lo
	mov	$hi,16(%rbp)
	mov	$lo,8(%rbp)

	mov	8*10(%rsp),%r14
	mov	8*11(%rsp),%r13
	mov	8*12(%rsp),%r12
	mov	8*13(%rsp),%rbp
	mov	8*14(%rsp),%rbx
___
$code.=<<___ if ($win64);
	mov	8*15(%rsp),%rdi
	mov	8*16(%rsp),%rsi
___
$code.=<<___;
	lea	8*17(%rsp),%rsp
	ret
.Lend_mul_2x2:
.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
.asciz	"GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
.align	16
___

# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
#               CONTEXT *context,DISPATCHER_CONTEXT *disp)
#
# Win64 only: structured exception handler for the vanilla code path.
# If the faulting Rip is at or past .Lbody_mul_2x2 it reloads the saved
# non-volatile registers from the frame into the CONTEXT, then pops the
# 8*17-byte frame and hands over to RtlVirtualUnwind.
if ($win64) {
$rec="%rcx";
$frame="%rdx";
$context="%r8";
$disp="%r9";

$code.=<<___;
.extern __imp_RtlVirtualUnwind

.type	se_handler,\@abi-omnipotent
.align	16
se_handler:
	push	%rsi
	push	%rdi
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	pushfq
	sub	\$64,%rsp

	mov	152($context),%rax	# pull context->Rsp
	mov	248($context),%rbx	# pull context->Rip

	lea	.Lbody_mul_2x2(%rip),%r10
	cmp	%r10,%rbx		# context->Rip<"prologue" label
	jb	.Lin_prologue

	mov	8*10(%rax),%r14		# mimic epilogue
	mov	8*11(%rax),%r13
	mov	8*12(%rax),%r12
	mov	8*13(%rax),%rbp
	mov	8*14(%rax),%rbx
	mov	8*15(%rax),%rdi
	mov	8*16(%rax),%rsi

	mov	%rbx,144($context)	# restore context->Rbx
	mov	%rbp,160($context)	# restore context->Rbp
	mov	%rsi,168($context)	# restore context->Rsi
	mov	%rdi,176($context)	# restore context->Rdi
	mov	%r12,216($context)	# restore context->R12
	mov	%r13,224($context)	# restore context->R13
	mov	%r14,232($context)	# restore context->R14

.Lin_prologue:
	lea	8*17(%rax),%rax
	mov	%rax,152($context)	# restore context->Rsp

	mov	40($disp),%rdi		# disp->ContextRecord
	mov	$context,%rsi		# context
	mov	\$154,%ecx		# sizeof(CONTEXT)
	.long	0xa548f3fc		# cld; rep movsq

	mov	$disp,%rsi
	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
	mov	0(%rsi),%r8		# arg3, disp->ControlPc
	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
	mov	40(%rsi),%r10		# disp->ContextRecord
	lea	56(%rsi),%r11		# &disp->HandlerData
	lea	24(%rsi),%r12		# &disp->EstablisherFrame
	mov	%r10,32(%rsp)		# arg5
	mov	%r11,40(%rsp)		# arg6
	mov	%r12,48(%rsp)		# arg7
	mov	%rcx,56(%rsp)		# arg8, (NULL)
	call	*__imp_RtlVirtualUnwind(%rip)

	mov	\$1,%eax		# ExceptionContinueSearch
	add	\$64,%rsp
	popfq
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	pop	%rdi
	pop	%rsi
	ret
.size	se_handler,.-se_handler

.section	.pdata
.align	4
	.rva	_mul_1x1
	.rva	.Lend_mul_1x1
	.rva	.LSEH_info_1x1

	.rva	.Lvanilla_mul_2x2
	.rva	.Lend_mul_2x2
	.rva	.LSEH_info_2x2
.section	.xdata
.align	8
.LSEH_info_1x1:
	.byte	0x01,0x07,0x02,0x00
	.byte	0x07,0x01,0x11,0x00	# sub rsp,128+8
.LSEH_info_2x2:
	.byte	9,0,0,0
	.rva	se_handler
___
}

# Evaluate the `...` arithmetic expressions embedded in the generated
# text, then emit it. STDOUT is a pipe to the translator, so check the
# result of close: it is the only place a failure of the child process
# is reported.
$code =~ s/\`([^\`]*)\`/eval($1)/gem;
print $code;
close STDOUT or die "error closing STDOUT: $!";