#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for Intel AES-NI extension. In
# OpenSSL context it's used with Intel engine, but can also be used as
# drop-in replacement for crypto/aes/asm/aes-586.pl [see below for
# details].
#
# Performance.
#
# To start with see corresponding paragraph in aesni-x86_64.pl...
# Instead of filling table similar to one found there I've chosen to
# summarize *comparison* results for raw ECB, CTR and CBC benchmarks.
# The simplified table below represents 32-bit performance relative
# to 64-bit one in every given point. Ratios vary for different
# encryption modes, therefore interval values.
#
#	16-byte     64-byte     256-byte    1-KB        8-KB
#	53-67%      67-84%      91-94%      95-98%      97-99.5%
#
# Lower ratios for smaller block sizes are perfectly understandable,
# because function call overhead is higher in 32-bit mode. Largest
# 8-KB block performance is virtually same: 32-bit code is less than
# 1% slower for ECB, CBC and CCM, and ~3% slower otherwise.

# January 2011
#
# See aesni-x86_64.pl for details. Unlike x86_64 version this module
# interleaves at most 6 aes[enc|dec] instructions, because there are
# not enough registers for 8x interleave [which should be optimal for
# Sandy Bridge].
# Actually, performance results for 6x interleave
# factor presented in aesni-x86_64.pl (except for CTR) are for this
# module.

# April 2011
#
# Add aesni_xts_[en|de]crypt. Westmere spends 1.50 cycles processing
# one byte out of 8KB with 128-bit key, Sandy Bridge - 1.09.

# Script-wide configuration.
$PREFIX="aesni";	# if $PREFIX is set to "AES", the script
			# generates drop-in replacement for
			# crypto/aes/asm/aes-586.pl:-)
$inline=1;		# inline _aesni_[en|de]crypt

# Take everything up to and including the last '/' or '\' of the script
# path as $dir, then make both that directory and its ../../perlasm
# sibling searchable so that the require below can find x86asm.pl.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";

# Hand the first command-line argument and the script name to the
# perlasm framework.
&asm_init($ARGV[0],$0);

# $movekey is the instruction used for loading round keys; it is always
# invoked indirectly as &$movekey(...).
# NOTE(review): both branches select movups, so $PREFIX currently has
# no effect on this choice.
if ($PREFIX eq "aesni")	{ $movekey=*movups; }
else			{ $movekey=*movups; }

# General-purpose register assignment shared by all generators below.
$len="eax";
$rounds="ecx";
$key="edx";
$inp="esi";
$out="edi";
$rounds_="ebx";	# backup copy for $rounds
$key_="ebp";	# backup copy for $key

# XMM register assignment. $in1, $in0 and $ivec are aliases that share
# a register with $inout3, $inout4 and $inout5 respectively.
$rndkey0="xmm0";
$rndkey1="xmm1";
$inout0="xmm2";
$inout1="xmm3";
$inout2="xmm4";
$inout3="xmm5";	$in1="xmm5";
$inout4="xmm6";	$in0="xmm6";
$inout5="xmm7";	$ivec="xmm7";

# AESNI extension
#
# The subs below hand-encode the AES-NI instructions and emit them as
# raw bytes through data_byte. Only the register-to-register form with
# xmm0..xmm7 operands is handled: the ModR/M byte is built as
# 0xc0|(dst<<3)|src. Any other operand pair fails the pattern match and
# produces no output at all.

# aeskeygenassist(dst,src,imm): bytes 66 0F 3A DF, ModR/M, imm.
sub aeskeygenassist
{ my($dst,$src,$imm)=@_;
    if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/)
    {	&data_byte(0x66,0x0f,0x3a,0xdf,0xc0|($1<<3)|$2,$imm);	}
}

# aescommon(opcodelet,dst,src): bytes 66 0F 38 <opcodelet>, ModR/M.
# Shared encoder behind the five one-line wrappers that follow.
sub aescommon
{ my($opcodelet,$dst,$src)=@_;
    if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/)
    {	&data_byte(0x66,0x0f,0x38,$opcodelet,0xc0|($1<<3)|$2);}
}
sub aesimc	{ aescommon(0xdb,@_); }
sub aesenc	{ aescommon(0xdc,@_); }
sub aesenclast	{ aescommon(0xdd,@_); }
sub aesdec	{ aescommon(0xde,@_); }
sub aesdeclast	{ aescommon(0xdf,@_); }

# Inline version of internal aesni_[en|de]crypt1
#
# aesni_inline_generate1($p [,$inout [,$ivec]]) emits a single-block
# round loop at the point of call.
#   $p     - "enc" or "dec"; picks aes${p} / aes${p}last through eval
#   $inout - register holding the block; defaults to $inout0
#   $ivec  - optional register (lexical here, it shadows the global
#            $ivec). When given, round key 0 is xor-ed into $ivec and
#            $ivec into $inout; otherwise round key 0 is xor-ed
#            straight into $inout.
# The emitted code advances $key and counts $rounds down to zero, so a
# caller that needs either value afterwards has to keep its own copy.
# $sn is bumped on every expansion to keep the loop label unique.
{ my $sn;
sub aesni_inline_generate1
{ my ($p,$inout,$ivec)=@_; $inout=$inout0 if (!defined($inout));
  $sn++;

    &$movekey		($rndkey0,&QWP(0,$key));
    &$movekey		($rndkey1,&QWP(16,$key));
    &xorps		($ivec,$rndkey0)	if (defined($ivec));
    &lea		($key,&DWP(32,$key));
    &xorps		($inout,$ivec)		if (defined($ivec));
    &xorps		($inout,$rndkey0)	if (!defined($ivec));
    &set_label("${p}1_loop_$sn");
	eval"&aes${p} ($inout,$rndkey1)";
	&dec		($rounds);		# sets the flags tested by jnz below
	&$movekey	($rndkey1,&QWP(0,$key));
	&lea		($key,&DWP(16,$key));
    &jnz		(&label("${p}1_loop_$sn"));
    eval"&aes${p}last ($inout,$rndkey1)";
}}

# aesni_generate1($p [,$inout]) emits the out-of-line subroutine
# _aesni_${p}rypt1 operating on $inout (default $inout0).
# $rounds is compared with 11: below jumps to the ${p}128 entry, equal
# to the ${p}192 entry, anything else falls through the longest path.
# $key is advanced along the way. It is only instantiated below when
# $inline is false.
sub aesni_generate1	# fully unrolled loop
{ my ($p,$inout)=@_; $inout=$inout0 if (!defined($inout));

    &function_begin_B("_aesni_${p}rypt1");
	&movups		($rndkey0,&QWP(0,$key));
	&$movekey	($rndkey1,&QWP(0x10,$key));
	&xorps		($inout,$rndkey0);
	&$movekey	($rndkey0,&QWP(0x20,$key));
	&lea		($key,&DWP(0x30,$key));
	&cmp		($rounds,11);
	&jb		(&label("${p}128"));
	&lea		($key,&DWP(0x20,$key));	# lea leaves the cmp flags intact
	&je		(&label("${p}192"));
	&lea		($key,&DWP(0x20,$key));
	eval"&aes${p} ($inout,$rndkey1)";
	&$movekey	($rndkey1,&QWP(-0x40,$key));
	eval"&aes${p} ($inout,$rndkey0)";
	&$movekey	($rndkey0,&QWP(-0x30,$key));
    &set_label("${p}192");
	eval"&aes${p} ($inout,$rndkey1)";
	&$movekey	($rndkey1,&QWP(-0x20,$key));
	eval"&aes${p} ($inout,$rndkey0)";
	&$movekey	($rndkey0,&QWP(-0x10,$key));
    &set_label("${p}128");
	eval"&aes${p} ($inout,$rndkey1)";
	&$movekey	($rndkey1,&QWP(0,$key));
	eval"&aes${p} ($inout,$rndkey0)";
	&$movekey	($rndkey0,&QWP(0x10,$key));
	eval"&aes${p} ($inout,$rndkey1)";
	&$movekey	($rndkey1,&QWP(0x20,$key));
	eval"&aes${p} ($inout,$rndkey0)";
	&$movekey	($rndkey0,&QWP(0x30,$key));
	eval"&aes${p} ($inout,$rndkey1)";
	&$movekey	($rndkey1,&QWP(0x40,$key));
	eval"&aes${p} ($inout,$rndkey0)";
	&$movekey	($rndkey0,&QWP(0x50,$key));
	eval"&aes${p} ($inout,$rndkey1)";
	&$movekey	($rndkey1,&QWP(0x60,$key));
	eval"&aes${p} ($inout,$rndkey0)";
	&$movekey	($rndkey0,&QWP(0x70,$key));
	eval"&aes${p} ($inout,$rndkey1)";
    eval"&aes${p}last ($inout,$rndkey0)";
    &ret();
    &function_end_B("_aesni_${p}rypt1");
}

# void $PREFIX_encrypt (const void *inp,void *out,const AES_KEY *key);
#
# Loads 16 bytes from inp into $inout0, reads $rounds from offset 240
# of key, runs the single-block routine (inlined, or called when
# $inline is false) and stores $inout0 to out. eax is used first for
# inp and then for out.
&aesni_generate1("enc") if (!$inline);
&function_begin_B("${PREFIX}_encrypt");
	&mov	("eax",&wparam(0));
	&mov	($key,&wparam(2));
	&movups	($inout0,&QWP(0,"eax"));
	&mov	($rounds,&DWP(240,$key));
	&mov	("eax",&wparam(1));
	if ($inline)
	{   &aesni_inline_generate1("enc");	}
	else
	{   &call	("_aesni_encrypt1");	}
	&movups	(&QWP(0,"eax"),$inout0);
	&ret	();
&function_end_B("${PREFIX}_encrypt");

# void $PREFIX_decrypt (const void *inp,void *out,const AES_KEY *key);
#
# Same sequence as $PREFIX_encrypt, using the "dec" single-block
# routine.
&aesni_generate1("dec") if(!$inline);
&function_begin_B("${PREFIX}_decrypt");
	&mov	("eax",&wparam(0));
	&mov	($key,&wparam(2));
	&movups	($inout0,&QWP(0,"eax"));
	&mov	($rounds,&DWP(240,$key));
	&mov	("eax",&wparam(1));
	if ($inline)
	{   &aesni_inline_generate1("dec");	}
	else
	{   &call	("_aesni_decrypt1");	}
	&movups	(&QWP(0,"eax"),$inout0);
	&ret	();
&function_end_B("${PREFIX}_decrypt");

# _aesni_[en|de]cryptN are private interfaces, N denotes interleave
# factor. Why were 3x subroutines originally used in loops? Even though
# aes[enc|dec] latency was originally 6, it could be scheduled only
# every *2nd* cycle. Thus 3x interleave was the one providing optimal
# utilization, i.e. when subroutine's throughput is virtually same as
# of non-interleaved subroutine [for number of input blocks up to 3].
# This is why it makes no sense to implement 2x subroutine.
# aes[enc|dec] latency in next processor generation is 8, but the
# instructions can be scheduled every cycle. Optimal interleave for
# new processor is therefore 8x, but it's unfeasible to accommodate it
# in XMM registers addressable in 32-bit mode and therefore 6x is
# used instead...
# aesni_generate3($p) emits the private subroutine _aesni_${p}rypt3,
# which runs $inout0..$inout2 through the same key schedule with the
# aes${p} instructions of the three registers interleaved.
#   $p - "enc" or "dec"; picks aes${p} / aes${p}last through eval
# $rounds is halved up front because every loop iteration applies two
# round keys ($rndkey1, then $rndkey0). The emitted code advances $key
# and counts $rounds down to zero.
sub aesni_generate3
{ my $p=shift;

    &function_begin_B("_aesni_${p}rypt3");
	&$movekey	($rndkey0,&QWP(0,$key));
	&shr		($rounds,1);
	&$movekey	($rndkey1,&QWP(16,$key));
	&lea		($key,&DWP(32,$key));
	&xorps		($inout0,$rndkey0);
	&pxor		($inout1,$rndkey0);
	&pxor		($inout2,$rndkey0);
	&$movekey	($rndkey0,&QWP(0,$key));

    &set_label("${p}3_loop");
	eval"&aes${p} ($inout0,$rndkey1)";
	eval"&aes${p} ($inout1,$rndkey1)";
	&dec		($rounds);		# sets the flags tested by jnz below
	eval"&aes${p} ($inout2,$rndkey1)";
	&$movekey	($rndkey1,&QWP(16,$key));
	eval"&aes${p} ($inout0,$rndkey0)";
	eval"&aes${p} ($inout1,$rndkey0)";
	&lea		($key,&DWP(32,$key));
	eval"&aes${p} ($inout2,$rndkey0)";
	&$movekey	($rndkey0,&QWP(0,$key));
    &jnz		(&label("${p}3_loop"));
    eval"&aes${p} ($inout0,$rndkey1)";
    eval"&aes${p} ($inout1,$rndkey1)";
    eval"&aes${p} ($inout2,$rndkey1)";
    eval"&aes${p}last ($inout0,$rndkey0)";
    eval"&aes${p}last ($inout1,$rndkey0)";
    eval"&aes${p}last ($inout2,$rndkey0)";
    &ret();
    &function_end_B("_aesni_${p}rypt3");
}

# 4x interleave is implemented to improve small block performance,
# most notably [and naturally] 4 block by ~30%. One can argue that one
# should have implemented 5x as well, but improvement would be <20%,
# so it's not worth it...
# aesni_generate4($p) emits the private subroutine _aesni_${p}rypt4,
# which runs $inout0..$inout3 through the same key schedule with the
# aes${p} instructions of the four registers interleaved.
#   $p - "enc" or "dec"; picks aes${p} / aes${p}last through eval
# $rounds is halved up front because every loop iteration applies two
# round keys. The emitted code advances $key and counts $rounds down to
# zero.
sub aesni_generate4
{ my $p=shift;

    &function_begin_B("_aesni_${p}rypt4");
	&$movekey	($rndkey0,&QWP(0,$key));
	&$movekey	($rndkey1,&QWP(16,$key));
	&shr		($rounds,1);
	&lea		($key,&DWP(32,$key));
	&xorps		($inout0,$rndkey0);
	&pxor		($inout1,$rndkey0);
	&pxor		($inout2,$rndkey0);
	&pxor		($inout3,$rndkey0);
	&$movekey	($rndkey0,&QWP(0,$key));

    &set_label("${p}4_loop");
	eval"&aes${p} ($inout0,$rndkey1)";
	eval"&aes${p} ($inout1,$rndkey1)";
	&dec		($rounds);		# sets the flags tested by jnz below
	eval"&aes${p} ($inout2,$rndkey1)";
	eval"&aes${p} ($inout3,$rndkey1)";
	&$movekey	($rndkey1,&QWP(16,$key));
	eval"&aes${p} ($inout0,$rndkey0)";
	eval"&aes${p} ($inout1,$rndkey0)";
	&lea		($key,&DWP(32,$key));
	eval"&aes${p} ($inout2,$rndkey0)";
	eval"&aes${p} ($inout3,$rndkey0)";
	&$movekey	($rndkey0,&QWP(0,$key));
    &jnz		(&label("${p}4_loop"));

    eval"&aes${p} ($inout0,$rndkey1)";
    eval"&aes${p} ($inout1,$rndkey1)";
    eval"&aes${p} ($inout2,$rndkey1)";
    eval"&aes${p} ($inout3,$rndkey1)";
    eval"&aes${p}last ($inout0,$rndkey0)";
    eval"&aes${p}last ($inout1,$rndkey0)";
    eval"&aes${p}last ($inout2,$rndkey0)";
    eval"&aes${p}last ($inout3,$rndkey0)";
    &ret();
    &function_end_B("_aesni_${p}rypt4");
}

# aesni_generate6($p) emits the private subroutine _aesni_${p}rypt6,
# which runs $inout0..$inout5 through the same key schedule.
#   $p - "enc" or "dec"; picks aes${p} / aes${p}last through eval
# Unlike the 3x/4x variants, the prologue interleaves the round-key-0
# xor of each register with the first aes${p} round and then jumps into
# the middle of the loop at _aesni_${p}rypt6_enter. That label is
# registered through static_label; both it and the loop head are set
# with an alignment argument of 16. The emitted code advances $key and
# counts $rounds (halved up front) down to zero.
sub aesni_generate6
{ my $p=shift;

    &function_begin_B("_aesni_${p}rypt6");
    &static_label("_aesni_${p}rypt6_enter");
	&$movekey	($rndkey0,&QWP(0,$key));
	&shr		($rounds,1);
	&$movekey	($rndkey1,&QWP(16,$key));
	&lea		($key,&DWP(32,$key));
	&xorps		($inout0,$rndkey0);
	&pxor		($inout1,$rndkey0);	# pxor does better here
	eval"&aes${p} ($inout0,$rndkey1)";
	&pxor		($inout2,$rndkey0);
	eval"&aes${p} ($inout1,$rndkey1)";
	&pxor		($inout3,$rndkey0);
	&dec		($rounds);		# first-pass flags for the jnz below
	eval"&aes${p} ($inout2,$rndkey1)";
	&pxor		($inout4,$rndkey0);
	eval"&aes${p} ($inout3,$rndkey1)";
	&pxor		($inout5,$rndkey0);
	eval"&aes${p} ($inout4,$rndkey1)";
	&$movekey	($rndkey0,&QWP(0,$key));
	eval"&aes${p} ($inout5,$rndkey1)";
	&jmp		(&label("_aesni_${p}rypt6_enter"));

    &set_label("${p}6_loop",16);
	eval"&aes${p} ($inout0,$rndkey1)";
	eval"&aes${p} ($inout1,$rndkey1)";
	&dec		($rounds);		# sets the flags tested by jnz below
	eval"&aes${p} ($inout2,$rndkey1)";
	eval"&aes${p} ($inout3,$rndkey1)";
	eval"&aes${p} ($inout4,$rndkey1)";
	eval"&aes${p} ($inout5,$rndkey1)";
    &set_label("_aesni_${p}rypt6_enter",16);
	&$movekey	($rndkey1,&QWP(16,$key));
	eval"&aes${p} ($inout0,$rndkey0)";
	eval"&aes${p} ($inout1,$rndkey0)";
	&lea		($key,&DWP(32,$key));
	eval"&aes${p} ($inout2,$rndkey0)";
	eval"&aes${p} ($inout3,$rndkey0)";
	eval"&aes${p} ($inout4,$rndkey0)";
	eval"&aes${p} ($inout5,$rndkey0)";
	&$movekey	($rndkey0,&QWP(0,$key));
    &jnz		(&label("${p}6_loop"));

    eval"&aes${p} ($inout0,$rndkey1)";
    eval"&aes${p} ($inout1,$rndkey1)";
    eval"&aes${p} ($inout2,$rndkey1)";
    eval"&aes${p} ($inout3,$rndkey1)";
    eval"&aes${p} ($inout4,$rndkey1)";
    eval"&aes${p} ($inout5,$rndkey1)";
    eval"&aes${p}last ($inout0,$rndkey0)";
    eval"&aes${p}last ($inout1,$rndkey0)";
    eval"&aes${p}last ($inout2,$rndkey0)";
    eval"&aes${p}last ($inout3,$rndkey0)";
    eval"&aes${p}last ($inout4,$rndkey0)";
    eval"&aes${p}last ($inout5,$rndkey0)";
    &ret();
    &function_end_B("_aesni_${p}rypt6");
}

# Instantiate the interleaved subroutines. The "dec" variants are
# always emitted; the "enc" variants only when $PREFIX is "aesni".
&aesni_generate3("enc") if ($PREFIX eq "aesni");
&aesni_generate3("dec");
&aesni_generate4("enc") if ($PREFIX eq "aesni");
&aesni_generate4("dec");
&aesni_generate6("enc") if ($PREFIX eq "aesni");
&aesni_generate6("dec");

if ($PREFIX eq "aesni") {
######################################################################
# void aesni_ecb_encrypt (const void *in, void *out,
#                         size_t length, const AES_KEY *key,
#                         int enc);
#
# ECB-mode bulk routine.  `length' is rounded down to a multiple of 16
# bytes; nothing is written if the result is zero.  A non-zero `enc'
# selects encryption, zero selects decryption.
#
# Both directions share the same shape:
#   * main loop handles 6 blocks (0x60 bytes) per iteration through
#     _aesni_[en|de]crypt6, storing the previous six results while the
#     next six inputs are being loaded;
#   * the remaining 1..5 blocks are dispatched to the 1x/3x/4x/6x
#     helpers, with unused lanes zeroed and their results not stored.
#
# $key_ and $rounds_ hold backup copies of $key and $rounds, which are
# reloaded after every pass through the 6x helper (that helper advances
# $key and counts $rounds down).
&function_begin("aesni_ecb_encrypt");
	&mov	($inp,&wparam(0));
	&mov	($out,&wparam(1));
	&mov	($len,&wparam(2));
	&mov	($key,&wparam(3));
	&mov	($rounds_,&wparam(4));		# `enc' flag for now
	&and	($len,-16);			# whole blocks only
	&jz	(&label("ecb_ret"));
	&mov	($rounds,&DWP(240,$key));	# round count at offset 240 of key
	&test	($rounds_,$rounds_);
	&jz	(&label("ecb_decrypt"));

	&mov	($key_,$key);			# backup $key
	&mov	($rounds_,$rounds);		# backup $rounds
	&cmp	($len,0x60);
	&jb	(&label("ecb_enc_tail"));

	# prime the pipeline with the first six blocks
	&movdqu	($inout0,&QWP(0,$inp));
	&movdqu	($inout1,&QWP(0x10,$inp));
	&movdqu	($inout2,&QWP(0x20,$inp));
	&movdqu	($inout3,&QWP(0x30,$inp));
	&movdqu	($inout4,&QWP(0x40,$inp));
	&movdqu	($inout5,&QWP(0x50,$inp));
	&lea	($inp,&DWP(0x60,$inp));
	&sub	($len,0x60);
	&jmp	(&label("ecb_enc_loop6_enter"));

&set_label("ecb_enc_loop6",16);
	# store previous results, interleaved with loading the next six
	&movups	(&QWP(0,$out),$inout0);
	&movdqu	($inout0,&QWP(0,$inp));
	&movups	(&QWP(0x10,$out),$inout1);
	&movdqu	($inout1,&QWP(0x10,$inp));
	&movups	(&QWP(0x20,$out),$inout2);
	&movdqu	($inout2,&QWP(0x20,$inp));
	&movups	(&QWP(0x30,$out),$inout3);
	&movdqu	($inout3,&QWP(0x30,$inp));
	&movups	(&QWP(0x40,$out),$inout4);
	&movdqu	($inout4,&QWP(0x40,$inp));
	&movups	(&QWP(0x50,$out),$inout5);
	&lea	($out,&DWP(0x60,$out));
	&movdqu	($inout5,&QWP(0x50,$inp));
	&lea	($inp,&DWP(0x60,$inp));
&set_label("ecb_enc_loop6_enter");

	&call	("_aesni_encrypt6");

	&mov	($key,$key_);			# restore $key
	&mov	($rounds,$rounds_);		# restore $rounds
	&sub	($len,0x60);
	&jnc	(&label("ecb_enc_loop6"));	# at least six more blocks

	# flush the last six results of the main loop
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&movups	(&QWP(0x20,$out),$inout2);
	&movups	(&QWP(0x30,$out),$inout3);
	&movups	(&QWP(0x40,$out),$inout4);
	&movups	(&QWP(0x50,$out),$inout5);
	&lea	($out,&DWP(0x60,$out));
	&add	($len,0x60);			# undo the last subtraction
	&jz	(&label("ecb_ret"));

&set_label("ecb_enc_tail");
	# 1..5 blocks left; each cmp serves two branches, the loads in
	# between do not touch the flags
	&movups	($inout0,&QWP(0,$inp));
	&cmp	($len,0x20);
	&jb	(&label("ecb_enc_one"));
	&movups	($inout1,&QWP(0x10,$inp));
	&je	(&label("ecb_enc_two"));
	&movups	($inout2,&QWP(0x20,$inp));
	&cmp	($len,0x40);
	&jb	(&label("ecb_enc_three"));
	&movups	($inout3,&QWP(0x30,$inp));
	&je	(&label("ecb_enc_four"));
	&movups	($inout4,&QWP(0x40,$inp));
	&xorps	($inout5,$inout5);		# five blocks: zero the 6th lane
	&call	("_aesni_encrypt6");
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&movups	(&QWP(0x20,$out),$inout2);
	&movups	(&QWP(0x30,$out),$inout3);
	&movups	(&QWP(0x40,$out),$inout4);
	&jmp	(&label("ecb_ret"));

&set_label("ecb_enc_one",16);
	if ($inline)
	{   &aesni_inline_generate1("enc");	}
	else
	{   &call	("_aesni_encrypt1");	}
	&movups	(&QWP(0,$out),$inout0);
	&jmp	(&label("ecb_ret"));

&set_label("ecb_enc_two",16);
	&xorps	($inout2,$inout2);		# two blocks: zero the 3rd lane
	&call	("_aesni_encrypt3");
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&jmp	(&label("ecb_ret"));

&set_label("ecb_enc_three",16);
	&call	("_aesni_encrypt3");
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&movups	(&QWP(0x20,$out),$inout2);
	&jmp	(&label("ecb_ret"));

&set_label("ecb_enc_four",16);
	&call	("_aesni_encrypt4");
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&movups	(&QWP(0x20,$out),$inout2);
	&movups	(&QWP(0x30,$out),$inout3);
	&jmp	(&label("ecb_ret"));
######################################################################
# Decryption: same structure as above, using the _aesni_decrypt* helpers.
&set_label("ecb_decrypt",16);
	&mov	($key_,$key);			# backup $key
	&mov	($rounds_,$rounds);		# backup $rounds
	&cmp	($len,0x60);
	&jb	(&label("ecb_dec_tail"));

	# prime the pipeline with the first six blocks
	&movdqu	($inout0,&QWP(0,$inp));
	&movdqu	($inout1,&QWP(0x10,$inp));
	&movdqu	($inout2,&QWP(0x20,$inp));
	&movdqu	($inout3,&QWP(0x30,$inp));
	&movdqu	($inout4,&QWP(0x40,$inp));
	&movdqu	($inout5,&QWP(0x50,$inp));
	&lea	($inp,&DWP(0x60,$inp));
	&sub	($len,0x60);
	&jmp	(&label("ecb_dec_loop6_enter"));

&set_label("ecb_dec_loop6",16);
	# store previous results, interleaved with loading the next six
	&movups	(&QWP(0,$out),$inout0);
	&movdqu	($inout0,&QWP(0,$inp));
	&movups	(&QWP(0x10,$out),$inout1);
	&movdqu	($inout1,&QWP(0x10,$inp));
	&movups	(&QWP(0x20,$out),$inout2);
	&movdqu	($inout2,&QWP(0x20,$inp));
	&movups	(&QWP(0x30,$out),$inout3);
	&movdqu	($inout3,&QWP(0x30,$inp));
	&movups	(&QWP(0x40,$out),$inout4);
	&movdqu	($inout4,&QWP(0x40,$inp));
	&movups	(&QWP(0x50,$out),$inout5);
	&lea	($out,&DWP(0x60,$out));
	&movdqu	($inout5,&QWP(0x50,$inp));
	&lea	($inp,&DWP(0x60,$inp));
&set_label("ecb_dec_loop6_enter");

	&call	("_aesni_decrypt6");

	&mov	($key,$key_);			# restore $key
	&mov	($rounds,$rounds_);		# restore $rounds
	&sub	($len,0x60);
	&jnc	(&label("ecb_dec_loop6"));	# at least six more blocks

	# flush the last six results of the main loop
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&movups	(&QWP(0x20,$out),$inout2);
	&movups	(&QWP(0x30,$out),$inout3);
	&movups	(&QWP(0x40,$out),$inout4);
	&movups	(&QWP(0x50,$out),$inout5);
	&lea	($out,&DWP(0x60,$out));
	&add	($len,0x60);			# undo the last subtraction
	&jz	(&label("ecb_ret"));

&set_label("ecb_dec_tail");
	# 1..5 blocks left; same dispatch as the encrypt tail
	&movups	($inout0,&QWP(0,$inp));
	&cmp	($len,0x20);
	&jb	(&label("ecb_dec_one"));
	&movups	($inout1,&QWP(0x10,$inp));
	&je	(&label("ecb_dec_two"));
	&movups	($inout2,&QWP(0x20,$inp));
	&cmp	($len,0x40);
	&jb	(&label("ecb_dec_three"));
	&movups	($inout3,&QWP(0x30,$inp));
	&je	(&label("ecb_dec_four"));
	&movups	($inout4,&QWP(0x40,$inp));
	&xorps	($inout5,$inout5);		# five blocks: zero the 6th lane
	&call	("_aesni_decrypt6");
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&movups	(&QWP(0x20,$out),$inout2);
	&movups	(&QWP(0x30,$out),$inout3);
	&movups	(&QWP(0x40,$out),$inout4);
	&jmp	(&label("ecb_ret"));

&set_label("ecb_dec_one",16);
	if ($inline)
	{   &aesni_inline_generate1("dec");	}
	else
	{   &call	("_aesni_decrypt1");	}
	&movups	(&QWP(0,$out),$inout0);
	&jmp	(&label("ecb_ret"));

&set_label("ecb_dec_two",16);
	&xorps	($inout2,$inout2);		# two blocks: zero the 3rd lane
	&call	("_aesni_decrypt3");
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&jmp	(&label("ecb_ret"));

&set_label("ecb_dec_three",16);
	&call	("_aesni_decrypt3");
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&movups	(&QWP(0x20,$out),$inout2);
	&jmp	(&label("ecb_ret"));

&set_label("ecb_dec_four",16);
	&call	("_aesni_decrypt4");
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&movups	(&QWP(0x20,$out),$inout2);
	&movups	(&QWP(0x30,$out),$inout3);
	# falls through to ecb_ret

&set_label("ecb_ret");
&function_end("aesni_ecb_encrypt");

######################################################################
# void aesni_ccm64_[en|de]crypt_blocks (const void *in, void *out,
#                         size_t blocks, const AES_KEY *key,
#                         const char *ivec,char *cmac);
#
# Handles only complete blocks, operates on 64-bit counter and
# does not update *ivec! Nor does it finalize CMAC value
# (see engine/eng_aesni.c for details)
#
# Both routines process one block per outer iteration and run two AES
# encryptions side by side: one on the counter block ($inout0) and one
# on the running CMAC ($cmac).  The updated CMAC is written back through
# the 6th argument on exit.
#
# Stack frame (16-byte aligned):
#    0..15  byte-reversal mask for pshufb
#   16..31  counter increment (low qword = 1, high qword = 0)
#   48      caller's esp
#
# NOTE(review): no check for blocks == 0 is visible in either routine;
# presumably callers guarantee at least one block -- confirm.
#
{ my $cmac=$inout1;
&function_begin("aesni_ccm64_encrypt_blocks");
	&mov	($inp,&wparam(0));
	&mov	($out,&wparam(1));
	&mov	($len,&wparam(2));
	&mov	($key,&wparam(3));
	&mov	($rounds_,&wparam(4));		# ivec pointer for now
	&mov	($rounds,&wparam(5));		# cmac pointer for now
	&mov	($key_,"esp");
	&sub	("esp",60);
	&and	("esp",-16);			# align stack
	&mov	(&DWP(48,"esp"),$key_);		# save caller's esp

	&movdqu	($ivec,&QWP(0,$rounds_));	# load ivec
	&movdqu	($cmac,&QWP(0,$rounds));	# load cmac
	&mov	($rounds,&DWP(240,$key));

	# compose byte-swap control mask for pshufb on stack
	# (bytes 15,14,...,0 in memory order, i.e. a full 16-byte reversal)
	&mov	(&DWP(0,"esp"),0x0c0d0e0f);
	&mov	(&DWP(4,"esp"),0x08090a0b);
	&mov	(&DWP(8,"esp"),0x04050607);
	&mov	(&DWP(12,"esp"),0x00010203);

	# compose counter increment vector on stack
	&mov	($rounds_,1);
	&xor	($key_,$key_);
	&mov	(&DWP(16,"esp"),$rounds_);
	&mov	(&DWP(20,"esp"),$key_);
	&mov	(&DWP(24,"esp"),$key_);
	&mov	(&DWP(28,"esp"),$key_);

	&shr	($rounds,1);			# inner loop does two rounds per pass
	&lea	($key_,&DWP(0,$key));		# $key_ = start of key schedule
	&movdqa	($inout3,&QWP(0,"esp"));	# bswap mask
	&movdqa	($inout0,$ivec);		# first counter block, as supplied
	&mov	($rounds_,$rounds);		# backup halved round count
	&pshufb	($ivec,$inout3);		# keep counter byte-reversed for paddq

&set_label("ccm64_enc_outer");
	&$movekey	($rndkey0,&QWP(0,$key_));
	&mov	($rounds,$rounds_);
	&movups	($in0,&QWP(0,$inp));

	&xorps	($inout0,$rndkey0);
	&$movekey	($rndkey1,&QWP(16,$key_));
	&xorps	($rndkey0,$in0);		# round-0 key ^ input
	&lea	($key,&DWP(32,$key_));
	&xorps	($cmac,$rndkey0);		# cmac^=inp
	&$movekey	($rndkey0,&QWP(0,$key));

&set_label("ccm64_enc2_loop");
	# two rounds per pass on both lanes; jnz tests the flags left by dec
	&aesenc	($inout0,$rndkey1);
	&dec	($rounds);
	&aesenc	($cmac,$rndkey1);
	&$movekey	($rndkey1,&QWP(16,$key));
	&aesenc	($inout0,$rndkey0);
	&lea	($key,&DWP(32,$key));
	&aesenc	($cmac,$rndkey0);
	&$movekey	($rndkey0,&QWP(0,$key));
	&jnz	(&label("ccm64_enc2_loop"));
	&aesenc	($inout0,$rndkey1);
	&aesenc	($cmac,$rndkey1);
	&paddq	($ivec,&QWP(16,"esp"));		# bump 64-bit counter
	&aesenclast	($inout0,$rndkey0);
	&aesenclast	($cmac,$rndkey0);

	&dec	($len);				# flags consumed by jnz below
	&lea	($inp,&DWP(16,$inp));
	&xorps	($in0,$inout0);			# inp^=E(ivec)
	&movdqa	($inout0,$ivec);
	&movups	(&QWP(0,$out),$in0);		# save output
	&lea	($out,&DWP(16,$out));
	&pshufb	($inout0,$inout3);		# next counter block, original byte order
	&jnz	(&label("ccm64_enc_outer"));

	&mov	("esp",&DWP(48,"esp"));		# restore caller's esp
	&mov	($out,&wparam(5));
	&movups	(&QWP(0,$out),$cmac);		# write back cmac
&function_end("aesni_ccm64_encrypt_blocks");

# Decrypt variant: the CMAC is computed over the recovered plaintext, so
# the first counter block is encrypted up front and each outer iteration
# then folds the block just decrypted into $cmac while encrypting the
# next counter block.
&function_begin("aesni_ccm64_decrypt_blocks");
	&mov	($inp,&wparam(0));
	&mov	($out,&wparam(1));
	&mov	($len,&wparam(2));
	&mov	($key,&wparam(3));
	&mov	($rounds_,&wparam(4));		# ivec pointer for now
	&mov	($rounds,&wparam(5));		# cmac pointer for now
	&mov	($key_,"esp");
	&sub	("esp",60);
	&and	("esp",-16);			# align stack
	&mov	(&DWP(48,"esp"),$key_);		# save caller's esp

	&movdqu	($ivec,&QWP(0,$rounds_));	# load ivec
	&movdqu	($cmac,&QWP(0,$rounds));	# load cmac
	&mov	($rounds,&DWP(240,$key));

	# compose byte-swap control mask for pshufb on stack
	&mov	(&DWP(0,"esp"),0x0c0d0e0f);
	&mov	(&DWP(4,"esp"),0x08090a0b);
	&mov	(&DWP(8,"esp"),0x04050607);
	&mov	(&DWP(12,"esp"),0x00010203);

	# compose counter increment vector on stack
	&mov	($rounds_,1);
	&xor	($key_,$key_);
	&mov	(&DWP(16,"esp"),$rounds_);
	&mov	(&DWP(20,"esp"),$key_);
	&mov	(&DWP(24,"esp"),$key_);
	&mov	(&DWP(28,"esp"),$key_);

	&movdqa	($inout3,&QWP(0,"esp"));	# bswap mask
	&movdqa	($inout0,$ivec);		# first counter block, as supplied

	&mov	($key_,$key);			# backup $key
	&mov	($rounds_,$rounds);		# backup full (not halved) round count

	&pshufb	($ivec,$inout3);		# keep counter byte-reversed for paddq
	# presumably encrypts $inout0 in place using $key/$rounds; the helper
	# is defined outside this section -- confirm
	if ($inline)
	{   &aesni_inline_generate1("enc");	}
	else
	{   &call	("_aesni_encrypt1");	}
	&movups	($in0,&QWP(0,$inp));		# load inp
	&paddq	($ivec,&QWP(16,"esp"));		# bump 64-bit counter
	&lea	($inp,&QWP(16,$inp));
	&jmp	(&label("ccm64_dec_outer"));

&set_label("ccm64_dec_outer",16);
	&xorps	($in0,$inout0);			# inp ^= E(ivec)
	&movdqa	($inout0,$ivec);
	&mov	($rounds,$rounds_);
	&movups	(&QWP(0,$out),$in0);		# save output
	&lea	($out,&DWP(16,$out));
	&pshufb	($inout0,$inout3);		# next counter block, original byte order

	&sub	($len,1);
	&jz	(&label("ccm64_dec_break"));	# last block: finish cmac separately

	&$movekey	($rndkey0,&QWP(0,$key_));
	&shr	($rounds,1);			# inner loop does two rounds per pass
	&$movekey	($rndkey1,&QWP(16,$key_));
	&xorps	($in0,$rndkey0);		# round-0 key ^ output
	&lea	($key,&DWP(32,$key_));
	&xorps	($inout0,$rndkey0);
	&xorps	($cmac,$in0);			# cmac^=out
	&$movekey	($rndkey0,&QWP(0,$key));

&set_label("ccm64_dec2_loop");
	# two rounds per pass on both lanes; jnz tests the flags left by dec
	&aesenc	($inout0,$rndkey1);
	&dec	($rounds);
	&aesenc	($cmac,$rndkey1);
	&$movekey	($rndkey1,&QWP(16,$key));
	&aesenc	($inout0,$rndkey0);
	&lea	($key,&DWP(32,$key));
	&aesenc	($cmac,$rndkey0);
	&$movekey	($rndkey0,&QWP(0,$key));
	&jnz	(&label("ccm64_dec2_loop"));
	&movups	($in0,&QWP(0,$inp));		# load inp
	&paddq	($ivec,&QWP(16,"esp"));		# bump 64-bit counter
	&aesenc	($inout0,$rndkey1);
	&aesenc	($cmac,$rndkey1);
	&lea	($inp,&QWP(16,$inp));
	&aesenclast	($inout0,$rndkey0);
	&aesenclast	($cmac,$rndkey0);
	&jmp	(&label("ccm64_dec_outer"));

&set_label("ccm64_dec_break",16);
	&mov	($key,$key_);			# restore $key
	# Final CMAC update over the last recovered block held in $in0.
	# NOTE(review): the non-inline branch passes $cmac as a second argument
	# to &call; whether that path really operates on $cmac (and folds in
	# $in0) cannot be confirmed from this section -- verify against the
	# definitions of `call' and `_aesni_encrypt1'.
	if ($inline)
	{   &aesni_inline_generate1("enc",$cmac,$in0);	}
	else
	{   &call	("_aesni_encrypt1",$cmac);	}

	&mov	("esp",&DWP(48,"esp"));		# restore caller's esp
	&mov	($out,&wparam(5));
	&movups	(&QWP(0,$out),$cmac);		# write back cmac
&function_end("aesni_ccm64_decrypt_blocks");
}

######################################################################
# void aesni_ctr32_encrypt_blocks (const void *in, void *out,
#                         size_t blocks, const AES_KEY *key,
#                         const char *ivec);
#
# Handles only complete blocks, operates on 32-bit counter and
# does not update *ivec!
(see engine/eng_aesni.c for details) 7672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# 7682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# stack layout: 7692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# 0 pshufb mask 7702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# 16 vector addend: 0,6,6,6 7712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# 32 counter-less ivec 7722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# 48 1st triplet of counter vector 7732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# 64 2nd triplet of counter vector 7742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# 80 saved %esp 7752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 7762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&function_begin("aesni_ctr32_encrypt_blocks"); 7772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($inp,&wparam(0)); 7782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($out,&wparam(1)); 7792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($len,&wparam(2)); 7802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($key,&wparam(3)); 7812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds_,&wparam(4)); 7822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($key_,"esp"); 7832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ("esp",88); 7842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &and ("esp",-16); # align stack 7852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(80,"esp"),$key_); 7862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 7872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &cmp ($len,1); 7882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &je (&label("ctr32_one_shortcut")); 7892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 
# [aesni_ctr32_encrypt_blocks: prologue recap]
# The five arguments are loaded into $inp, $out, $len, $key and $rounds_;
# $rounds_ temporarily holds the ivec pointer (argument 4), not a round count.
# The caller's %esp is parked in $key_, an 88-byte frame is carved out and
# aligned down to 16 bytes (the frame slots are accessed with movdqa), and the
# old %esp is stored at 80(%esp) so that ctr32_ret can restore it.
# A request for exactly one block jumps to ctr32_one_shortcut and skips all of
# the mask/counter-vector construction that follows.
7902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout5,&QWP(0,$rounds_)); # load ivec 7912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 7922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org # compose byte-swap control mask for pshufb on stack 7932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(0,"esp"),0x0c0d0e0f); 7942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(4,"esp"),0x08090a0b); 7952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(8,"esp"),0x04050607); 7962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(12,"esp"),0x00010203); 7972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 7982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org # compose counter increment vector on stack 7992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds,6); 8002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xor ($key_,$key_); 8012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(16,"esp"),$rounds); 8022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(20,"esp"),$rounds); 8032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(24,"esp"),$rounds); 8042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(28,"esp"),$key_); 8052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 8062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pextrd ($rounds_,$inout5,3); # pull 32-bit counter 8072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pinsrd ($inout5,$key_,3); # wipe 32-bit counter 8082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 8092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds,&DWP(240,$key)); # key->rounds 8102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 8112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org # compose 2 vectors of 3x32-bit counters 
# [counter triplets]
# The counter dword extracted from lane 3 of ivec is byte-swapped with bswap so
# that plain inc/lea arithmetic can be applied to it. $rndkey1 (zeroed first)
# receives counter+0, +1, +2 in lanes 0..2 and $rndkey0 receives counter+3, +4,
# +5 (the lea adds 3, then two inc's). Both triplets are stored to 48(%esp) and
# 64(%esp) *before* the pshufb, i.e. the stack copies stay in the arithmetic
# form that the paddd in the main loop later updates; only the register copies
# are run through the byte-swap mask.
8122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &bswap ($rounds_); 8132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($rndkey1,$rndkey1); 8142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($rndkey0,$rndkey0); 8152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($inout0,&QWP(0,"esp")); # load byte-swap mask 8162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pinsrd ($rndkey1,$rounds_,0); 8172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($key_,&DWP(3,$rounds_)); 8182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pinsrd ($rndkey0,$key_,0); 8192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &inc ($rounds_); 8202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pinsrd ($rndkey1,$rounds_,1); 8212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &inc ($key_); 8222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pinsrd ($rndkey0,$key_,1); 8232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &inc ($rounds_); 8242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pinsrd ($rndkey1,$rounds_,2); 8252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &inc ($key_); 8262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pinsrd ($rndkey0,$key_,2); 8272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa (&QWP(48,"esp"),$rndkey1); # save 1st triplet 8282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufb ($rndkey1,$inout0); # byte swap 8292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa (&QWP(64,"esp"),$rndkey0); # save 2nd triplet 8302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufb ($rndkey0,$inout0); # byte swap 8312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 8322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($inout0,$rndkey1,3<<6); # place counter to upper dword 
# [dispatch: bulk loop vs. tail]
# With fewer than 6 blocks control goes straight to ctr32_tail, which expects
# the counter-less ivec still in $inout5 and the first two counter blocks
# already started in $inout0/$inout1. Otherwise the counter-less ivec is saved
# at 32(%esp), $rounds is halved, and $key/$rounds are backed up in
# $key_/$rounds_ because the 6-block loop overwrites both on every pass
# ($key via lea from $key_, $rounds via dec and a reload from $rounds_).
# $len is pre-decremented by 6 so the loop can terminate on the borrow (jnc).
8332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($inout1,$rndkey1,2<<6); 8342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &cmp ($len,6); 8352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jb (&label("ctr32_tail")); 8362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa (&QWP(32,"esp"),$inout5); # save counter-less ivec 8372c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &shr ($rounds,1); 8382c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($key_,$key); # backup $key 8392c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds_,$rounds); # backup $rounds 8402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ($len,6); 8412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("ctr32_loop6")); 8422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 8432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("ctr32_loop6",16); 8442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($inout2,$rndkey1,1<<6); 8452c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($rndkey1,&QWP(32,"esp")); # pull counter-less ivec 8462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($inout3,$rndkey0,3<<6); 8472c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &por ($inout0,$rndkey1); # merge counter-less ivec 8482c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($inout4,$rndkey0,2<<6); 8492c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &por ($inout1,$rndkey1); 8502c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($inout5,$rndkey0,1<<6); 8512c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &por ($inout2,$rndkey1); 8522c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &por ($inout3,$rndkey1); 8532c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &por ($inout4,$rndkey1); 
8542c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &por ($inout5,$rndkey1); 8552c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 8562c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org # inlining _aesni_encrypt6's prologue gives ~4% improvement... 8572c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &$movekey ($rndkey0,&QWP(0,$key_)); 8582c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &$movekey ($rndkey1,&QWP(16,$key_)); 8592c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($key,&DWP(32,$key_)); 8602c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &dec ($rounds); 8612c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout0,$rndkey0); 8622c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout1,$rndkey0); 8632c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &aesenc ($inout0,$rndkey1); 8642c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout2,$rndkey0); 8652c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &aesenc ($inout1,$rndkey1); 8662c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout3,$rndkey0); 8672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &aesenc ($inout2,$rndkey1); 8682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout4,$rndkey0); 8692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &aesenc ($inout3,$rndkey1); 8702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout5,$rndkey0); 8712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &aesenc ($inout4,$rndkey1); 8722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &$movekey ($rndkey0,&QWP(0,$key)); 8732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &aesenc ($inout5,$rndkey1); 8742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 8752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &call (&label("_aesni_encrypt6_enter")); 
# [after the 6-way encryption]
# On return $inout0..$inout5 are xor-ed with 0x60 bytes read from $inp and
# written to $out. Interleaved with those loads/stores, the increment vector at
# 16(%esp) is added (paddd) to both counter triplets kept at 48(%esp)/64(%esp);
# the updated triplets are written back, byte-swapped in registers, and
# $inout0/$inout1 are already seeded for the next pass before the loop test.
# $rndkey0/$rndkey1 double as scratch for input data and for the triplets here,
# which is why the round keys are reloaded from $key_ at the top of the loop.
8762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 8772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($rndkey1,&QWP(0,$inp)); 8782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($rndkey0,&QWP(0x10,$inp)); 8792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$rndkey1); 8802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($rndkey1,&QWP(0x20,$inp)); 8812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,$rndkey0); 8822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0,$out),$inout0); 8832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($rndkey0,&QWP(16,"esp")); # load increment 8842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout2,$rndkey1); 8852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($rndkey1,&QWP(48,"esp")); # load 1st triplet 8862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0x10,$out),$inout1); 8872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0x20,$out),$inout2); 8882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 8892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &paddd ($rndkey1,$rndkey0); # 1st triplet increment 8902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &paddd ($rndkey0,&QWP(64,"esp")); # 2nd triplet increment 8912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($inout0,&QWP(0,"esp")); # load byte swap mask 8922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 8932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout1,&QWP(0x30,$inp)); 8942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout2,&QWP(0x40,$inp)); 8952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout3,$inout1); 8962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout1,&QWP(0x50,$inp)); 
8972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($inp,&DWP(0x60,$inp)); 8982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa (&QWP(48,"esp"),$rndkey1); # save 1st triplet 8992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufb ($rndkey1,$inout0); # byte swap 9002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout4,$inout2); 9012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0x30,$out),$inout3); 9022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout5,$inout1); 9032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa (&QWP(64,"esp"),$rndkey0); # save 2nd triplet 9042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufb ($rndkey0,$inout0); # byte swap 9052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0x40,$out),$inout4); 9062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($inout0,$rndkey1,3<<6); 9072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0x50,$out),$inout5); 9082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($out,&DWP(0x60,$out)); 9092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 9102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds,$rounds_); 9112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($inout1,$rndkey1,2<<6); 9122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ($len,6); 9132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jnc (&label("ctr32_loop6")); 9142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 9152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &add ($len,6); 9162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jz (&label("ctr32_ret")); 9172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($key,$key_); 9182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea 
($rounds,&DWP(1,"",$rounds,2)); # restore $rounds 9192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($inout5,&QWP(32,"esp")); # pull count-less ivec 9202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 9212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("ctr32_tail"); 9222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &por ($inout0,$inout5); 9232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &cmp ($len,2); 9242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jb (&label("ctr32_one")); 9252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 9262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($inout2,$rndkey1,1<<6); 9272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &por ($inout1,$inout5); 9282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &je (&label("ctr32_two")); 9292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 9302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($inout3,$rndkey0,3<<6); 9312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &por ($inout2,$inout5); 9322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &cmp ($len,4); 9332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jb (&label("ctr32_three")); 9342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 9352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($inout4,$rndkey0,2<<6); 9362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &por ($inout3,$inout5); 9372c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &je (&label("ctr32_four")); 9382c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 9392c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &por ($inout4,$inout5); 9402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &call ("_aesni_encrypt6"); 9412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($rndkey1,&QWP(0,$inp)); 
# [tail dispatch recap]
# ctr32_tail selects a handler from the remaining block count: below 2 goes to
# ctr32_one, 2 to ctr32_two, 3 to ctr32_three, 4 to ctr32_four. The je after
# each pshufd/por pair still tests the flags of the preceding cmp, since the
# SSE instructions in between do not touch EFLAGS. Falling through to the
# _aesni_encrypt6 call above therefore means five blocks are left; only
# $inout0..$inout4 are xor-ed with input and stored (0x50 bytes) below.
# When the tail is entered after the bulk loop, $rounds is rebuilt as
# 2*$rounds+1 because the loop left it holding the decremented half count.
# NOTE(review): a block count of 0 is not special-cased anywhere in this
# function; it appears to take the jb to ctr32_tail and then ctr32_one, which
# reads and writes one 16-byte block. Confirm that callers never pass 0.
9422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($rndkey0,&QWP(0x10,$inp)); 9432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$rndkey1); 9442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($rndkey1,&QWP(0x20,$inp)); 9452c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,$rndkey0); 9462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($rndkey0,&QWP(0x30,$inp)); 9472c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout2,$rndkey1); 9482c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($rndkey1,&QWP(0x40,$inp)); 9492c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout3,$rndkey0); 9502c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0,$out),$inout0); 9512c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout4,$rndkey1); 9522c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0x10,$out),$inout1); 9532c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0x20,$out),$inout2); 9542c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0x30,$out),$inout3); 9552c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0x40,$out),$inout4); 9562c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("ctr32_ret")); 9572c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 9582c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("ctr32_one_shortcut",16); 9592c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout0,&QWP(0,$rounds_)); # load ivec 9602c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds,&DWP(240,$key)); 9612c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 9622c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("ctr32_one"); 9632c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org if ($inline) 
9642c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org { &aesni_inline_generate1("enc"); } 9652c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org else 9662c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org { &call ("_aesni_encrypt1"); } 9672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($in0,&QWP(0,$inp)); 9682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($in0,$inout0); 9692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0,$out),$in0); 9702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("ctr32_ret")); 9712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 9722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("ctr32_two",16); 9732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &call ("_aesni_encrypt3"); 9742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout3,&QWP(0,$inp)); 9752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout4,&QWP(0x10,$inp)); 9762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$inout3); 9772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,$inout4); 9782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0,$out),$inout0); 9792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0x10,$out),$inout1); 9802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("ctr32_ret")); 9812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 9822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("ctr32_three",16); 9832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &call ("_aesni_encrypt3"); 9842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout3,&QWP(0,$inp)); 9852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout4,&QWP(0x10,$inp)); 9862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 
&xorps ($inout0,$inout3); 9872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout5,&QWP(0x20,$inp)); 9882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,$inout4); 9892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0,$out),$inout0); 9902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout2,$inout5); 9912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0x10,$out),$inout1); 9922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0x20,$out),$inout2); 9932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("ctr32_ret")); 9942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 9952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("ctr32_four",16); 9962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &call ("_aesni_encrypt4"); 9972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout4,&QWP(0,$inp)); 9982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout5,&QWP(0x10,$inp)); 9992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($rndkey1,&QWP(0x20,$inp)); 10002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$inout4); 10012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($rndkey0,&QWP(0x30,$inp)); 10022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,$inout5); 10032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0,$out),$inout0); 10042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout2,$rndkey1); 10052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0x10,$out),$inout1); 10062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout3,$rndkey0); 10072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0x20,$out),$inout2); 
10082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0x30,$out),$inout3); 10092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 10102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("ctr32_ret"); 10112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ("esp",&DWP(80,"esp")); 10122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&function_end("aesni_ctr32_encrypt_blocks"); 10132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 10142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org###################################################################### 10152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# void aesni_xts_[en|de]crypt(const char *inp,char *out,size_t len, 10162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# const AES_KEY *key1, const AES_KEY *key2 10172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# const unsigned char iv[16]); 10182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# 10192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org{ my ($tweak,$twtmp,$twres,$twmask)=($rndkey1,$rndkey0,$inout0,$inout1); 10202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 10212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&function_begin("aesni_xts_encrypt"); 10222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($key,&wparam(4)); # key2 10232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($inp,&wparam(5)); # clear-text tweak 10242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 10252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds,&DWP(240,$key)); # key2->rounds 10262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout0,&QWP(0,$inp)); 10272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org if ($inline) 10282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org { &aesni_inline_generate1("enc"); } 
10292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org else 10302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org { &call ("_aesni_encrypt1"); } 10312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 10322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($inp,&wparam(0)); 10332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($out,&wparam(1)); 10342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($len,&wparam(2)); 10352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($key,&wparam(3)); # key1 10362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 10372c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($key_,"esp"); 10382c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ("esp",16*7+8); 10392c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds,&DWP(240,$key)); # key1->rounds 10402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &and ("esp",-16); # align stack 10412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 10422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(16*6+0,"esp"),0x87); # compose the magic constant 10432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(16*6+4,"esp"),0); 10442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(16*6+8,"esp"),1); 10452c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(16*6+12,"esp"),0); 10462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(16*7+0,"esp"),$len); # save original $len 10472c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(16*7+4,"esp"),$key_); # save original %esp 10482c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 10492c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($tweak,$inout0); 10502c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($twtmp,$twtmp); 
10512c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($twmask,&QWP(6*16,"esp")); # 0x0...010...87 10522c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pcmpgtd($twtmp,$tweak); # broadcast upper bits 10532c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 10542c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &and ($len,-16); 10552c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($key_,$key); # backup $key 10562c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds_,$rounds); # backup $rounds 10572c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ($len,16*6); 10582c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jc (&label("xts_enc_short")); 10592c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 10602c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &shr ($rounds,1); 10612c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds_,$rounds); 10622c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("xts_enc_loop6")); 10632c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 10642c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_enc_loop6",16); 10652c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org for ($i=0;$i<4;$i++) { 10662c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($twres,$twtmp,0x13); 10672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($twtmp,$twtmp); 10682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa (&QWP(16*$i,"esp"),$tweak); 10692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &paddq ($tweak,$tweak); # &psllq($tweak,1); 10702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pand ($twres,$twmask); # isolate carry and residue 10712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pcmpgtd ($twtmp,$tweak); # broadcast upper bits 10722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor 
($tweak,$twres); 10732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org } 10742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($inout5,$twtmp,0x13); 10752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa (&QWP(16*$i++,"esp"),$tweak); 10762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &paddq ($tweak,$tweak); # &psllq($tweak,1); 10772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &$movekey ($rndkey0,&QWP(0,$key_)); 10782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pand ($inout5,$twmask); # isolate carry and residue 10792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout0,&QWP(0,$inp)); # load input 10802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout5,$tweak); 10812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 10822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org # inline _aesni_encrypt6 prologue and flip xor with tweak and key[0] 10832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout1,&QWP(16*1,$inp)); 10842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$rndkey0); # input^=rndkey[0] 10852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout2,&QWP(16*2,$inp)); 10862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout1,$rndkey0); 10872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout3,&QWP(16*3,$inp)); 10882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout2,$rndkey0); 10892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout4,&QWP(16*4,$inp)); 10902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout3,$rndkey0); 10912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($rndkey1,&QWP(16*5,$inp)); 10922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout4,$rndkey0); 
10932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($inp,&DWP(16*6,$inp)); 10942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout0,&QWP(16*0,"esp")); # input^=tweak 10952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa (&QWP(16*$i,"esp"),$inout5); # save last tweak 10962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout5,$rndkey1); 10972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 10982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &$movekey ($rndkey1,&QWP(16,$key_)); 10992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($key,&DWP(32,$key_)); 11002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout1,&QWP(16*1,"esp")); 11012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &aesenc ($inout0,$rndkey1); 11022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout2,&QWP(16*2,"esp")); 11032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &aesenc ($inout1,$rndkey1); 11042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout3,&QWP(16*3,"esp")); 11052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &dec ($rounds); 11062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &aesenc ($inout2,$rndkey1); 11072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout4,&QWP(16*4,"esp")); 11082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &aesenc ($inout3,$rndkey1); 11092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout5,$rndkey0); 11102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &aesenc ($inout4,$rndkey1); 11112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &$movekey ($rndkey0,&QWP(0,$key)); 11122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &aesenc ($inout5,$rndkey1); 11132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &call (&label("_aesni_encrypt6_enter")); 
11142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 11152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($tweak,&QWP(16*5,"esp")); # last tweak 11162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($twtmp,$twtmp); 11172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak 11182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pcmpgtd ($twtmp,$tweak); # broadcast upper bits 11192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,&QWP(16*1,"esp")); 11202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*0,$out),$inout0); # write output 11212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout2,&QWP(16*2,"esp")); 11222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*1,$out),$inout1); 11232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout3,&QWP(16*3,"esp")); 11242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*2,$out),$inout2); 11252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout4,&QWP(16*4,"esp")); 11262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*3,$out),$inout3); 11272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout5,$tweak); 11282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*4,$out),$inout4); 11292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($twres,$twtmp,0x13); 11302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*5,$out),$inout5); 11312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($out,&DWP(16*6,$out)); 11322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($twmask,&QWP(16*6,"esp")); # 0x0...010...87 11332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 11342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 
&pxor ($twtmp,$twtmp); 11352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &paddq ($tweak,$tweak); # &psllq($tweak,1); 11362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pand ($twres,$twmask); # isolate carry and residue 11372c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pcmpgtd($twtmp,$tweak); # broadcast upper bits 11382c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds,$rounds_); # restore $rounds 11392c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($tweak,$twres); 11402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 11412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ($len,16*6); 11422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jnc (&label("xts_enc_loop6")); 11432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 11442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($rounds,&DWP(1,"",$rounds,2)); # restore $rounds 11452c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($key,$key_); # restore $key 11462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds_,$rounds); 11472c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 11482c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_enc_short"); 11492c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &add ($len,16*6); 11502c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jz (&label("xts_enc_done6x")); 11512c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 11522c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($inout3,$tweak); # put aside previous tweak 11532c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &cmp ($len,0x20); 11542c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jb (&label("xts_enc_one")); 11552c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 11562c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($twres,$twtmp,0x13); 
11572c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($twtmp,$twtmp); 11582c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &paddq ($tweak,$tweak); # &psllq($tweak,1); 11592c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pand ($twres,$twmask); # isolate carry and residue 11602c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pcmpgtd($twtmp,$tweak); # broadcast upper bits 11612c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($tweak,$twres); 11622c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &je (&label("xts_enc_two")); 11632c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 11642c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($twres,$twtmp,0x13); 11652c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($twtmp,$twtmp); 11662c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($inout4,$tweak); # put aside previous tweak 11672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &paddq ($tweak,$tweak); # &psllq($tweak,1); 11682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pand ($twres,$twmask); # isolate carry and residue 11692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pcmpgtd($twtmp,$tweak); # broadcast upper bits 11702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($tweak,$twres); 11712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &cmp ($len,0x40); 11722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jb (&label("xts_enc_three")); 11732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 11742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($twres,$twtmp,0x13); 11752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($twtmp,$twtmp); 11762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($inout5,$tweak); # put aside previous tweak 11772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &paddq ($tweak,$tweak); # 
&psllq($tweak,1); 11782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pand ($twres,$twmask); # isolate carry and residue 11792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pcmpgtd($twtmp,$tweak); # broadcast upper bits 11802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($tweak,$twres); 11812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa (&QWP(16*0,"esp"),$inout3); 11822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa (&QWP(16*1,"esp"),$inout4); 11832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &je (&label("xts_enc_four")); 11842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 11852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa (&QWP(16*2,"esp"),$inout5); 11862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($inout5,$twtmp,0x13); 11872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa (&QWP(16*3,"esp"),$tweak); 11882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &paddq ($tweak,$tweak); # &psllq($inout0,1); 11892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pand ($inout5,$twmask); # isolate carry and residue 11902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout5,$tweak); 11912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 11922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout0,&QWP(16*0,$inp)); # load input 11932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout1,&QWP(16*1,$inp)); 11942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout2,&QWP(16*2,$inp)); 11952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout0,&QWP(16*0,"esp")); # input^=tweak 11962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout3,&QWP(16*3,$inp)); 11972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout1,&QWP(16*1,"esp")); 
11982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout4,&QWP(16*4,$inp)); 11992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout2,&QWP(16*2,"esp")); 12002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($inp,&DWP(16*5,$inp)); 12012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout3,&QWP(16*3,"esp")); 12022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa (&QWP(16*4,"esp"),$inout5); # save last tweak 12032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout4,$inout5); 12042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 12052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &call ("_aesni_encrypt6"); 12062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 12072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movaps ($tweak,&QWP(16*4,"esp")); # last tweak 12082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak 12092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,&QWP(16*1,"esp")); 12102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout2,&QWP(16*2,"esp")); 12112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*0,$out),$inout0); # write output 12122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout3,&QWP(16*3,"esp")); 12132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*1,$out),$inout1); 12142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout4,$tweak); 12152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*2,$out),$inout2); 12162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*3,$out),$inout3); 12172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*4,$out),$inout4); 12182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea 
($out,&DWP(16*5,$out)); 12192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("xts_enc_done")); 12202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 12212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_enc_one",16); 12222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout0,&QWP(16*0,$inp)); # load input 12232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($inp,&DWP(16*1,$inp)); 12242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$inout3); # input^=tweak 12252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org if ($inline) 12262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org { &aesni_inline_generate1("enc"); } 12272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org else 12282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org { &call ("_aesni_encrypt1"); } 12292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$inout3); # output^=tweak 12302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*0,$out),$inout0); # write output 12312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($out,&DWP(16*1,$out)); 12322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 12332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($tweak,$inout3); # last tweak 12342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("xts_enc_done")); 12352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 12362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_enc_two",16); 12372c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movaps ($inout4,$tweak); # put aside last tweak 12382c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 12392c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout0,&QWP(16*0,$inp)); # load input 12402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups 
($inout1,&QWP(16*1,$inp)); 12412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($inp,&DWP(16*2,$inp)); 12422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$inout3); # input^=tweak 12432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,$inout4); 12442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout2,$inout2); 12452c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 12462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &call ("_aesni_encrypt3"); 12472c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 12482c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$inout3); # output^=tweak 12492c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,$inout4); 12502c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*0,$out),$inout0); # write output 12512c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*1,$out),$inout1); 12522c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($out,&DWP(16*2,$out)); 12532c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 12542c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($tweak,$inout4); # last tweak 12552c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("xts_enc_done")); 12562c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 12572c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_enc_three",16); 12582c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movaps ($inout5,$tweak); # put aside last tweak 12592c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout0,&QWP(16*0,$inp)); # load input 12602c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout1,&QWP(16*1,$inp)); 12612c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout2,&QWP(16*2,$inp)); 
12622c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($inp,&DWP(16*3,$inp)); 12632c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$inout3); # input^=tweak 12642c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,$inout4); 12652c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout2,$inout5); 12662c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 12672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &call ("_aesni_encrypt3"); 12682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 12692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$inout3); # output^=tweak 12702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,$inout4); 12712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout2,$inout5); 12722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*0,$out),$inout0); # write output 12732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*1,$out),$inout1); 12742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*2,$out),$inout2); 12752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($out,&DWP(16*3,$out)); 12762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 12772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($tweak,$inout5); # last tweak 12782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("xts_enc_done")); 12792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 12802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_enc_four",16); 12812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movaps ($inout4,$tweak); # put aside last tweak 12822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 12832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout0,&QWP(16*0,$inp)); # load input 
12842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout1,&QWP(16*1,$inp)); 12852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout2,&QWP(16*2,$inp)); 12862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,&QWP(16*0,"esp")); # input^=tweak 12872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout3,&QWP(16*3,$inp)); 12882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($inp,&DWP(16*4,$inp)); 12892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,&QWP(16*1,"esp")); 12902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout2,$inout5); 12912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout3,$inout4); 12922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 12932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &call ("_aesni_encrypt4"); 12942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 12952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak 12962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,&QWP(16*1,"esp")); 12972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout2,$inout5); 12982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*0,$out),$inout0); # write output 12992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout3,$inout4); 13002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*1,$out),$inout1); 13012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*2,$out),$inout2); 13022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*3,$out),$inout3); 13032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($out,&DWP(16*4,$out)); 13042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 
13052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($tweak,$inout4); # last tweak 13062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("xts_enc_done")); 13072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 13082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_enc_done6x",16); # $tweak is pre-calculated 13092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($len,&DWP(16*7+0,"esp")); # restore original $len 13102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &and ($len,15); 13112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jz (&label("xts_enc_ret")); 13122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($inout3,$tweak); 13132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(16*7+0,"esp"),$len); # save $len%16 13142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("xts_enc_steal")); 13152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 13162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_enc_done",16); 13172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($len,&DWP(16*7+0,"esp")); # restore original $len 13182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($twtmp,$twtmp); 13192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &and ($len,15); 13202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jz (&label("xts_enc_ret")); 13212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 13222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pcmpgtd($twtmp,$tweak); # broadcast upper bits 13232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(16*7+0,"esp"),$len); # save $len%16 13242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($inout3,$twtmp,0x13); 13252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &paddq ($tweak,$tweak); # &psllq($tweak,1); 
13262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pand ($inout3,&QWP(16*6,"esp")); # isolate carry and residue 13272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout3,$tweak); 13282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 13292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_enc_steal"); 13302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movz ($rounds,&BP(0,$inp)); 13312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movz ($key,&BP(-16,$out)); 13322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($inp,&DWP(1,$inp)); 13332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&BP(-16,$out),&LB($rounds)); 13342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&BP(0,$out),&LB($key)); 13352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($out,&DWP(1,$out)); 13362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ($len,1); 13372c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jnz (&label("xts_enc_steal")); 13382c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 13392c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ($out,&DWP(16*7+0,"esp")); # rewind $out 13402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($key,$key_); # restore $key 13412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds,$rounds_); # restore $rounds 13422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 13432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout0,&QWP(-16,$out)); # load input 13442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$inout3); # input^=tweak 13452c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org if ($inline) 13462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org { &aesni_inline_generate1("enc"); } 13472c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org else 
13482c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org { &call ("_aesni_encrypt1"); } 13492c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$inout3); # output^=tweak 13502c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(-16,$out),$inout0); # write output 13512c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 13522c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_enc_ret"); 13532c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ("esp",&DWP(16*7+4,"esp")); # restore %esp 13542c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&function_end("aesni_xts_encrypt"); 13552c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 13562c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&function_begin("aesni_xts_decrypt"); 13572c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($key,&wparam(4)); # key2 13582c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($inp,&wparam(5)); # clear-text tweak 13592c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 13602c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds,&DWP(240,$key)); # key2->rounds 13612c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout0,&QWP(0,$inp)); 13622c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org if ($inline) 13632c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org { &aesni_inline_generate1("enc"); } 13642c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org else 13652c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org { &call ("_aesni_encrypt1"); } 13662c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 13672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($inp,&wparam(0)); 13682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($out,&wparam(1)); 13692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($len,&wparam(2)); 
13702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($key,&wparam(3)); # key1 13712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 13722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($key_,"esp"); 13732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ("esp",16*7+8); 13742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &and ("esp",-16); # align stack 13752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 13762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xor ($rounds_,$rounds_); # if(len%16) len-=16; 13772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &test ($len,15); 13782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &setnz (&LB($rounds_)); 13792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &shl ($rounds_,4); 13802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ($len,$rounds_); 13812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 13822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(16*6+0,"esp"),0x87); # compose the magic constant 13832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(16*6+4,"esp"),0); 13842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(16*6+8,"esp"),1); 13852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(16*6+12,"esp"),0); 13862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(16*7+0,"esp"),$len); # save original $len 13872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(16*7+4,"esp"),$key_); # save original %esp 13882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 13892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds,&DWP(240,$key)); # key1->rounds 13902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($key_,$key); # backup $key 13912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds_,$rounds); # backup $rounds 
13922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 13932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($tweak,$inout0); 13942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($twtmp,$twtmp); 13952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($twmask,&QWP(6*16,"esp")); # 0x0...010...87 13962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pcmpgtd($twtmp,$tweak); # broadcast upper bits 13972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 13982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &and ($len,-16); 13992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ($len,16*6); 14002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jc (&label("xts_dec_short")); 14012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 14022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &shr ($rounds,1); 14032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds_,$rounds); 14042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("xts_dec_loop6")); 14052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 14062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_dec_loop6",16); 14072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org for ($i=0;$i<4;$i++) { 14082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($twres,$twtmp,0x13); 14092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($twtmp,$twtmp); 14102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa (&QWP(16*$i,"esp"),$tweak); 14112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &paddq ($tweak,$tweak); # &psllq($tweak,1); 14122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pand ($twres,$twmask); # isolate carry and residue 14132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pcmpgtd ($twtmp,$tweak); # broadcast upper bits 
14142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($tweak,$twres); 14152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org } 14162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($inout5,$twtmp,0x13); 14172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa (&QWP(16*$i++,"esp"),$tweak); 14182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &paddq ($tweak,$tweak); # &psllq($tweak,1); 14192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &$movekey ($rndkey0,&QWP(0,$key_)); 14202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pand ($inout5,$twmask); # isolate carry and residue 14212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout0,&QWP(0,$inp)); # load input 14222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout5,$tweak); 14232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 14242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org # inline _aesni_encrypt6 prologue and flip xor with tweak and key[0] 14252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout1,&QWP(16*1,$inp)); 14262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$rndkey0); # input^=rndkey[0] 14272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout2,&QWP(16*2,$inp)); 14282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout1,$rndkey0); 14292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout3,&QWP(16*3,$inp)); 14302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout2,$rndkey0); 14312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout4,&QWP(16*4,$inp)); 14322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout3,$rndkey0); 14332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($rndkey1,&QWP(16*5,$inp)); 14342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor 
($inout4,$rndkey0); 14352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($inp,&DWP(16*6,$inp)); 14362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout0,&QWP(16*0,"esp")); # input^=tweak 14372c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa (&QWP(16*$i,"esp"),$inout5); # save last tweak 14382c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout5,$rndkey1); 14392c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 14402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &$movekey ($rndkey1,&QWP(16,$key_)); 14412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($key,&DWP(32,$key_)); 14422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout1,&QWP(16*1,"esp")); 14432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &aesdec ($inout0,$rndkey1); 14442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout2,&QWP(16*2,"esp")); 14452c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &aesdec ($inout1,$rndkey1); 14462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout3,&QWP(16*3,"esp")); 14472c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &dec ($rounds); 14482c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &aesdec ($inout2,$rndkey1); 14492c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout4,&QWP(16*4,"esp")); 14502c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &aesdec ($inout3,$rndkey1); 14512c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout5,$rndkey0); 14522c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &aesdec ($inout4,$rndkey1); 14532c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &$movekey ($rndkey0,&QWP(0,$key)); 14542c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &aesdec ($inout5,$rndkey1); 14552c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &call (&label("_aesni_decrypt6_enter")); 
14562c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 14572c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($tweak,&QWP(16*5,"esp")); # last tweak 14582c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($twtmp,$twtmp); 14592c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak 14602c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pcmpgtd ($twtmp,$tweak); # broadcast upper bits 14612c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,&QWP(16*1,"esp")); 14622c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*0,$out),$inout0); # write output 14632c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout2,&QWP(16*2,"esp")); 14642c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*1,$out),$inout1); 14652c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout3,&QWP(16*3,"esp")); 14662c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*2,$out),$inout2); 14672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout4,&QWP(16*4,"esp")); 14682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*3,$out),$inout3); 14692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout5,$tweak); 14702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*4,$out),$inout4); 14712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($twres,$twtmp,0x13); 14722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*5,$out),$inout5); 14732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($out,&DWP(16*6,$out)); 14742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($twmask,&QWP(16*6,"esp")); # 0x0...010...87 14752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 14762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 
&pxor ($twtmp,$twtmp); 14772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &paddq ($tweak,$tweak); # &psllq($tweak,1); 14782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pand ($twres,$twmask); # isolate carry and residue 14792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pcmpgtd($twtmp,$tweak); # broadcast upper bits 14802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds,$rounds_); # restore $rounds 14812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($tweak,$twres); 14822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 14832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ($len,16*6); 14842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jnc (&label("xts_dec_loop6")); 14852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 14862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($rounds,&DWP(1,"",$rounds,2)); # restore $rounds 14872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($key,$key_); # restore $key 14882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds_,$rounds); 14892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 14902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_dec_short"); 14912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &add ($len,16*6); 14922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jz (&label("xts_dec_done6x")); 14932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 14942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($inout3,$tweak); # put aside previous tweak 14952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &cmp ($len,0x20); 14962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jb (&label("xts_dec_one")); 14972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 14982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($twres,$twtmp,0x13); 
# xts_dec_short, continued: each step doubles $tweak with paddq and xors in the carry/residue
# selected by $twmask; earlier tweaks are parked in $inout4/$inout5 and stack slots before
# branching to the two-, three- or four-block tail, otherwise falling through to load five blocks.
14992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($twtmp,$twtmp); 15002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &paddq ($tweak,$tweak); # &psllq($tweak,1); 15012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pand ($twres,$twmask); # isolate carry and residue 15022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pcmpgtd($twtmp,$tweak); # broadcast upper bits 15032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($tweak,$twres); 15042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &je (&label("xts_dec_two")); 15052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 15062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($twres,$twtmp,0x13); 15072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($twtmp,$twtmp); 15082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($inout4,$tweak); # put aside previous tweak 15092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &paddq ($tweak,$tweak); # &psllq($tweak,1); 15102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pand ($twres,$twmask); # isolate carry and residue 15112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pcmpgtd($twtmp,$tweak); # broadcast upper bits 15122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($tweak,$twres); 15132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &cmp ($len,0x40); 15142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jb (&label("xts_dec_three")); 15152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 15162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($twres,$twtmp,0x13); 15172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($twtmp,$twtmp); 15182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($inout5,$tweak); # put aside previous tweak 15192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &paddq ($tweak,$tweak); # 
&psllq($tweak,1); 15202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pand ($twres,$twmask); # isolate carry and residue 15212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pcmpgtd($twtmp,$tweak); # broadcast upper bits 15222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($tweak,$twres); 15232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa (&QWP(16*0,"esp"),$inout3); 15242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa (&QWP(16*1,"esp"),$inout4); 15252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &je (&label("xts_dec_four")); 15262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 15272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa (&QWP(16*2,"esp"),$inout5); 15282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($inout5,$twtmp,0x13); 15292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa (&QWP(16*3,"esp"),$tweak); 15302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &paddq ($tweak,$tweak); # &psllq($tweak,1); 15312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pand ($inout5,$twmask); # isolate carry and residue 15322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout5,$tweak); 15332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 15342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout0,&QWP(16*0,$inp)); # load input 15352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout1,&QWP(16*1,$inp)); 15362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout2,&QWP(16*2,$inp)); 15372c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout0,&QWP(16*0,"esp")); # input^=tweak 15382c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout3,&QWP(16*3,$inp)); 15392c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout1,&QWP(16*1,"esp")); 
15402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout4,&QWP(16*4,$inp)); 15412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout2,&QWP(16*2,"esp")); 15422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($inp,&DWP(16*5,$inp)); 15432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout3,&QWP(16*3,"esp")); 15442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa (&QWP(16*4,"esp"),$inout5); # save last tweak 15452c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout4,$inout5); 15462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 15472c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &call ("_aesni_decrypt6"); 15482c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 15492c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movaps ($tweak,&QWP(16*4,"esp")); # last tweak 15502c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak 15512c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,&QWP(16*1,"esp")); 15522c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout2,&QWP(16*2,"esp")); 15532c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*0,$out),$inout0); # write output 15542c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout3,&QWP(16*3,"esp")); 15552c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*1,$out),$inout1); 15562c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout4,$tweak); 15572c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*2,$out),$inout2); 15582c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*3,$out),$inout3); 15592c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*4,$out),$inout4); 15602c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea 
($out,&DWP(16*5,$out)); 15612c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("xts_dec_done")); 15622c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 15632c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_dec_one",16); 15642c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout0,&QWP(16*0,$inp)); # load input 15652c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($inp,&DWP(16*1,$inp)); 15662c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$inout3); # input^=tweak 15672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org if ($inline) 15682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org { &aesni_inline_generate1("dec"); } 15692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org else 15702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org { &call ("_aesni_decrypt1"); } 15712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$inout3); # output^=tweak 15722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*0,$out),$inout0); # write output 15732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($out,&DWP(16*1,$out)); 15742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 15752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($tweak,$inout3); # last tweak 15762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("xts_dec_done")); 15772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 15782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_dec_two",16); 15792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movaps ($inout4,$tweak); # put aside last tweak 15802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 15812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout0,&QWP(16*0,$inp)); # load input 15822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups 
($inout1,&QWP(16*1,$inp)); 15832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($inp,&DWP(16*2,$inp)); 15842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$inout3); # input^=tweak 15852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,$inout4); 15862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 15872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &call ("_aesni_decrypt3"); 15882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 15892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$inout3); # output^=tweak 15902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,$inout4); 15912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*0,$out),$inout0); # write output 15922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*1,$out),$inout1); 15932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($out,&DWP(16*2,$out)); 15942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 15952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($tweak,$inout4); # last tweak 15962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("xts_dec_done")); 15972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 15982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_dec_three",16); 15992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movaps ($inout5,$tweak); # put aside last tweak 16002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout0,&QWP(16*0,$inp)); # load input 16012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout1,&QWP(16*1,$inp)); 16022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout2,&QWP(16*2,$inp)); 16032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($inp,&DWP(16*3,$inp)); 
16042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$inout3); # input^=tweak 16052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,$inout4); 16062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout2,$inout5); 16072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 16082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &call ("_aesni_decrypt3"); 16092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 16102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$inout3); # output^=tweak 16112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,$inout4); 16122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout2,$inout5); 16132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*0,$out),$inout0); # write output 16142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*1,$out),$inout1); 16152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*2,$out),$inout2); 16162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($out,&DWP(16*3,$out)); 16172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 16182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($tweak,$inout5); # last tweak 16192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("xts_dec_done")); 16202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 16212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_dec_four",16); 16222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movaps ($inout4,$tweak); # put aside last tweak 16232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 16242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout0,&QWP(16*0,$inp)); # load input 16252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout1,&QWP(16*1,$inp)); 
16262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout2,&QWP(16*2,$inp)); 16272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,&QWP(16*0,"esp")); # input^=tweak 16282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout3,&QWP(16*3,$inp)); 16292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($inp,&DWP(16*4,$inp)); 16302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,&QWP(16*1,"esp")); 16312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout2,$inout5); 16322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout3,$inout4); 16332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 16342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &call ("_aesni_decrypt4"); 16352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 16362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak 16372c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,&QWP(16*1,"esp")); 16382c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout2,$inout5); 16392c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*0,$out),$inout0); # write output 16402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout3,$inout4); 16412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*1,$out),$inout1); 16422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*2,$out),$inout2); 16432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(16*3,$out),$inout3); 16442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($out,&DWP(16*4,$out)); 16452c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 16462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($tweak,$inout4); # last tweak 
16472c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("xts_dec_done")); 16482c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 16492c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_dec_done6x",16); # $tweak is pre-calculated 16502c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($len,&DWP(16*7+0,"esp")); # restore original $len 16512c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &and ($len,15); 16522c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jz (&label("xts_dec_ret")); 16532c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(16*7+0,"esp"),$len); # save $len%16 16542c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("xts_dec_only_one_more")); 16552c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 16562c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_dec_done",16); 16572c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($len,&DWP(16*7+0,"esp")); # restore original $len 16582c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($twtmp,$twtmp); 16592c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &and ($len,15); 16602c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jz (&label("xts_dec_ret")); 16612c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 16622c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pcmpgtd($twtmp,$tweak); # broadcast upper bits 16632c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(16*7+0,"esp"),$len); # save $len%16 16642c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($twres,$twtmp,0x13); 16652c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($twtmp,$twtmp); 16662c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($twmask,&QWP(16*6,"esp")); 16672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &paddq ($tweak,$tweak); # &psllq($tweak,1); 
16682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pand ($twres,$twmask); # isolate carry and residue 16692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pcmpgtd($twtmp,$tweak); # broadcast upper bits 16702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($tweak,$twres); 16712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 16722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_dec_only_one_more"); 16732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pshufd ($inout3,$twtmp,0x13); 16742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqa ($inout4,$tweak); # put aside previous tweak 16752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &paddq ($tweak,$tweak); # &psllq($tweak,1); 16762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pand ($inout3,$twmask); # isolate carry and residue 16772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &pxor ($inout3,$tweak); 16782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 16792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($key,$key_); # restore $key 16802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds,$rounds_); # restore $rounds 16812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 16822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout0,&QWP(0,$inp)); # load input 16832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$inout3); # input^=tweak 16842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org if ($inline) 16852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org { &aesni_inline_generate1("dec"); } 16862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org else 16872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org { &call ("_aesni_decrypt1"); } 16882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$inout3); # output^=tweak 
16892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0,$out),$inout0); # write output 16902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 16912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_dec_steal"); 16922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movz ($rounds,&BP(16,$inp)); 16932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movz ($key,&BP(0,$out)); 16942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($inp,&DWP(1,$inp)); 16952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&BP(0,$out),&LB($rounds)); 16962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&BP(16,$out),&LB($key)); 16972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($out,&DWP(1,$out)); 16982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ($len,1); 16992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jnz (&label("xts_dec_steal")); 17002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 17012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ($out,&DWP(16*7+0,"esp")); # rewind $out 17022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($key,$key_); # restore $key 17032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds,$rounds_); # restore $rounds 17042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 17052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout0,&QWP(0,$out)); # load input 17062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$inout4); # input^=tweak 17072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org if ($inline) 17082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org { &aesni_inline_generate1("dec"); } 17092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org else 17102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org { &call ("_aesni_decrypt1"); } 
17112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$inout4); # output^=tweak 17122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0,$out),$inout0); # write output 17132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 17142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_dec_ret"); 17152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ("esp",&DWP(16*7+4,"esp")); # restore %esp 17162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&function_end("aesni_xts_decrypt"); 17172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org} 17182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org} 17192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 17202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org###################################################################### 17212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# void $PREFIX_cbc_encrypt (const void *inp, void *out, 17222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# size_t length, const AES_KEY *key, 17232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# unsigned char *ivp,const int enc); 17242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&function_begin("${PREFIX}_cbc_encrypt"); 17252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($inp,&wparam(0)); 17262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds_,"esp"); 17272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($out,&wparam(1)); 17282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ($rounds_,24); 17292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($len,&wparam(2)); 17302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &and ($rounds_,-16); 17312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($key,&wparam(3)); 17322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov 
($key_,&wparam(4)); 17332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &test ($len,$len); 17342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jz (&label("cbc_abort")); 17352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 17362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &cmp (&wparam(5),0); 17372c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xchg ($rounds_,"esp"); # alloca 17382c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($ivec,&QWP(0,$key_)); # load IV 17392c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds,&DWP(240,$key)); 17402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($key_,$key); # backup $key 17412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov (&DWP(16,"esp"),$rounds_); # save original %esp 17422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds_,$rounds); # backup $rounds 17432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &je (&label("cbc_decrypt")); 17442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 17452c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movaps ($inout0,$ivec); 17462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &cmp ($len,16); 17472c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jb (&label("cbc_enc_tail")); 17482c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ($len,16); 17492c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("cbc_enc_loop")); 17502c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 17512c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("cbc_enc_loop",16); 17522c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($ivec,&QWP(0,$inp)); # input actually 17532c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($inp,&DWP(16,$inp)); 17542c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org if ($inline) 
17552c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org { &aesni_inline_generate1("enc",$inout0,$ivec); } 17562c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org else 17572c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org { &xorps($inout0,$ivec); &call("_aesni_encrypt1"); } 17582c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds,$rounds_); # restore $rounds 17592c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($key,$key_); # restore $key 17602c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0,$out),$inout0); # store output 17612c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($out,&DWP(16,$out)); 17622c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ($len,16); 17632c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jnc (&label("cbc_enc_loop")); 17642c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &add ($len,16); 17652c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jnz (&label("cbc_enc_tail")); 17662c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movaps ($ivec,$inout0); 17672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("cbc_ret")); 17682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 17692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("cbc_enc_tail"); 17702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ("ecx",$len); # zaps $rounds 17712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &data_word(0xA4F3F689); # rep movsb 17722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ("ecx",16); # zero tail 17732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ("ecx",$len); 17742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xor ("eax","eax"); # zaps $len 17752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &data_word(0xAAF3F689); # rep stosb 
17762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($out,&DWP(-16,$out)); # rewind $out by 1 block 17772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds,$rounds_); # restore $rounds 17782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($inp,$out); # $inp and $out are the same 17792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($key,$key_); # restore $key 17802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("cbc_enc_loop")); 17812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org###################################################################### 17822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("cbc_decrypt",16); 17832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &cmp ($len,0x50); 17842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jbe (&label("cbc_dec_tail")); 17852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movaps (&QWP(0,"esp"),$ivec); # save IV 17862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ($len,0x50); 17872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("cbc_dec_loop6_enter")); 17882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 17892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("cbc_dec_loop6",16); 17902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movaps (&QWP(0,"esp"),$rndkey0); # save IV 17912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0,$out),$inout5); 17922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($out,&DWP(0x10,$out)); 17932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("cbc_dec_loop6_enter"); 17942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout0,&QWP(0,$inp)); 17952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout1,&QWP(0x10,$inp)); 
17962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout2,&QWP(0x20,$inp)); 17972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout3,&QWP(0x30,$inp)); 17982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout4,&QWP(0x40,$inp)); 17992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movdqu ($inout5,&QWP(0x50,$inp)); 18002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 18012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &call ("_aesni_decrypt6"); 18022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 18032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($rndkey1,&QWP(0,$inp)); 18042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($rndkey0,&QWP(0x10,$inp)); 18052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,&QWP(0,"esp")); # ^=IV 18062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,$rndkey1); 18072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($rndkey1,&QWP(0x20,$inp)); 18082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout2,$rndkey0); 18092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($rndkey0,&QWP(0x30,$inp)); 18102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout3,$rndkey1); 18112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($rndkey1,&QWP(0x40,$inp)); 18122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout4,$rndkey0); 18132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($rndkey0,&QWP(0x50,$inp)); # IV 18142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout5,$rndkey1); 18152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0,$out),$inout0); 18162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0x10,$out),$inout1); 
18172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($inp,&DWP(0x60,$inp)); 18182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0x20,$out),$inout2); 18192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($rounds,$rounds_) # restore $rounds 18202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0x30,$out),$inout3); 18212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($key,$key_); # restore $key 18222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0x40,$out),$inout4); 18232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($out,&DWP(0x50,$out)); 18242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ($len,0x60); 18252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &ja (&label("cbc_dec_loop6")); 18262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 18272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movaps ($inout0,$inout5); 18282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movaps ($ivec,$rndkey0); 18292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &add ($len,0x50); 18302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jle (&label("cbc_dec_tail_collected")); 18312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0,$out),$inout0); 18322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($out,&DWP(0x10,$out)); 18332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("cbc_dec_tail"); 18342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout0,&QWP(0,$inp)); 18352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movaps ($in0,$inout0); 18362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &cmp ($len,0x10); 18372c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jbe (&label("cbc_dec_one")); 18382c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 
18392c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout1,&QWP(0x10,$inp)); 18402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movaps ($in1,$inout1); 18412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &cmp ($len,0x20); 18422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jbe (&label("cbc_dec_two")); 18432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 18442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout2,&QWP(0x20,$inp)); 18452c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &cmp ($len,0x30); 18462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jbe (&label("cbc_dec_three")); 18472c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 18482c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout3,&QWP(0x30,$inp)); 18492c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &cmp ($len,0x40); 18502c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jbe (&label("cbc_dec_four")); 18512c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 18522c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout4,&QWP(0x40,$inp)); 18532c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movaps (&QWP(0,"esp"),$ivec); # save IV 18542c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($inout0,&QWP(0,$inp)); 18552c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout5,$inout5); 18562c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &call ("_aesni_decrypt6"); 18572c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($rndkey1,&QWP(0,$inp)); 18582c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($rndkey0,&QWP(0x10,$inp)); 18592c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,&QWP(0,"esp")); # ^= IV 18602c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,$rndkey1); 
18612c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($rndkey1,&QWP(0x20,$inp)); 18622c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout2,$rndkey0); 18632c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($rndkey0,&QWP(0x30,$inp)); 18642c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout3,$rndkey1); 18652c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($ivec,&QWP(0x40,$inp)); # IV 18662c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout4,$rndkey0); 18672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0,$out),$inout0); 18682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0x10,$out),$inout1); 18692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0x20,$out),$inout2); 18702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0x30,$out),$inout3); 18712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($out,&DWP(0x40,$out)); 18722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movaps ($inout0,$inout4); 18732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ($len,0x50); 18742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("cbc_dec_tail_collected")); 18752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 18762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("cbc_dec_one",16); 18772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org if ($inline) 18782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org { &aesni_inline_generate1("dec"); } 18792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org else 18802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org { &call ("_aesni_decrypt1"); } 18812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$ivec); 18822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movaps ($ivec,$in0); 
18832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ($len,0x10); 18842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("cbc_dec_tail_collected")); 18852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 18862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("cbc_dec_two",16); 18872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout2,$inout2); 18882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &call ("_aesni_decrypt3"); 18892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$ivec); 18902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,$in0); 18912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0,$out),$inout0); 18922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movaps ($inout0,$inout1); 18932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($out,&DWP(0x10,$out)); 18942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movaps ($ivec,$in1); 18952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ($len,0x20); 18962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("cbc_dec_tail_collected")); 18972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 18982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("cbc_dec_three",16); 18992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &call ("_aesni_decrypt3"); 19002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$ivec); 19012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,$in0); 19022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout2,$in1); 19032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0,$out),$inout0); 19042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movaps ($inout0,$inout2); 19052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 
&movups (&QWP(0x10,$out),$inout1); 19062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($out,&DWP(0x20,$out)); 19072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($ivec,&QWP(0x20,$inp)); 19082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ($len,0x30); 19092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("cbc_dec_tail_collected")); 19102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 19112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("cbc_dec_four",16); 19122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &call ("_aesni_decrypt4"); 19132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($rndkey1,&QWP(0x10,$inp)); 19142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($rndkey0,&QWP(0x20,$inp)); 19152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout0,$ivec); 19162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups ($ivec,&QWP(0x30,$inp)); 19172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout1,$in0); 19182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0,$out),$inout0); 19192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout2,$rndkey1); 19202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0x10,$out),$inout1); 19212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &xorps ($inout3,$rndkey0); 19222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0x20,$out),$inout2); 19232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &lea ($out,&DWP(0x30,$out)); 19242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movaps ($inout0,$inout3); 19252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ($len,0x40); 19262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 
19272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("cbc_dec_tail_collected"); 19282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &and ($len,15); 19292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jnz (&label("cbc_dec_tail_partial")); 19302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0,$out),$inout0); 19312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &jmp (&label("cbc_ret")); 19322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 19332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("cbc_dec_tail_partial",16); 19342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movaps (&QWP(0,"esp"),$inout0); 19352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ("ecx",16); 19362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($inp,"esp"); 19372c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &sub ("ecx",$len); 19382c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &data_word(0xA4F3F689); # rep movsb 19392c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 19402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("cbc_ret"); 19412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ("esp",&DWP(16,"esp")); # pull original %esp 19422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &mov ($key_,&wparam(4)); 19432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org &movups (&QWP(0,$key_),$ivec); # output IV 19442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("cbc_abort"); 19452c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&function_end("${PREFIX}_cbc_encrypt"); 19462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org 19472c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org###################################################################### 19482c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# Mechanical port from 
# aesni-x86_64.pl.
#
# _aesni_set_encrypt_key is private interface,
# input:
#	"eax"	const unsigned char *userKey
#	$rounds	int bits
#	$key	AES_KEY *key
# output:
#	"eax"	return code
#	$rounds	rounds
#
# Return codes placed in "eax":
#	 0	success
#	-1	userKey or key is a NULL pointer
#	-2	bits is not one of 128, 192 or 256
#
# On success $rounds holds 9, 11 or 13 (for 128-, 192- and 256-bit keys
# respectively) and the same value is stored at byte offset 240 from
# the start of the key schedule.  Round keys are written consecutively
# from offset 0, 16 bytes each, using the move instruction named by
# $movekey.

&function_begin_B("_aesni_set_encrypt_key");
	# reject NULL userKey / key before touching memory
	&test	("eax","eax");
	&jz	(&label("bad_pointer"));
	&test	($key,$key);
	&jz	(&label("bad_pointer"));

	&movups	("xmm0",&QWP(0,"eax"));	# pull first 128 bits of *userKey
	&xorps	("xmm4","xmm4");	# low dword of xmm4 is assumed 0
	&lea	($key,&DWP(16,$key));	# $key now points at round 1 slot
	# dispatch on key length in bits
	&cmp	($rounds,256);
	&je	(&label("14rounds"));
	&cmp	($rounds,192);
	&je	(&label("12rounds"));
	&cmp	($rounds,128);
	&jne	(&label("bad_keybits"));

&set_label("10rounds",16);
	&mov		($rounds,9);
	&$movekey	(&QWP(-16,$key),"xmm0");	# round 0
	&aeskeygenassist("xmm1","xmm0",0x01);		# round 1
	&call		(&label("key_128_cold"));
	&aeskeygenassist("xmm1","xmm0",0x02);		# round 2
	&call		(&label("key_128"));
	&aeskeygenassist("xmm1","xmm0",0x04);		# round 3
	&call		(&label("key_128"));
	&aeskeygenassist("xmm1","xmm0",0x08);		# round 4
	&call		(&label("key_128"));
	&aeskeygenassist("xmm1","xmm0",0x10);		# round 5
	&call		(&label("key_128"));
	&aeskeygenassist("xmm1","xmm0",0x20);		# round 6
	&call		(&label("key_128"));
	&aeskeygenassist("xmm1","xmm0",0x40);		# round 7
	&call		(&label("key_128"));
	&aeskeygenassist("xmm1","xmm0",0x80);		# round 8
	&call		(&label("key_128"));
	&aeskeygenassist("xmm1","xmm0",0x1b);		# round 9
	&call		(&label("key_128"));
	&aeskeygenassist("xmm1","xmm0",0x36);		# round 10
	&call		(&label("key_128"));
	&$movekey	(&QWP(0,$key),"xmm0");		# last round key
	# $key has advanced 160 bytes, so this lands at schedule offset 240
	&mov		(&DWP(80,$key),$rounds);
	&xor		("eax","eax");
	&ret();

# key_128 stores the previously derived round key and advances $key;
# key_128_cold skips the store (used for the very first derivation).
&set_label("key_128",16);
	&$movekey	(&QWP(0,$key),"xmm0");
	&lea		($key,&DWP(16,$key));
&set_label("key_128_cold");
	&shufps		("xmm4","xmm0",0b00010000);
	&xorps		("xmm0","xmm4");
	&shufps		("xmm4","xmm0",0b10001100);
	&xorps		("xmm0","xmm4");
	&shufps		("xmm1","xmm1",0b11111111);	# critical path
	&xorps		("xmm0","xmm1");
	&ret();

&set_label("12rounds",16);
	&movq		("xmm2",&QWP(16,"eax"));	# remaining 1/3 of *userKey
	&mov		($rounds,11);
	# The terminating ';' below was missing, which made Perl fold this
	# statement and the next into one bitwise-AND expression.
	&$movekey	(&QWP(-16,$key),"xmm0");	# round 0
	&aeskeygenassist("xmm1","xmm2",0x01);		# round 1,2
	&call		(&label("key_192a_cold"));
	&aeskeygenassist("xmm1","xmm2",0x02);		# round 2,3
	&call		(&label("key_192b"));
	&aeskeygenassist("xmm1","xmm2",0x04);		# round 4,5
	&call		(&label("key_192a"));
	&aeskeygenassist("xmm1","xmm2",0x08);		# round 5,6
	&call		(&label("key_192b"));
	&aeskeygenassist("xmm1","xmm2",0x10);		# round 7,8
	&call		(&label("key_192a"));
	&aeskeygenassist("xmm1","xmm2",0x20);		# round 8,9
	&call		(&label("key_192b"));
	&aeskeygenassist("xmm1","xmm2",0x40);		# round 10,11
	&call		(&label("key_192a"));
	&aeskeygenassist("xmm1","xmm2",0x80);		# round 11,12
	&call		(&label("key_192b"));
	&$movekey	(&QWP(0,$key),"xmm0");		# last round key
	# $key has advanced 192 bytes, so this lands at schedule offset 240
	&mov		(&DWP(48,$key),$rounds);
	&xor		("eax","eax");
	&ret();

# key_192a stores one 16-byte round key and advances $key by 16;
# key_192a_cold skips the store; both save xmm2 in xmm5 for key_192b.
&set_label("key_192a",16);
	&$movekey	(&QWP(0,$key),"xmm0");
	&lea		($key,&DWP(16,$key));
&set_label("key_192a_cold",16);
	&movaps		("xmm5","xmm2");
&set_label("key_192b_warm");
	&shufps		("xmm4","xmm0",0b00010000);
	&movdqa		("xmm3","xmm2");
	&xorps		("xmm0","xmm4");
	&shufps		("xmm4","xmm0",0b10001100);
	&pslldq		("xmm3",4);
	&xorps		("xmm0","xmm4");
	&pshufd		("xmm1","xmm1",0b01010101);	# critical path
	&pxor		("xmm2","xmm3");
	&pxor		("xmm0","xmm1");
	&pshufd		("xmm3","xmm0",0b11111111);
	&pxor		("xmm2","xmm3");
	&ret();

# key_192b stores two 16-byte round keys assembled from xmm5/xmm0/xmm2,
# advances $key by 32 and rejoins the shared derivation code.
&set_label("key_192b",16);
	&movaps		("xmm3","xmm0");
	&shufps		("xmm5","xmm0",0b01000100);
	&$movekey	(&QWP(0,$key),"xmm5");
	&shufps		("xmm3","xmm2",0b01001110);
	&$movekey	(&QWP(16,$key),"xmm3");
	&lea		($key,&DWP(32,$key));
	&jmp		(&label("key_192b_warm"));

&set_label("14rounds",16);
	&movups		("xmm2",&QWP(16,"eax"));	# remaining half of *userKey
	&mov		($rounds,13);
	&lea		($key,&DWP(16,$key));
	&$movekey	(&QWP(-32,$key),"xmm0");	# round 0
	&$movekey	(&QWP(-16,$key),"xmm2");	# round 1
	&aeskeygenassist("xmm1","xmm2",0x01);		# round 2
	&call		(&label("key_256a_cold"));
	&aeskeygenassist("xmm1","xmm0",0x01);		# round 3
	&call		(&label("key_256b"));
	&aeskeygenassist("xmm1","xmm2",0x02);		# round 4
	&call		(&label("key_256a"));
	&aeskeygenassist("xmm1","xmm0",0x02);		# round 5
	&call		(&label("key_256b"));
	&aeskeygenassist("xmm1","xmm2",0x04);		# round 6
	&call		(&label("key_256a"));
	&aeskeygenassist("xmm1","xmm0",0x04);		# round 7
	&call		(&label("key_256b"));
	&aeskeygenassist("xmm1","xmm2",0x08);		# round 8
	&call		(&label("key_256a"));
	&aeskeygenassist("xmm1","xmm0",0x08);		# round 9
	&call		(&label("key_256b"));
	&aeskeygenassist("xmm1","xmm2",0x10);		# round 10
	&call		(&label("key_256a"));
	&aeskeygenassist("xmm1","xmm0",0x10);		# round 11
	&call		(&label("key_256b"));
	&aeskeygenassist("xmm1","xmm2",0x20);		# round 12
	&call		(&label("key_256a"));
	&aeskeygenassist("xmm1","xmm0",0x20);		# round 13
	&call		(&label("key_256b"));
	&aeskeygenassist("xmm1","xmm2",0x40);		# round 14
	&call		(&label("key_256a"));
	&$movekey	(&QWP(0,$key),"xmm0");		# last round key
	# $key has advanced 224 bytes, so this lands at schedule offset 240
	&mov		(&DWP(16,$key),$rounds);
	&xor		("eax","eax");
	&ret();

# key_256a stores xmm2 and derives the next value of xmm0;
# key_256a_cold skips the store.
&set_label("key_256a",16);
	&$movekey	(&QWP(0,$key),"xmm2");
	&lea		($key,&DWP(16,$key));
&set_label("key_256a_cold");
	&shufps		("xmm4","xmm0",0b00010000);
	&xorps		("xmm0","xmm4");
	&shufps		("xmm4","xmm0",0b10001100);
	&xorps		("xmm0","xmm4");
	&shufps		("xmm1","xmm1",0b11111111);	# critical path
	&xorps		("xmm0","xmm1");
	&ret();

# key_256b stores xmm0 and derives the next value of xmm2.
&set_label("key_256b",16);
	&$movekey	(&QWP(0,$key),"xmm0");
	&lea		($key,&DWP(16,$key));

	&shufps		("xmm4","xmm2",0b00010000);
	&xorps		("xmm2","xmm4");
	&shufps		("xmm4","xmm2",0b10001100);
	&xorps		("xmm2","xmm4");
	&shufps		("xmm1","xmm1",0b10101010);	# critical path
	&xorps		("xmm2","xmm1");
	&ret();

&set_label("bad_pointer",4);
	&mov	("eax",-1);
	&ret	();
&set_label("bad_keybits",4);
	&mov	("eax",-2);
	&ret	();
&function_end_B("_aesni_set_encrypt_key");

# int $PREFIX_set_encrypt_key (const unsigned char *userKey, int bits,
#                              AES_KEY *key)
# Public wrapper: loads the three stack arguments into "eax", $rounds
# and $key, then defers to _aesni_set_encrypt_key, whose return code in
# "eax" is passed straight back to the caller.
&function_begin_B("${PREFIX}_set_encrypt_key");
	&mov	("eax",&wparam(0));
	&mov	($rounds,&wparam(1));
	&mov	($key,&wparam(2));
	&call	("_aesni_set_encrypt_key");
	&ret	();
&function_end_B("${PREFIX}_set_encrypt_key");

# int $PREFIX_set_decrypt_key (const unsigned char *userKey, int bits,
#                              AES_KEY *key)
#
# Builds the encryption key schedule via _aesni_set_encrypt_key and, if
# that succeeded, converts it in place for decryption: the order of the
# round keys is reversed, the first and last keys are swapped as they
# are, and every other round key is passed through aesimc.  A non-zero
# return code from _aesni_set_encrypt_key is returned unchanged and the
# schedule is left as that routine wrote it.
&function_begin_B("${PREFIX}_set_decrypt_key");
	&mov	("eax",&wparam(0));
	&mov	($rounds,&wparam(1));
	&mov	($key,&wparam(2));
	&call	("_aesni_set_encrypt_key");
	&mov	($key,&wparam(2));		# reload start of schedule
	# The terminating ';' below was missing, which made Perl fold this
	# statement and the following &test into one bitwise-AND expression.
	&shl	($rounds,4);	# rounds-1 after _aesni_set_encrypt_key
	&test	("eax","eax");
	&jnz	(&label("dec_key_ret"));
	&lea	("eax",&DWP(16,$key,$rounds));	# end of key schedule

	&$movekey	("xmm0",&QWP(0,$key));	# just swap
	&$movekey	("xmm1",&QWP(0,"eax"));
	&$movekey	(&QWP(0,"eax"),"xmm0");
	&$movekey	(&QWP(0,$key),"xmm1");
	&lea		($key,&DWP(16,$key));
	&lea		("eax",&DWP(-16,"eax"));

	# walk $key upwards and "eax" downwards until the two meet
&set_label("dec_key_inverse");
	&$movekey	("xmm0",&QWP(0,$key));	# swap and inverse
	&$movekey	("xmm1",&QWP(0,"eax"));
	&aesimc		("xmm0","xmm0");
	&aesimc		("xmm1","xmm1");
	&lea		($key,&DWP(16,$key));
	&lea		("eax",&DWP(-16,"eax"));
	&$movekey	(&QWP(16,"eax"),"xmm0");
	&$movekey	(&QWP(-16,$key),"xmm1");
	&cmp		("eax",$key);
	&ja		(&label("dec_key_inverse"));

	&$movekey	("xmm0",&QWP(0,$key));	# inverse middle
	&aesimc		("xmm0","xmm0");
	&$movekey	(&QWP(0,$key),"xmm0");

	&xor		("eax","eax");		# return success
&set_label("dec_key_ret");
	&ret	();
&function_end_B("${PREFIX}_set_decrypt_key");
&asciz("AES for Intel AES-NI, CRYPTOGAMS by <appro\@openssl.org>");

&asm_finish();