12c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org#!/usr/bin/env perl
22c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
32c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# ====================================================================
42c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
52c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# project. The module is, however, dual licensed under OpenSSL and
62c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# CRYPTOGAMS licenses depending on where you obtain it. For further
72c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# details see http://www.openssl.org/~appro/cryptogams/.
82c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# ====================================================================
92c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org#
102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# This module implements support for Intel AES-NI extension. In
112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# OpenSSL context it's used with Intel engine, but can also be used as
122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# drop-in replacement for crypto/aes/asm/aes-586.pl [see below for
132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# details].
142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org#
152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# Performance.
162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org#
172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# To start with see corresponding paragraph in aesni-x86_64.pl...
182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# Instead of filling table similar to one found there I've chosen to
192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# summarize *comparison* results for raw ECB, CTR and CBC benchmarks.
202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# The simplified table below represents 32-bit performance relative
212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# to 64-bit one in every given point. Ratios vary for different
222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# encryption modes, therefore interval values.
232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org#
242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org#	16-byte     64-byte     256-byte    1-KB        8-KB
252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org#	53-67%      67-84%      91-94%      95-98%      97-99.5%
262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org#
272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# Lower ratios for smaller block sizes are perfectly understandable,
282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# because function call overhead is higher in 32-bit mode. Largest
292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# 8-KB block performance is virtually same: 32-bit code is less than
302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# 1% slower for ECB, CBC and CCM, and ~3% slower otherwise.
312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# January 2011
332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org#
342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# See aesni-x86_64.pl for details. Unlike x86_64 version this module
352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# interleaves at most 6 aes[enc|dec] instructions, because there are
362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# not enough registers for 8x interleave [which should be optimal for
372c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# Sandy Bridge]. Actually, performance results for 6x interleave
382c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# factor presented in aesni-x86_64.pl (except for CTR) are for this
392c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# module.
402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# April 2011
422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org#
432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# Add aesni_xts_[en|de]crypt. Westmere spends 1.50 cycles processing
442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# one byte out of 8KB with 128-bit key, Sandy Bridge - 1.09.
452c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
# Build configuration:
#   $PREFIX - prefix of the generated public symbols; if it is set to
#             "AES", the script generates a drop-in replacement for
#             crypto/aes/asm/aes-586.pl:-)
#   $inline - when true, the single-block round loop is expanded inline
#             (aesni_inline_generate1) instead of being emitted once as
#             _aesni_[en|de]crypt1 and called
($PREFIX,$inline)=("aesni",1);

# Directory part of the script path, including its trailing separator;
# stays undefined when $0 carries no directory component.
($dir)=($0 =~ m/(.*[\/\\])[^\/\\]+$/);
# Let x86asm.pl be found either next to this script or in
# ../../perlasm relative to it.
push(@INC,map("${dir}$_","","../../perlasm"));
require "x86asm.pl";

&asm_init($ARGV[0],$0);

# Instruction used to load round keys from the key schedule. Both
# configurations currently resolve to movups.
$movekey=($PREFIX eq "aesni") ? *movups : *movups;
592c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
# General-purpose register assignment shared by all generated routines.
($len,$rounds,$key,$inp,$out)=("eax","ecx","edx","esi","edi");
($rounds_,$key_)=("ebx","ebp");	# backup copies for $rounds and $key

# xmm0/xmm1 hold round keys, xmm2..xmm7 hold up to six data blocks.
($rndkey0,$rndkey1)=("xmm0","xmm1");
($inout0,$inout1,$inout2,$inout3,$inout4,$inout5)=map("xmm$_",2..7);

# Alternative names for the three upper data registers: $in1 is the
# same register as $inout3, $in0 as $inout4 and $ivec as $inout5.
($in1,$in0,$ivec)=($inout3,$inout4,$inout5);
762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
# AESNI extension
# Emit "aeskeygenassist $dst,$src,$imm" as raw bytes: 66 0F 3A DF,
# a register-direct ModR/M byte and the immediate. Both operands have
# to be xmm0..xmm7; for anything else nothing is emitted.
sub aeskeygenassist
{ my($dst,$src,$imm)=@_;

    return unless ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/);

    # mod=11 (register operands), reg=destination, r/m=source
    my $modrm=0xc0|($1<<3)|$2;
    &data_byte(0x66,0x0f,0x3a,0xdf,$modrm,$imm);
}
# Shared encoder for the two-operand AES-NI instructions: emits
# 66 0F 38 <$opcodelet> followed by a register-direct ModR/M byte.
# Both operands have to be xmm0..xmm7; for anything else nothing is
# emitted.
sub aescommon
{ my($opcodelet,$dst,$src)=@_;

    return unless ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/);

    # mod=11 (register operands), reg=destination, r/m=source
    my $modrm=0xc0|($1<<3)|$2;
    &data_byte(0x66,0x0f,0x38,$opcodelet,$modrm);
}
# One wrapper per AES-NI instruction; each takes (dst,src) and hands
# them to aescommon together with the instruction's last opcode byte.
sub aesimc
{   return aescommon(0xdb,@_);
}
sub aesenc
{   return aescommon(0xdc,@_);
}
sub aesenclast
{   return aescommon(0xdd,@_);
}
sub aesdec
{   return aescommon(0xde,@_);
}
sub aesdeclast
{   return aescommon(0xdf,@_);
}
932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# Inline version of internal aesni_[en|de]crypt1
{ my $sn;	# expansion counter, keeps the loop label of every expansion unique

# Expand the single-block round loop in place.
#   $p     - "enc" or "dec", selects aesenc/aesenclast or aesdec/aesdeclast
#   $inout - register holding the block, defaults to $inout0
#   $ivec  - optional register; when given, it is xor-ed with round key 0
#            (and thereby modified) and the result is xor-ed into $inout
#            instead of the bare round key
# The emitted code advances $key and counts $rounds down to zero:
# $rounds aes rounds are followed by the final aes...last round.
sub aesni_inline_generate1
{ my ($p,$inout,$ivec)=@_;
  my $round=\&{"aes${p}"};
  my $last=\&{"aes${p}last"};
  my $loop;

    $inout=$inout0 unless (defined($inout));
    $loop="${p}1_loop_".(++$sn);

    &$movekey		($rndkey0,&QWP(0,$key));
    &$movekey		($rndkey1,&QWP(16,$key));
    if (defined($ivec))
    {	&xorps		($ivec,$rndkey0);
	&lea		($key,&DWP(32,$key));
	&xorps		($inout,$ivec);
    }
    else
    {	&lea		($key,&DWP(32,$key));
	&xorps		($inout,$rndkey0);
    }

    &set_label($loop);
	&$round		($inout,$rndkey1);
	&dec		($rounds);
	# movups and lea leave the flags alone, jnz still sees dec's result
	&$movekey	($rndkey1,&QWP(0,$key));
	&lea		($key,&DWP(16,$key));
    &jnz		(&label($loop));
    &$last		($inout,$rndkey1);
}}
1142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
# Emit _aesni_[en|de]crypt1, the fully unrolled single-block routine.
#   $p     - "enc" or "dec"
#   $inout - register holding the block, defaults to $inout0
# The emitted code compares $rounds with 11 and enters the unrolled
# sequence at one of three points: below 11 it starts at the "128"
# label (9 aes rounds + final round), at 11 it starts at the "192" label
# (11 + final), otherwise it falls through (13 + final).
sub aesni_generate1
{ my ($p,$inout)=@_;
  my $round=\&{"aes${p}"};
  my $last=\&{"aes${p}last"};
  my $name="_aesni_${p}rypt1";

    $inout=$inout0 unless (defined($inout));

    # one round with the key in $k, then reload $k from $off($key)
    my $step=sub
    {	my ($k,$off)=@_;
	&$round		($inout,$k);
	&$movekey	($k,&QWP($off,$key));
    };

    &function_begin_B($name);
	&movups		($rndkey0,&QWP(0,$key));
	&$movekey	($rndkey1,&QWP(0x10,$key));
	&xorps		($inout,$rndkey0);
	&$movekey	($rndkey0,&QWP(0x20,$key));
	&lea		($key,&DWP(0x30,$key));
	&cmp		($rounds,11);
	&jb		(&label("${p}128"));
	&lea		($key,&DWP(0x20,$key));	# lea keeps cmp's flags for je
	&je		(&label("${p}192"));
	&lea		($key,&DWP(0x20,$key));
	&$step		($rndkey1,-0x40);
	&$step		($rndkey0,-0x30);
    &set_label("${p}192");
	&$step		($rndkey1,-0x20);
	&$step		($rndkey0,-0x10);
    &set_label("${p}128");
	# offsets 0x00..0x70, alternating between the two key registers
	foreach my $i (0..7)
	{   &$step	(($i&1) ? $rndkey0 : $rndkey1,0x10*$i);	}
	&$round		($inout,$rndkey1);
    &$last		($inout,$rndkey0);
    &ret();
    &function_end_B($name);
}
1602c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
# void $PREFIX_encrypt (const void *inp,void *out,const AES_KEY *key);
# void $PREFIX_decrypt (const void *inp,void *out,const AES_KEY *key);
#
# Both single-block entry points are emitted from the same template,
# encrypt first. Unless $inline is set, the out-of-line
# _aesni_[en|de]crypt1 helper is emitted right before its caller.
foreach my $p ("enc","dec")
{   my $name="${PREFIX}_${p}rypt";

    &aesni_generate1($p) if (!$inline);
    &function_begin_B($name);
	&mov	("eax",&wparam(0));		# inp
	&mov	($key,&wparam(2));
	&movups	($inout0,&QWP(0,"eax"));	# load the block
	&mov	($rounds,&DWP(240,$key));	# round count at offset 240 of *key
	&mov	("eax",&wparam(1));		# out
	if ($inline)	{ &aesni_inline_generate1($p);	}
	else		{ &call	("_aesni_${p}rypt1");	}
	&movups	(&QWP(0,"eax"),$inout0);	# store the result
	&ret	();
    &function_end_B($name);
}
1922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
# _aesni_[en|de]cryptN are private interfaces, N denotes interleave
# factor. Why were 3x subroutines originally used in loops? Even though
# aes[enc|dec] latency was originally 6, it could be scheduled only
# every *2nd* cycle. Thus 3x interleave was the one providing optimal
# utilization, i.e. when subroutine's throughput is virtually same as
# of non-interleaved subroutine [for number of input blocks up to 3].
# This is why it makes no sense to implement 2x subroutine.
# aes[enc|dec] latency in next processor generation is 8, but the
# instructions can be scheduled every cycle. Optimal interleave for
# new processor is therefore 8x, but it's unfeasible to accommodate it
# in XMM registers addressable in 32-bit mode and therefore 6x is
# used instead...
2052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
# Emit _aesni_[en|de]crypt3, which processes $inout0..$inout2 with the
# rounds of the three blocks interleaved.
#   $p - "enc" or "dec"
# The emitted code halves $rounds up front because every loop iteration
# performs two rounds (one per key register); one more regular round
# and the final round follow the loop. $key is advanced along the way.
sub aesni_generate3
{ my $p=shift;
  my $round=\&{"aes${p}"};
  my $last=\&{"aes${p}last"};
  my @blk=($inout0,$inout1,$inout2);
  my $loop="${p}3_loop";
  my $name="_aesni_${p}rypt3";

    &function_begin_B($name);
	&$movekey	($rndkey0,&QWP(0,$key));
	&shr		($rounds,1);
	&$movekey	($rndkey1,&QWP(16,$key));
	&lea		($key,&DWP(32,$key));
	&xorps		($inout0,$rndkey0);
	foreach my $r (@blk[1,2])	{ &pxor($r,$rndkey0);	}
	&$movekey	($rndkey0,&QWP(0,$key));

    &set_label($loop);
	foreach my $r (@blk[0,1])	{ &$round($r,$rndkey1);	}
	&dec		($rounds);	# nothing below touches the flags before jnz
	&$round		($inout2,$rndkey1);
	&$movekey	($rndkey1,&QWP(16,$key));
	foreach my $r (@blk[0,1])	{ &$round($r,$rndkey0);	}
	&lea		($key,&DWP(32,$key));
	&$round		($inout2,$rndkey0);
	&$movekey	($rndkey0,&QWP(0,$key));
	&jnz		(&label($loop));

    foreach my $r (@blk)	{ &$round($r,$rndkey1);	}
    foreach my $r (@blk)	{ &$last($r,$rndkey0);	}
    &ret();
    &function_end_B($name);
}
2402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
2412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# 4x interleave is implemented to improve small block performance,
2422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# most notably [and naturally] 4 block by ~30%. One can argue that one
2432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# should have implemented 5x as well, but improvement  would be <20%,
2442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# so it's not worth it...
# Emit _aesni_[en|de]crypt4, which processes $inout0..$inout3 with the
# rounds of the four blocks interleaved.
#   $p - "enc" or "dec"
# Same scheme as the 3x routine: $rounds is halved, the loop performs
# two rounds per iteration, then one more regular round and the final
# round are emitted. $key is advanced along the way.
sub aesni_generate4
{ my $p=shift;
  my $round=\&{"aes${p}"};
  my $last=\&{"aes${p}last"};
  my @blk=($inout0,$inout1,$inout2,$inout3);
  my $loop="${p}4_loop";
  my $name="_aesni_${p}rypt4";

    &function_begin_B($name);
	&$movekey	($rndkey0,&QWP(0,$key));
	&$movekey	($rndkey1,&QWP(16,$key));
	&shr		($rounds,1);
	&lea		($key,&DWP(32,$key));
	&xorps		($inout0,$rndkey0);
	foreach my $r (@blk[1..3])	{ &pxor($r,$rndkey0);	}
	&$movekey	($rndkey0,&QWP(0,$key));

    &set_label($loop);
	foreach my $r (@blk[0,1])	{ &$round($r,$rndkey1);	}
	&dec		($rounds);	# nothing below touches the flags before jnz
	foreach my $r (@blk[2,3])	{ &$round($r,$rndkey1);	}
	&$movekey	($rndkey1,&QWP(16,$key));
	foreach my $r (@blk[0,1])	{ &$round($r,$rndkey0);	}
	&lea		($key,&DWP(32,$key));
	foreach my $r (@blk[2,3])	{ &$round($r,$rndkey0);	}
	&$movekey	($rndkey0,&QWP(0,$key));
    &jnz		(&label($loop));

    foreach my $r (@blk)	{ &$round($r,$rndkey1);	}
    foreach my $r (@blk)	{ &$last($r,$rndkey0);	}
    &ret();
    &function_end_B($name);
}
2852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
# Emit _aesni_[en|de]crypt6, which processes $inout0..$inout5 with the
# rounds of the six blocks interleaved.
#   $p - "enc" or "dec"
# The first round is peeled out of the loop and interleaved with the
# initial key xor; execution then jumps to the
# _aesni_[en|de]crypt6_enter label in the middle of the loop body.
# As in the 3x/4x routines $rounds is halved, the loop covers two
# rounds per iteration, and one more regular round plus the final round
# follow it. $key is advanced along the way.
sub aesni_generate6
{ my $p=shift;
  my $round=\&{"aes${p}"};
  my $last=\&{"aes${p}last"};
  my @blk=($inout0,$inout1,$inout2,$inout3,$inout4,$inout5);
  my $loop="${p}6_loop";
  my $name="_aesni_${p}rypt6";
  my $enter="${name}_enter";

    &function_begin_B($name);
    &static_label($enter);
	&$movekey	($rndkey0,&QWP(0,$key));
	&shr		($rounds,1);
	&$movekey	($rndkey1,&QWP(16,$key));
	&lea		($key,&DWP(32,$key));
	&xorps		($inout0,$rndkey0);
	&pxor		($inout1,$rndkey0);	# pxor does better here
	&$round		($inout0,$rndkey1);
	&pxor		($inout2,$rndkey0);
	&$round		($inout1,$rndkey1);
	&pxor		($inout3,$rndkey0);
	&dec		($rounds);	# flags survive until jnz at the loop end
	&$round		($inout2,$rndkey1);
	&pxor		($inout4,$rndkey0);
	&$round		($inout3,$rndkey1);
	&pxor		($inout5,$rndkey0);
	&$round		($inout4,$rndkey1);
	&$movekey	($rndkey0,&QWP(0,$key));
	&$round		($inout5,$rndkey1);
	&jmp		(&label($enter));

    &set_label($loop,16);
	foreach my $r (@blk[0,1])	{ &$round($r,$rndkey1);	}
	&dec		($rounds);
	foreach my $r (@blk[2..5])	{ &$round($r,$rndkey1);	}
    &set_label($enter,16);
	&$movekey	($rndkey1,&QWP(16,$key));
	foreach my $r (@blk[0,1])	{ &$round($r,$rndkey0);	}
	&lea		($key,&DWP(32,$key));
	foreach my $r (@blk[2..5])	{ &$round($r,$rndkey0);	}
	&$movekey	($rndkey0,&QWP(0,$key));
    &jnz		(&label($loop));

    foreach my $r (@blk)	{ &$round($r,$rndkey1);	}
    foreach my $r (@blk)	{ &$last($r,$rndkey0);	}
    &ret();
    &function_end_B($name);
}
# Emit the multi-block helper routines that the bulk functions below
# reach via &call (e.g. "_aesni_decrypt3", "_aesni_encrypt6").  The
# decrypt flavours are always generated; the encrypt flavours are
# generated only when $PREFIX is "aesni".
&aesni_generate3("enc") if ($PREFIX eq "aesni");
&aesni_generate3("dec");
&aesni_generate4("enc") if ($PREFIX eq "aesni");
&aesni_generate4("dec");
&aesni_generate6("enc") if ($PREFIX eq "aesni");
&aesni_generate6("dec");
3522c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
3532c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.orgif ($PREFIX eq "aesni") {
######################################################################
# void aesni_ecb_encrypt (const void *in, void *out,
#                         size_t length, const AES_KEY *key,
#                         int enc);
#
# Emits the ECB bulk routine.  Arguments are fetched with &wparam(0..4):
#   in     - source buffer
#   out    - destination buffer
#   length - byte count; rounded DOWN to a multiple of 16, and the
#            routine returns immediately if nothing is left
#   key    - key schedule; the round count is read from offset 240
#   enc    - non-zero selects encryption, zero selects decryption
#
# Both directions share one shape: a main loop that handles six blocks
# per call to _aesni_{en,de}crypt6, followed by a tail that dispatches
# the remaining one to five blocks.  The statement order below is the
# emitted instruction order and is flag-sensitive; do not reorder.
&function_begin("aesni_ecb_encrypt");
	&mov	($inp,&wparam(0));
	&mov	($out,&wparam(1));
	&mov	($len,&wparam(2));
	&mov	($key,&wparam(3));
	&mov	($rounds_,&wparam(4));	# holds the "enc" flag for now
	&and	($len,-16);		# drop any partial trailing block
	&jz	(&label("ecb_ret"));	# nothing to do
	&mov	($rounds,&DWP(240,$key));
	&test	($rounds_,$rounds_);
	&jz	(&label("ecb_decrypt"));	# enc==0 -> decrypt path

	# ---- encryption -------------------------------------------------
	# The helpers advance $key (and consume $rounds), so pristine
	# copies are kept and restored after every six-block call.
	&mov	($key_,$key);		# backup $key
	&mov	($rounds_,$rounds);	# backup $rounds
	&cmp	($len,0x60);
	&jb	(&label("ecb_enc_tail"));	# fewer than six blocks

	# Preload the first six blocks and enter the loop past its
	# store/reload half.
	&movdqu	($inout0,&QWP(0,$inp));
	&movdqu	($inout1,&QWP(0x10,$inp));
	&movdqu	($inout2,&QWP(0x20,$inp));
	&movdqu	($inout3,&QWP(0x30,$inp));
	&movdqu	($inout4,&QWP(0x40,$inp));
	&movdqu	($inout5,&QWP(0x50,$inp));
	&lea	($inp,&DWP(0x60,$inp));
	&sub	($len,0x60);
	&jmp	(&label("ecb_enc_loop6_enter"));

# Loop top: store the six results of the previous iteration while
# loading the next six input blocks.
&set_label("ecb_enc_loop6",16);
	&movups	(&QWP(0,$out),$inout0);
	&movdqu	($inout0,&QWP(0,$inp));
	&movups	(&QWP(0x10,$out),$inout1);
	&movdqu	($inout1,&QWP(0x10,$inp));
	&movups	(&QWP(0x20,$out),$inout2);
	&movdqu	($inout2,&QWP(0x20,$inp));
	&movups	(&QWP(0x30,$out),$inout3);
	&movdqu	($inout3,&QWP(0x30,$inp));
	&movups	(&QWP(0x40,$out),$inout4);
	&movdqu	($inout4,&QWP(0x40,$inp));
	&movups	(&QWP(0x50,$out),$inout5);
	&lea	($out,&DWP(0x60,$out));
	&movdqu	($inout5,&QWP(0x50,$inp));
	&lea	($inp,&DWP(0x60,$inp));
&set_label("ecb_enc_loop6_enter");

	&call	("_aesni_encrypt6");

	&mov	($key,$key_);		# restore $key
	&mov	($rounds,$rounds_);	# restore $rounds
	&sub	($len,0x60);
	&jnc	(&label("ecb_enc_loop6"));	# no borrow: >=6 more blocks

	# Flush the final six-block batch, then undo the last subtraction
	# so $len is the true remainder (0..0x50).
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&movups	(&QWP(0x20,$out),$inout2);
	&movups	(&QWP(0x30,$out),$inout3);
	&movups	(&QWP(0x40,$out),$inout4);
	&movups	(&QWP(0x50,$out),$inout5);
	&lea	($out,&DWP(0x60,$out));
	&add	($len,0x60);
	&jz	(&label("ecb_ret"));

# Tail: one to five blocks remain.  Each &je reuses the flags of the
# preceding &cmp; the intervening movups loads do not modify EFLAGS.
&set_label("ecb_enc_tail");
	&movups	($inout0,&QWP(0,$inp));
	&cmp	($len,0x20);
	&jb	(&label("ecb_enc_one"));
	&movups	($inout1,&QWP(0x10,$inp));
	&je	(&label("ecb_enc_two"));
	&movups	($inout2,&QWP(0x20,$inp));
	&cmp	($len,0x40);
	&jb	(&label("ecb_enc_three"));
	&movups	($inout3,&QWP(0x30,$inp));
	&je	(&label("ecb_enc_four"));
	# Five blocks: run the six-block helper with a zeroed sixth lane
	# and discard that lane's result.
	&movups	($inout4,&QWP(0x40,$inp));
	&xorps	($inout5,$inout5);
	&call	("_aesni_encrypt6");
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&movups	(&QWP(0x20,$out),$inout2);
	&movups	(&QWP(0x30,$out),$inout3);
	&movups	(&QWP(0x40,$out),$inout4);
	&jmp	(&label("ecb_ret"));

&set_label("ecb_enc_one",16);
	if ($inline)
	{   &aesni_inline_generate1("enc");	}
	else
	{   &call	("_aesni_encrypt1");	}
	&movups	(&QWP(0,$out),$inout0);
	&jmp	(&label("ecb_ret"));

# Two blocks: run the three-block helper with a zeroed third lane.
&set_label("ecb_enc_two",16);
	&xorps	($inout2,$inout2);
	&call	("_aesni_encrypt3");
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&jmp	(&label("ecb_ret"));

&set_label("ecb_enc_three",16);
	&call	("_aesni_encrypt3");
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&movups	(&QWP(0x20,$out),$inout2);
	&jmp	(&label("ecb_ret"));

&set_label("ecb_enc_four",16);
	&call	("_aesni_encrypt4");
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&movups	(&QWP(0x20,$out),$inout2);
	&movups	(&QWP(0x30,$out),$inout3);
	&jmp	(&label("ecb_ret"));
######################################################################
# ---- decryption: mirrors the encryption path one-for-one, using the
# _aesni_decrypt* helpers.
&set_label("ecb_decrypt",16);
	&mov	($key_,$key);		# backup $key
	&mov	($rounds_,$rounds);	# backup $rounds
	&cmp	($len,0x60);
	&jb	(&label("ecb_dec_tail"));	# fewer than six blocks

	&movdqu	($inout0,&QWP(0,$inp));
	&movdqu	($inout1,&QWP(0x10,$inp));
	&movdqu	($inout2,&QWP(0x20,$inp));
	&movdqu	($inout3,&QWP(0x30,$inp));
	&movdqu	($inout4,&QWP(0x40,$inp));
	&movdqu	($inout5,&QWP(0x50,$inp));
	&lea	($inp,&DWP(0x60,$inp));
	&sub	($len,0x60);
	&jmp	(&label("ecb_dec_loop6_enter"));

# Loop top: store previous results while loading the next six blocks.
&set_label("ecb_dec_loop6",16);
	&movups	(&QWP(0,$out),$inout0);
	&movdqu	($inout0,&QWP(0,$inp));
	&movups	(&QWP(0x10,$out),$inout1);
	&movdqu	($inout1,&QWP(0x10,$inp));
	&movups	(&QWP(0x20,$out),$inout2);
	&movdqu	($inout2,&QWP(0x20,$inp));
	&movups	(&QWP(0x30,$out),$inout3);
	&movdqu	($inout3,&QWP(0x30,$inp));
	&movups	(&QWP(0x40,$out),$inout4);
	&movdqu	($inout4,&QWP(0x40,$inp));
	&movups	(&QWP(0x50,$out),$inout5);
	&lea	($out,&DWP(0x60,$out));
	&movdqu	($inout5,&QWP(0x50,$inp));
	&lea	($inp,&DWP(0x60,$inp));
&set_label("ecb_dec_loop6_enter");

	&call	("_aesni_decrypt6");

	&mov	($key,$key_);		# restore $key
	&mov	($rounds,$rounds_);	# restore $rounds
	&sub	($len,0x60);
	&jnc	(&label("ecb_dec_loop6"));	# no borrow: >=6 more blocks

	# Flush the final batch and recover the true remainder in $len.
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&movups	(&QWP(0x20,$out),$inout2);
	&movups	(&QWP(0x30,$out),$inout3);
	&movups	(&QWP(0x40,$out),$inout4);
	&movups	(&QWP(0x50,$out),$inout5);
	&lea	($out,&DWP(0x60,$out));
	&add	($len,0x60);
	&jz	(&label("ecb_ret"));

# Tail: one to five blocks remain (same dispatch as the encrypt tail).
&set_label("ecb_dec_tail");
	&movups	($inout0,&QWP(0,$inp));
	&cmp	($len,0x20);
	&jb	(&label("ecb_dec_one"));
	&movups	($inout1,&QWP(0x10,$inp));
	&je	(&label("ecb_dec_two"));
	&movups	($inout2,&QWP(0x20,$inp));
	&cmp	($len,0x40);
	&jb	(&label("ecb_dec_three"));
	&movups	($inout3,&QWP(0x30,$inp));
	&je	(&label("ecb_dec_four"));
	# Five blocks: six-block helper with a zeroed, discarded sixth lane.
	&movups	($inout4,&QWP(0x40,$inp));
	&xorps	($inout5,$inout5);
	&call	("_aesni_decrypt6");
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&movups	(&QWP(0x20,$out),$inout2);
	&movups	(&QWP(0x30,$out),$inout3);
	&movups	(&QWP(0x40,$out),$inout4);
	&jmp	(&label("ecb_ret"));

&set_label("ecb_dec_one",16);
	if ($inline)
	{   &aesni_inline_generate1("dec");	}
	else
	{   &call	("_aesni_decrypt1");	}
	&movups	(&QWP(0,$out),$inout0);
	&jmp	(&label("ecb_ret"));

# Two blocks: three-block helper with a zeroed third lane.
&set_label("ecb_dec_two",16);
	&xorps	($inout2,$inout2);
	&call	("_aesni_decrypt3");
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&jmp	(&label("ecb_ret"));

&set_label("ecb_dec_three",16);
	&call	("_aesni_decrypt3");
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&movups	(&QWP(0x20,$out),$inout2);
	&jmp	(&label("ecb_ret"));

# Last case: no jump needed, control falls through to ecb_ret.
&set_label("ecb_dec_four",16);
	&call	("_aesni_decrypt4");
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&movups	(&QWP(0x20,$out),$inout2);
	&movups	(&QWP(0x30,$out),$inout3);

&set_label("ecb_ret");
&function_end("aesni_ecb_encrypt");
5722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
######################################################################
# void aesni_ccm64_[en|de]crypt_blocks (const void *in, void *out,
#                         size_t blocks, const AES_KEY *key,
#                         const char *ivec,char *cmac);
#
# Handles only complete blocks, operates on a 64-bit counter and
# does not update *ivec! Nor does it finalize the CMAC value
# (see engine/eng_aesni.c for details).
#
5822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org{ my $cmac=$inout1;
# Emits aesni_ccm64_encrypt_blocks.  Arguments via &wparam(0..5):
# in, out, blocks, key, ivec, cmac.  For every 16-byte input block the
# routine encrypts the counter block and the running CMAC in lock-step
# (two AES states interleaved in one round loop), XORs the encrypted
# counter into the input to form the output, and bumps the counter.
# The updated CMAC is written back through the cmac pointer on exit;
# *ivec is only read.
#
# NOTE: the outer loop tests the block count only AFTER processing a
# block, so the caller must pass blocks >= 1.
#
# Stack frame (16-byte aligned): 0..15 pshufb byte-swap mask,
# 16..31 counter increment vector, 48 saved original %esp.
&function_begin("aesni_ccm64_encrypt_blocks");
	&mov	($inp,&wparam(0));
	&mov	($out,&wparam(1));
	&mov	($len,&wparam(2));
	&mov	($key,&wparam(3));
	&mov	($rounds_,&wparam(4));		# ivec pointer (temporary use)
	&mov	($rounds,&wparam(5));		# cmac pointer (temporary use)
	&mov	($key_,"esp");			# remember caller's esp
	&sub	("esp",60);
	&and	("esp",-16);			# align stack
	&mov	(&DWP(48,"esp"),$key_);

	&movdqu	($ivec,&QWP(0,$rounds_));	# load ivec
	&movdqu	($cmac,&QWP(0,$rounds));	# load cmac
	&mov	($rounds,&DWP(240,$key));

	# compose byte-swap control mask for pshufb on stack
	# (bytes 0f 0e ... 01 00 in memory: reverses all 16 bytes)
	&mov	(&DWP(0,"esp"),0x0c0d0e0f);
	&mov	(&DWP(4,"esp"),0x08090a0b);
	&mov	(&DWP(8,"esp"),0x04050607);
	&mov	(&DWP(12,"esp"),0x00010203);

	# compose counter increment vector on stack
	# (low dword 1, rest 0: paddq adds 1 to the low 64-bit lane)
	&mov	($rounds_,1);
	&xor	($key_,$key_);
	&mov	(&DWP(16,"esp"),$rounds_);
	&mov	(&DWP(20,"esp"),$key_);
	&mov	(&DWP(24,"esp"),$key_);
	&mov	(&DWP(28,"esp"),$key_);

	&shr	($rounds,1);			# inner loop does 2 rounds/iteration
	&lea	($key_,&DWP(0,$key));		# $key_ = pristine key pointer
	&movdqa	($inout3,&QWP(0,"esp"));	# bswap mask
	&movdqa	($inout0,$ivec);		# first counter block, as supplied
	&mov	($rounds_,$rounds);		# pristine (halved) round count
	&pshufb	($ivec,$inout3);		# keep counter byte-reversed for paddq

# Per-block loop.  On entry $inout0 holds the counter block to encrypt.
&set_label("ccm64_enc_outer");
	&$movekey	($rndkey0,&QWP(0,$key_));
	&mov		($rounds,$rounds_);
	&movups		($in0,&QWP(0,$inp));

	&xorps		($inout0,$rndkey0);		# round-0 key on counter
	&$movekey	($rndkey1,&QWP(16,$key_));
	&xorps		($rndkey0,$in0);		# fold input into round-0 key
	&lea		($key,&DWP(32,$key_));
	&xorps		($cmac,$rndkey0);		# cmac^=inp
	&$movekey	($rndkey0,&QWP(0,$key));

# Two AES rounds per pass on both states.  The closing &jnz consumes
# the flags set by &dec; the instructions in between leave EFLAGS alone.
&set_label("ccm64_enc2_loop");
	&aesenc		($inout0,$rndkey1);
	&dec		($rounds);
	&aesenc		($cmac,$rndkey1);
	&$movekey	($rndkey1,&QWP(16,$key));
	&aesenc		($inout0,$rndkey0);
	&lea		($key,&DWP(32,$key));
	&aesenc		($cmac,$rndkey0);
	&$movekey	($rndkey0,&QWP(0,$key));
	&jnz		(&label("ccm64_enc2_loop"));
	&aesenc		($inout0,$rndkey1);
	&aesenc		($cmac,$rndkey1);
	&paddq		($ivec,&QWP(16,"esp"));		# counter++
	&aesenclast	($inout0,$rndkey0);
	&aesenclast	($cmac,$rndkey0);

	# &dec sets the flags for the &jnz below; none of the
	# instructions in between modify EFLAGS.
	&dec	($len);
	&lea	($inp,&DWP(16,$inp));
	&xorps	($in0,$inout0);			# inp^=E(ivec)
	&movdqa	($inout0,$ivec);
	&movups	(&QWP(0,$out),$in0);		# save output
	&lea	($out,&DWP(16,$out));
	&pshufb	($inout0,$inout3);		# next counter block, bytes swapped back
	&jnz	(&label("ccm64_enc_outer"));

	# Restore the caller's esp before touching &wparam again
	# (presumably esp-relative), then write the CMAC back.
	&mov	("esp",&DWP(48,"esp"));
	&mov	($out,&wparam(5));
	&movups	(&QWP(0,$out),$cmac);
&function_end("aesni_ccm64_encrypt_blocks");
6612c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
6622c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&function_begin("aesni_ccm64_decrypt_blocks");
6632c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($inp,&wparam(0));
6642c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($out,&wparam(1));
6652c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($len,&wparam(2));
6662c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($key,&wparam(3));
6672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds_,&wparam(4));
6682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds,&wparam(5));
6692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($key_,"esp");
6702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&sub	("esp",60);
6712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&and	("esp",-16);			# align stack
6722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(48,"esp"),$key_);
6732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
6742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqu	($ivec,&QWP(0,$rounds_));	# load ivec
6752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqu	($cmac,&QWP(0,$rounds));	# load cmac
6762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds,&DWP(240,$key));
6772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
6782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	# compose byte-swap control mask for pshufb on stack
6792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(0,"esp"),0x0c0d0e0f);
6802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(4,"esp"),0x08090a0b);
6812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(8,"esp"),0x04050607);
6822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(12,"esp"),0x00010203);
6832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
6842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	# compose counter increment vector on stack
6852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds_,1);
6862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xor	($key_,$key_);
6872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(16,"esp"),$rounds_);
6882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(20,"esp"),$key_);
6892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(24,"esp"),$key_);
6902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(28,"esp"),$key_);
6912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
6922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($inout3,&QWP(0,"esp"));	# bswap mask
6932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($inout0,$ivec);
6942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
6952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($key_,$key);
6962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds_,$rounds);
6972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
6982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufb	($ivec,$inout3);
6992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	if ($inline)
7002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	{   &aesni_inline_generate1("enc");	}
7012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	else
7022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	{   &call	("_aesni_encrypt1");	}
7032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($in0,&QWP(0,$inp));		# load inp
7042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&paddq	($ivec,&QWP(16,"esp"));
7052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($inp,&QWP(16,$inp));
7062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jmp	(&label("ccm64_dec_outer"));
7072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
7082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("ccm64_dec_outer",16);
7092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($in0,$inout0);			# inp ^= E(ivec)
7102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($inout0,$ivec);
7112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds,$rounds_);
7122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0,$out),$in0);		# save output
7132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($out,&DWP(16,$out));
7142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufb	($inout0,$inout3);
7152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
7162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&sub	($len,1);
7172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jz	(&label("ccm64_dec_break"));
7182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
7192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&$movekey	($rndkey0,&QWP(0,$key_));
7202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&shr		($rounds,1);
7212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&$movekey	($rndkey1,&QWP(16,$key_));
7222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps		($in0,$rndkey0);
7232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea		($key,&DWP(32,$key_));
7242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps		($inout0,$rndkey0);
7252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps		($cmac,$in0);		# cmac^=out
7262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&$movekey	($rndkey0,&QWP(0,$key));
7272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
7282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("ccm64_dec2_loop");
7292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aesenc		($inout0,$rndkey1);
7302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&dec		($rounds);
7312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aesenc		($cmac,$rndkey1);
7322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&$movekey	($rndkey1,&QWP(16,$key));
7332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aesenc		($inout0,$rndkey0);
7342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea		($key,&DWP(32,$key));
7352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aesenc		($cmac,$rndkey0);
7362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&$movekey	($rndkey0,&QWP(0,$key));
7372c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jnz		(&label("ccm64_dec2_loop"));
7382c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups		($in0,&QWP(0,$inp));	# load inp
7392c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&paddq		($ivec,&QWP(16,"esp"));
7402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aesenc		($inout0,$rndkey1);
7412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aesenc		($cmac,$rndkey1);
7422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea		($inp,&QWP(16,$inp));
7432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aesenclast	($inout0,$rndkey0);
7442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aesenclast	($cmac,$rndkey0);
7452c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jmp	(&label("ccm64_dec_outer"));
7462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
7472c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("ccm64_dec_break",16);
7482c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($key,$key_);
7492c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	if ($inline)
7502c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	{   &aesni_inline_generate1("enc",$cmac,$in0);	}
7512c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	else
7522c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	{   &call	("_aesni_encrypt1",$cmac);	}
7532c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
7542c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	("esp",&DWP(48,"esp"));
7552c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($out,&wparam(5));
7562c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0,$out),$cmac);
7572c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&function_end("aesni_ccm64_decrypt_blocks");
7582c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org}
7592c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
7602c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org######################################################################
7612c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# void aesni_ctr32_encrypt_blocks (const void *in, void *out,
7622c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org#                         size_t blocks, const AES_KEY *key,
7632c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org#                         const char *ivec);
7642c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org#
7652c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# Handles only complete blocks, operates on 32-bit counter and
7662c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# does not update *ivec! (see engine/eng_aesni.c for details)
7672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org#
7682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# stack layout:
7692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org#	0	pshufb mask
7702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org#	16	vector addend: 0,6,6,6
7712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# 	32	counter-less ivec
7722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org#	48	1st triplet of counter vector
7732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org#	64	2nd triplet of counter vector
7742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org#	80	saved %esp
7752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
# Emits aesni_ctr32_encrypt_blocks: XORs whole 16-byte blocks of input with
# AES-encrypted counter blocks. The bulk path handles six blocks per pass
# (ctr32_loop6); one to five leftover blocks are handled by ctr32_tail.
# Register names ($inout0..$inout5, $rndkey0/1, $in0, $key_, $rounds_, ...)
# and the _aesni_encrypt* helpers are defined outside this section.
7762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&function_begin("aesni_ctr32_encrypt_blocks");
	# Fetch the five arguments (in, out, blocks, key, ivec per the prototype
	# comment above). $rounds_ temporarily holds the ivec pointer.
7772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($inp,&wparam(0));
7782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($out,&wparam(1));
7792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($len,&wparam(2));
7802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($key,&wparam(3));
7812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds_,&wparam(4));
	# Carve out an 88-byte scratch frame aligned to 16 bytes (movdqa needs
	# the alignment); the caller's %esp is parked at 80(%esp) and restored
	# at ctr32_ret.
7822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($key_,"esp");
7832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&sub	("esp",88);
7842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&and	("esp",-16);			# align stack
7852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(80,"esp"),$key_);
7862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
	# Exactly one block: skip all counter set-up and encrypt ivec as-is.
7872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&cmp	($len,1);
7882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&je	(&label("ctr32_one_shortcut"));
7892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
7902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqu	($inout5,&QWP(0,$rounds_));	# load ivec
7912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
7922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	# compose byte-swap control mask for pshufb on stack
	# (as stored, destination byte i selects source byte 15-i, i.e. the
	# mask reverses all 16 bytes of the register)
7932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(0,"esp"),0x0c0d0e0f);
7942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(4,"esp"),0x08090a0b);
7952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(8,"esp"),0x04050607);
7962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(12,"esp"),0x00010203);
7972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
7982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	# compose counter increment vector on stack
	# (dwords at 16,20,24,28(%esp) become 6,6,6,0: each of the three low
	# lanes of a counter triplet advances by six per loop6 pass)
7992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds,6);
8002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xor	($key_,$key_);
8012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(16,"esp"),$rounds);
8022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(20,"esp"),$rounds);
8032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(24,"esp"),$rounds);
8042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(28,"esp"),$key_);
8052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
	# $key_ is still zero from the xor above, so pinsrd clears lane 3 and
	# leaves $inout5 holding the ivec without its counter dword.
8062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pextrd	($rounds_,$inout5,3);		# pull 32-bit counter
8072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pinsrd	($inout5,$key_,3);		# wipe 32-bit counter
8082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
8092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds,&DWP(240,$key));	# key->rounds
8102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
8112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	# compose 2 vectors of 3x32-bit counters
	# With c = bswap(counter dword), the lanes become
	#   $rndkey1 = {c, c+1, c+2, 0}   and   $rndkey0 = {c+3, c+4, c+5, 0}.
	# These un-swapped values are what gets saved at 48/64(%esp), so the
	# loop can advance them with plain paddd; the registers are then
	# byte-reversed for use as counter blocks.
8122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&bswap	($rounds_);
8132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($rndkey1,$rndkey1);
8142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($rndkey0,$rndkey0);
8152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($inout0,&QWP(0,"esp"));	# load byte-swap mask
8162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pinsrd	($rndkey1,$rounds_,0);
8172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($key_,&DWP(3,$rounds_));
8182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pinsrd	($rndkey0,$key_,0);
8192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&inc	($rounds_);
8202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pinsrd	($rndkey1,$rounds_,1);
8212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&inc	($key_);
8222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pinsrd	($rndkey0,$key_,1);
8232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&inc	($rounds_);
8242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pinsrd	($rndkey1,$rounds_,2);
8252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&inc	($key_);
8262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pinsrd	($rndkey0,$key_,2);
8272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	(&QWP(48,"esp"),$rndkey1);	# save 1st triplet
8282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufb	($rndkey1,$inout0);		# byte swap
8292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	(&QWP(64,"esp"),$rndkey0);	# save 2nd triplet
8302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufb	($rndkey0,$inout0);		# byte swap
8312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
	# pshufd with immediate N<<6 copies source lane N into lane 3 and
	# source lane 0 (the zero lane after the byte reversal) into lanes 0..2.
8322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd	($inout0,$rndkey1,3<<6);	# place counter to upper dword
8332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd	($inout1,$rndkey1,2<<6);
	# Fewer than six blocks: go straight to the tail. $inout5 still holds
	# the counter-less ivec, and $key/$rounds are untouched, as the tail
	# expects.
8342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&cmp	($len,6);
8352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jb	(&label("ctr32_tail"));
8362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	(&QWP(32,"esp"),$inout5);	# save counter-less ivec
	# $rounds is halved for the bulk loop and re-expanded by the lea after
	# it. NOTE(review): presumably _aesni_encrypt6_enter consumes two round
	# keys per iteration; confirm against its definition.
8372c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&shr	($rounds,1);
8382c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($key_,$key);			# backup $key
8392c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds_,$rounds);		# backup $rounds
8402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&sub	($len,6);
8412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jmp	(&label("ctr32_loop6"));
8422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
	# Bulk loop: six blocks per pass. On entry $inout0/$inout1 already hold
	# the first two counters in lane 3; the remaining four are expanded
	# here and every block is OR-ed with the counter-less ivec.
8432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("ctr32_loop6",16);
8442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd	($inout2,$rndkey1,1<<6);
8452c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($rndkey1,&QWP(32,"esp"));	# pull counter-less ivec
8462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd	($inout3,$rndkey0,3<<6);
8472c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&por	($inout0,$rndkey1);		# merge counter-less ivec
8482c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd	($inout4,$rndkey0,2<<6);
8492c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&por	($inout1,$rndkey1);
8502c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd	($inout5,$rndkey0,1<<6);
8512c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&por	($inout2,$rndkey1);
8522c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&por	($inout3,$rndkey1);
8532c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&por	($inout4,$rndkey1);
8542c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&por	($inout5,$rndkey1);
8552c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
8562c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	# inlining _aesni_encrypt6's prologue gives ~4% improvement...
	# The pxor with the key at offset 0 and the first aesenc with the key
	# at offset 16 are interleaved across the six blocks. $key is advanced
	# to $key_+32 and $rndkey0 reloaded before entering the shared routine
	# at its _aesni_encrypt6_enter label (defined outside this section).
8572c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&$movekey	($rndkey0,&QWP(0,$key_));
8582c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&$movekey	($rndkey1,&QWP(16,$key_));
8592c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea		($key,&DWP(32,$key_));
8602c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&dec		($rounds);
8612c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor		($inout0,$rndkey0);
8622c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor		($inout1,$rndkey0);
8632c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aesenc		($inout0,$rndkey1);
8642c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor		($inout2,$rndkey0);
8652c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aesenc		($inout1,$rndkey1);
8662c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor		($inout3,$rndkey0);
8672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aesenc		($inout2,$rndkey1);
8682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor		($inout4,$rndkey0);
8692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aesenc		($inout3,$rndkey1);
8702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor		($inout5,$rndkey0);
8712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aesenc		($inout4,$rndkey1);
8722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&$movekey	($rndkey0,&QWP(0,$key));
8732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aesenc		($inout5,$rndkey1);
8742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
8752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("_aesni_encrypt6_enter"));
8762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
	# XOR the six results with the input and store them. $rndkey0/$rndkey1
	# are reused as scratch for the first three input blocks, then loaded
	# with the increment vector and the saved (un-swapped) 1st triplet.
8772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($rndkey1,&QWP(0,$inp));
8782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($rndkey0,&QWP(0x10,$inp));
8792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,$rndkey1);
8802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($rndkey1,&QWP(0x20,$inp));
8812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout1,$rndkey0);
8822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0,$out),$inout0);
8832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($rndkey0,&QWP(16,"esp"));	# load increment
8842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout2,$rndkey1);
8852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($rndkey1,&QWP(48,"esp"));	# load 1st triplet
8862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0x10,$out),$inout1);
8872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0x20,$out),$inout2);
8882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
	# Advance both triplets by 6 per lane. $inout0..$inout2 have already
	# been stored, so they are free to be reused as the byte-swap mask and
	# as scratch for the last three input blocks.
8892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&paddd	($rndkey1,$rndkey0);		# 1st triplet increment
8902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&paddd	($rndkey0,&QWP(64,"esp"));	# 2nd triplet increment
8912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($inout0,&QWP(0,"esp"));	# load byte swap mask
8922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
8932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout1,&QWP(0x30,$inp));
8942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout2,&QWP(0x40,$inp));
8952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout3,$inout1);
8962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout1,&QWP(0x50,$inp));
8972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($inp,&DWP(0x60,$inp));
8982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	(&QWP(48,"esp"),$rndkey1);	# save 1st triplet
8992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufb	($rndkey1,$inout0);		# byte swap
9002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout4,$inout2);
9012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0x30,$out),$inout3);
9022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout5,$inout1);
9032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	(&QWP(64,"esp"),$rndkey0);	# save 2nd triplet
9042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufb	($rndkey0,$inout0);		# byte swap
9052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0x40,$out),$inout4);
	# Pre-compute the first two counter blocks for the next pass (or for
	# the tail), mirroring the set-up done before the loop.
9062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd	($inout0,$rndkey1,3<<6);
9072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0x50,$out),$inout5);
9082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($out,&DWP(0x60,$out));
9092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
9102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds,$rounds_);
9112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd	($inout1,$rndkey1,2<<6);
	# Loop while at least six more blocks remain (no borrow from the sub).
9122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&sub	($len,6);
9132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jnc	(&label("ctr32_loop6"));
9142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
	# Undo the final sub; zero means the input was a multiple of six blocks.
9152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&add	($len,6);
9162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jz	(&label("ctr32_ret"));
	# Re-establish what the tail's helper calls take as input: the original
	# key pointer, $rounds = 2*$rounds+1 (this undoes the earlier shr only
	# if key->rounds was odd; NOTE(review): confirm against the key
	# schedule code) and the counter-less ivec in $inout5.
9172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($key,$key_);
9182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($rounds,&DWP(1,"",$rounds,2));	# restore $rounds
9192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($inout5,&QWP(32,"esp"));	# pull counter-less ivec
9202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
	# Tail: 1..5 blocks left. Counter blocks are completed one at a time
	# while dispatching on $len. The je branches reuse the flags of the
	# preceding cmp; the pshufd/por in between are SSE instructions and
	# leave EFLAGS untouched.
9212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("ctr32_tail");
9222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&por	($inout0,$inout5);
9232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&cmp	($len,2);
9242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jb	(&label("ctr32_one"));
9252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
9262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd	($inout2,$rndkey1,1<<6);
9272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&por	($inout1,$inout5);
9282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&je	(&label("ctr32_two"));
9292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
9302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd	($inout3,$rndkey0,3<<6);
9312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&por	($inout2,$inout5);
9322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&cmp	($len,4);
9332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jb	(&label("ctr32_three"));
9342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
9352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd	($inout4,$rndkey0,2<<6);
9362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&por	($inout3,$inout5);
9372c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&je	(&label("ctr32_four"));
9382c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
	# Five blocks: the six-block helper is called, but only
	# $inout0..$inout4 are XOR-ed with input and stored; whatever it leaves
	# in $inout5 is unused.
9392c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&por	($inout4,$inout5);
9402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call	("_aesni_encrypt6");
9412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($rndkey1,&QWP(0,$inp));
9422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($rndkey0,&QWP(0x10,$inp));
9432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,$rndkey1);
9442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($rndkey1,&QWP(0x20,$inp));
9452c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout1,$rndkey0);
9462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($rndkey0,&QWP(0x30,$inp));
9472c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout2,$rndkey1);
9482c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($rndkey1,&QWP(0x40,$inp));
9492c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout3,$rndkey0);
9502c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0,$out),$inout0);
9512c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout4,$rndkey1);
9522c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0x10,$out),$inout1);
9532c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0x20,$out),$inout2);
9542c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0x30,$out),$inout3);
9552c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0x40,$out),$inout4);
9562c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jmp	(&label("ctr32_ret"));
9572c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
	# Single-block fast path taken from the function prologue: the ivec is
	# used unmodified as the counter block, and $rounds is loaded here
	# because the normal set-up was skipped.
9582c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("ctr32_one_shortcut",16);
9592c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout0,&QWP(0,$rounds_));	# load ivec
9602c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds,&DWP(240,$key));
9612c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
	# One block: encrypt $inout0 (inline or via call, depending on $inline),
	# XOR with the input block and store.
9622c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("ctr32_one");
9632c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	if ($inline)
9642c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	{   &aesni_inline_generate1("enc");	}
9652c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	else
9662c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	{   &call	("_aesni_encrypt1");	}
9672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($in0,&QWP(0,$inp));
9682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($in0,$inout0);
9692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0,$out),$in0);
9702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jmp	(&label("ctr32_ret"));
9712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
	# Two blocks: the three-block helper is called, but only
	# $inout0/$inout1 are XOR-ed with input and stored. $inout3/$inout4
	# serve as scratch for the input.
9722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("ctr32_two",16);
9732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call	("_aesni_encrypt3");
9742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout3,&QWP(0,$inp));
9752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout4,&QWP(0x10,$inp));
9762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,$inout3);
9772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout1,$inout4);
9782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0,$out),$inout0);
9792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0x10,$out),$inout1);
9802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jmp	(&label("ctr32_ret"));
9812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
	# Three blocks: $inout3..$inout5 serve as scratch for the input.
9822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("ctr32_three",16);
9832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call	("_aesni_encrypt3");
9842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout3,&QWP(0,$inp));
9852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout4,&QWP(0x10,$inp));
9862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,$inout3);
9872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout5,&QWP(0x20,$inp));
9882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout1,$inout4);
9892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0,$out),$inout0);
9902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout2,$inout5);
9912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0x10,$out),$inout1);
9922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0x20,$out),$inout2);
9932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jmp	(&label("ctr32_ret"));
9942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
	# Four blocks: $inout4/$inout5 and $rndkey0/$rndkey1 serve as scratch
	# for the input. Falls through into ctr32_ret.
9952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("ctr32_four",16);
9962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call	("_aesni_encrypt4");
9972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout4,&QWP(0,$inp));
9982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout5,&QWP(0x10,$inp));
9992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($rndkey1,&QWP(0x20,$inp));
10002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,$inout4);
10012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($rndkey0,&QWP(0x30,$inp));
10022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout1,$inout5);
10032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0,$out),$inout0);
10042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout2,$rndkey1);
10052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0x10,$out),$inout1);
10062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout3,$rndkey0);
10072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0x20,$out),$inout2);
10082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0x30,$out),$inout3);
10092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
	# Common exit: reload the caller's %esp saved at 80(%esp) in the
	# prologue.
10102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("ctr32_ret");
10112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	("esp",&DWP(80,"esp"));
10122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&function_end("aesni_ctr32_encrypt_blocks");
10132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
10142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org######################################################################
10152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# void aesni_xts_[en|de]crypt(const char *inp,char *out,size_t len,
10162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org#	const AES_KEY *key1, const AES_KEY *key2,
10172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org#	const unsigned char iv[16]);
10182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org#
10192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org{ my ($tweak,$twtmp,$twres,$twmask)=($rndkey1,$rndkey0,$inout0,$inout1);
10202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
10212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&function_begin("aesni_xts_encrypt");
10222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($key,&wparam(4));		# key2
10232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($inp,&wparam(5));		# clear-text tweak
10242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
10252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds,&DWP(240,$key));	# key2->rounds
10262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout0,&QWP(0,$inp));
10272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	if ($inline)
10282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	{   &aesni_inline_generate1("enc");	}
10292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	else
10302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	{   &call	("_aesni_encrypt1");	}
10312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
10322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($inp,&wparam(0));
10332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($out,&wparam(1));
10342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($len,&wparam(2));
10352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($key,&wparam(3));		# key1
10362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
10372c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($key_,"esp");
10382c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&sub	("esp",16*7+8);
10392c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds,&DWP(240,$key));	# key1->rounds
10402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&and	("esp",-16);			# align stack
10412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
10422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(16*6+0,"esp"),0x87);	# compose the magic constant
10432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(16*6+4,"esp"),0);
10442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(16*6+8,"esp"),1);
10452c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(16*6+12,"esp"),0);
10462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(16*7+0,"esp"),$len);	# save original $len
10472c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(16*7+4,"esp"),$key_);	# save original %esp
10482c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
10492c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($tweak,$inout0);
10502c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($twtmp,$twtmp);
10512c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($twmask,&QWP(6*16,"esp"));	# 0x0...010...87
10522c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pcmpgtd($twtmp,$tweak);		# broadcast upper bits
10532c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
10542c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&and	($len,-16);
10552c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($key_,$key);			# backup $key
10562c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds_,$rounds);		# backup $rounds
10572c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&sub	($len,16*6);
10582c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jc	(&label("xts_enc_short"));
10592c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
10602c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&shr	($rounds,1);
10612c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds_,$rounds);
10622c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jmp	(&label("xts_enc_loop6"));
10632c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
10642c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_enc_loop6",16);
10652c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	for ($i=0;$i<4;$i++) {
10662c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	    &pshufd	($twres,$twtmp,0x13);
10672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	    &pxor	($twtmp,$twtmp);
10682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	    &movdqa	(&QWP(16*$i,"esp"),$tweak);
10692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	    &paddq	($tweak,$tweak);	# &psllq($tweak,1);
10702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	    &pand	($twres,$twmask);	# isolate carry and residue
10712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	    &pcmpgtd	($twtmp,$tweak);	# broadcast upper bits
10722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	    &pxor	($tweak,$twres);
10732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	}
10742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd	($inout5,$twtmp,0x13);
10752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	(&QWP(16*$i++,"esp"),$tweak);
10762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&paddq	($tweak,$tweak);		# &psllq($tweak,1);
10772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &$movekey	($rndkey0,&QWP(0,$key_));
10782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pand	($inout5,$twmask);		# isolate carry and residue
10792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &movups	($inout0,&QWP(0,$inp));	# load input
10802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout5,$tweak);
10812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
10822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	# inline _aesni_encrypt6 prologue and flip xor with tweak and key[0]
10832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqu	($inout1,&QWP(16*1,$inp));
10842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &xorps		($inout0,$rndkey0);	# input^=rndkey[0]
10852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqu	($inout2,&QWP(16*2,$inp));
10862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &pxor		($inout1,$rndkey0);
10872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqu	($inout3,&QWP(16*3,$inp));
10882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &pxor		($inout2,$rndkey0);
10892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqu	($inout4,&QWP(16*4,$inp));
10902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &pxor		($inout3,$rndkey0);
10912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqu	($rndkey1,&QWP(16*5,$inp));
10922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &pxor		($inout4,$rndkey0);
10932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($inp,&DWP(16*6,$inp));
10942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout0,&QWP(16*0,"esp"));	# input^=tweak
10952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	(&QWP(16*$i,"esp"),$inout5);	# save last tweak
10962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout5,$rndkey1);
10972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
10982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &$movekey	($rndkey1,&QWP(16,$key_));
10992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &lea		($key,&DWP(32,$key_));
11002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout1,&QWP(16*1,"esp"));
11012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &aesenc	($inout0,$rndkey1);
11022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout2,&QWP(16*2,"esp"));
11032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &aesenc	($inout1,$rndkey1);
11042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout3,&QWP(16*3,"esp"));
11052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &dec		($rounds);
11062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &aesenc	($inout2,$rndkey1);
11072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout4,&QWP(16*4,"esp"));
11082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &aesenc	($inout3,$rndkey1);
11092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor		($inout5,$rndkey0);
11102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &aesenc	($inout4,$rndkey1);
11112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &$movekey	($rndkey0,&QWP(0,$key));
11122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &aesenc	($inout5,$rndkey1);
11132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("_aesni_encrypt6_enter"));
11142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
11152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($tweak,&QWP(16*5,"esp"));	# last tweak
11162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org       &pxor	($twtmp,$twtmp);
11172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,&QWP(16*0,"esp"));	# output^=tweak
11182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org       &pcmpgtd	($twtmp,$tweak);		# broadcast upper bits
11192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout1,&QWP(16*1,"esp"));
11202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*0,$out),$inout0);	# write output
11212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout2,&QWP(16*2,"esp"));
11222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*1,$out),$inout1);
11232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout3,&QWP(16*3,"esp"));
11242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*2,$out),$inout2);
11252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout4,&QWP(16*4,"esp"));
11262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*3,$out),$inout3);
11272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout5,$tweak);
11282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*4,$out),$inout4);
11292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org       &pshufd	($twres,$twtmp,0x13);
11302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*5,$out),$inout5);
11312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($out,&DWP(16*6,$out));
11322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org       &movdqa	($twmask,&QWP(16*6,"esp"));	# 0x0...010...87
11332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
11342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($twtmp,$twtmp);
11352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&paddq	($tweak,$tweak);		# &psllq($tweak,1);
11362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pand	($twres,$twmask);		# isolate carry and residue
11372c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pcmpgtd($twtmp,$tweak);		# broadcast upper bits
11382c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds,$rounds_);		# restore $rounds
11392c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($tweak,$twres);
11402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
11412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&sub	($len,16*6);
11422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jnc	(&label("xts_enc_loop6"));
11432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
11442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($rounds,&DWP(1,"",$rounds,2));	# restore $rounds
11452c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($key,$key_);			# restore $key
11462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds_,$rounds);
11472c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
11482c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_enc_short");
11492c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&add	($len,16*6);
11502c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jz	(&label("xts_enc_done6x"));
11512c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
11522c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($inout3,$tweak);		# put aside previous tweak
11532c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&cmp	($len,0x20);
11542c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jb	(&label("xts_enc_one"));
11552c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
11562c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd	($twres,$twtmp,0x13);
11572c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($twtmp,$twtmp);
11582c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&paddq	($tweak,$tweak);		# &psllq($tweak,1);
11592c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pand	($twres,$twmask);		# isolate carry and residue
11602c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pcmpgtd($twtmp,$tweak);		# broadcast upper bits
11612c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($tweak,$twres);
11622c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&je	(&label("xts_enc_two"));
11632c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
11642c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd	($twres,$twtmp,0x13);
11652c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($twtmp,$twtmp);
11662c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($inout4,$tweak);		# put aside previous tweak
11672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&paddq	($tweak,$tweak);		# &psllq($tweak,1);
11682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pand	($twres,$twmask);		# isolate carry and residue
11692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pcmpgtd($twtmp,$tweak);		# broadcast upper bits
11702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($tweak,$twres);
11712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&cmp	($len,0x40);
11722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jb	(&label("xts_enc_three"));
11732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
11742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd	($twres,$twtmp,0x13);
11752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($twtmp,$twtmp);
11762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($inout5,$tweak);		# put aside previous tweak
11772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&paddq	($tweak,$tweak);		# &psllq($tweak,1);
11782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pand	($twres,$twmask);		# isolate carry and residue
11792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pcmpgtd($twtmp,$tweak);		# broadcast upper bits
11802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($tweak,$twres);
11812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	(&QWP(16*0,"esp"),$inout3);
11822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	(&QWP(16*1,"esp"),$inout4);
11832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&je	(&label("xts_enc_four"));
11842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
11852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	(&QWP(16*2,"esp"),$inout5);
11862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd	($inout5,$twtmp,0x13);
11872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	(&QWP(16*3,"esp"),$tweak);
11882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&paddq	($tweak,$tweak);		# &psllq($tweak,1);
11892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pand	($inout5,$twmask);		# isolate carry and residue
11902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout5,$tweak);
11912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
11922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqu	($inout0,&QWP(16*0,$inp));	# load input
11932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqu	($inout1,&QWP(16*1,$inp));
11942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqu	($inout2,&QWP(16*2,$inp));
11952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout0,&QWP(16*0,"esp"));	# input^=tweak
11962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqu	($inout3,&QWP(16*3,$inp));
11972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout1,&QWP(16*1,"esp"));
11982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqu	($inout4,&QWP(16*4,$inp));
11992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout2,&QWP(16*2,"esp"));
12002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($inp,&DWP(16*5,$inp));
12012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout3,&QWP(16*3,"esp"));
12022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	(&QWP(16*4,"esp"),$inout5);	# save last tweak
12032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout4,$inout5);
12042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
12052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call	("_aesni_encrypt6");
12062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
12072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movaps	($tweak,&QWP(16*4,"esp"));	# last tweak
12082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,&QWP(16*0,"esp"));	# output^=tweak
12092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout1,&QWP(16*1,"esp"));
12102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout2,&QWP(16*2,"esp"));
12112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*0,$out),$inout0);	# write output
12122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout3,&QWP(16*3,"esp"));
12132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*1,$out),$inout1);
12142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout4,$tweak);
12152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*2,$out),$inout2);
12162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*3,$out),$inout3);
12172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*4,$out),$inout4);
12182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($out,&DWP(16*5,$out));
12192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jmp	(&label("xts_enc_done"));
12202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
12212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_enc_one",16);
12222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout0,&QWP(16*0,$inp));	# load input
12232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($inp,&DWP(16*1,$inp));
12242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,$inout3);		# input^=tweak
12252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	if ($inline)
12262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	{   &aesni_inline_generate1("enc");	}
12272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	else
12282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	{   &call	("_aesni_encrypt1");	}
12292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,$inout3);		# output^=tweak
12302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*0,$out),$inout0);	# write output
12312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($out,&DWP(16*1,$out));
12322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
12332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($tweak,$inout3);		# last tweak
12342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jmp	(&label("xts_enc_done"));
12352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
12362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_enc_two",16);
12372c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movaps	($inout4,$tweak);		# put aside last tweak
12382c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
12392c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout0,&QWP(16*0,$inp));	# load input
12402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout1,&QWP(16*1,$inp));
12412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($inp,&DWP(16*2,$inp));
12422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,$inout3);		# input^=tweak
12432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout1,$inout4);
12442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout2,$inout2);
12452c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
12462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call	("_aesni_encrypt3");
12472c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
12482c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,$inout3);		# output^=tweak
12492c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout1,$inout4);
12502c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*0,$out),$inout0);	# write output
12512c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*1,$out),$inout1);
12522c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($out,&DWP(16*2,$out));
12532c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
12542c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($tweak,$inout4);		# last tweak
12552c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jmp	(&label("xts_enc_done"));
12562c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
12572c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_enc_three",16);
12582c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movaps	($inout5,$tweak);		# put aside last tweak
12592c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout0,&QWP(16*0,$inp));	# load input
12602c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout1,&QWP(16*1,$inp));
12612c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout2,&QWP(16*2,$inp));
12622c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($inp,&DWP(16*3,$inp));
12632c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,$inout3);		# input^=tweak
12642c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout1,$inout4);
12652c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout2,$inout5);
12662c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
12672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call	("_aesni_encrypt3");
12682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
12692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,$inout3);		# output^=tweak
12702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout1,$inout4);
12712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout2,$inout5);
12722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*0,$out),$inout0);	# write output
12732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*1,$out),$inout1);
12742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*2,$out),$inout2);
12752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($out,&DWP(16*3,$out));
12762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
12772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($tweak,$inout5);		# last tweak
12782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jmp	(&label("xts_enc_done"));
12792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
12802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_enc_four",16);
12812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movaps	($inout4,$tweak);		# put aside last tweak
12822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
12832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout0,&QWP(16*0,$inp));	# load input
12842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout1,&QWP(16*1,$inp));
12852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout2,&QWP(16*2,$inp));
12862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,&QWP(16*0,"esp"));	# input^=tweak
12872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout3,&QWP(16*3,$inp));
12882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($inp,&DWP(16*4,$inp));
12892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout1,&QWP(16*1,"esp"));
12902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout2,$inout5);
12912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout3,$inout4);
12922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
12932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call	("_aesni_encrypt4");
12942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
12952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,&QWP(16*0,"esp"));	# output^=tweak
12962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout1,&QWP(16*1,"esp"));
12972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout2,$inout5);
12982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*0,$out),$inout0);	# write output
12992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout3,$inout4);
13002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*1,$out),$inout1);
13012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*2,$out),$inout2);
13022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*3,$out),$inout3);
13032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($out,&DWP(16*4,$out));
13042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
13052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($tweak,$inout4);		# last tweak
13062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jmp	(&label("xts_enc_done"));
13072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
13082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_enc_done6x",16);		# $tweak is pre-calculated
13092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($len,&DWP(16*7+0,"esp"));	# restore original $len
13102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&and	($len,15);
13112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jz	(&label("xts_enc_ret"));
13122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($inout3,$tweak);
13132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(16*7+0,"esp"),$len);	# save $len%16
13142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jmp	(&label("xts_enc_steal"));
13152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
13162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_enc_done",16);
13172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($len,&DWP(16*7+0,"esp"));	# restore original $len
13182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($twtmp,$twtmp);
13192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&and	($len,15);
13202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jz	(&label("xts_enc_ret"));
13212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
13222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pcmpgtd($twtmp,$tweak);		# broadcast upper bits
13232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(16*7+0,"esp"),$len);	# save $len%16
13242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd	($inout3,$twtmp,0x13);
13252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&paddq	($tweak,$tweak);		# &psllq($tweak,1);
13262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pand	($inout3,&QWP(16*6,"esp"));	# isolate carry and residue
13272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout3,$tweak);
13282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
13292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_enc_steal");
13302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movz	($rounds,&BP(0,$inp));
13312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movz	($key,&BP(-16,$out));
13322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($inp,&DWP(1,$inp));
13332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&BP(-16,$out),&LB($rounds));
13342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&BP(0,$out),&LB($key));
13352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($out,&DWP(1,$out));
13362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&sub	($len,1);
13372c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jnz	(&label("xts_enc_steal"));
13382c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
13392c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&sub	($out,&DWP(16*7+0,"esp"));	# rewind $out
13402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($key,$key_);			# restore $key
13412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds,$rounds_);		# restore $rounds
13422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
13432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout0,&QWP(-16,$out));	# load input
13442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,$inout3);		# input^=tweak
13452c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	if ($inline)
13462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	{   &aesni_inline_generate1("enc");	}
13472c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	else
13482c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	{   &call	("_aesni_encrypt1");	}
13492c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,$inout3);		# output^=tweak
13502c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(-16,$out),$inout0);	# write output
13512c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
13522c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_enc_ret");
13532c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	("esp",&DWP(16*7+4,"esp"));	# restore %esp
13542c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&function_end("aesni_xts_encrypt");
13552c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
13562c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&function_begin("aesni_xts_decrypt");
13572c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($key,&wparam(4));		# key2
13582c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($inp,&wparam(5));		# clear-text tweak
13592c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
13602c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds,&DWP(240,$key));	# key2->rounds
13612c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout0,&QWP(0,$inp));
13622c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	if ($inline)
13632c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	{   &aesni_inline_generate1("enc");	}
13642c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	else
13652c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	{   &call	("_aesni_encrypt1");	}
13662c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
13672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($inp,&wparam(0));
13682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($out,&wparam(1));
13692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($len,&wparam(2));
13702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($key,&wparam(3));		# key1
13712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
13722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($key_,"esp");
13732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&sub	("esp",16*7+8);
13742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&and	("esp",-16);			# align stack
13752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
13762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xor	($rounds_,$rounds_);		# if(len%16) len-=16;
13772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&test	($len,15);
13782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&setnz	(&LB($rounds_));
13792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&shl	($rounds_,4);
13802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&sub	($len,$rounds_);
13812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
13822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(16*6+0,"esp"),0x87);	# compose the magic constant
13832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(16*6+4,"esp"),0);
13842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(16*6+8,"esp"),1);
13852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(16*6+12,"esp"),0);
13862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(16*7+0,"esp"),$len);	# save original $len
13872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(16*7+4,"esp"),$key_);	# save original %esp
13882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
13892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds,&DWP(240,$key));	# key1->rounds
13902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($key_,$key);			# backup $key
13912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds_,$rounds);		# backup $rounds
13922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
13932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($tweak,$inout0);
13942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($twtmp,$twtmp);
13952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($twmask,&QWP(6*16,"esp"));	# 0x0...010...87
13962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pcmpgtd($twtmp,$tweak);		# broadcast upper bits
13972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
13982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&and	($len,-16);
13992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&sub	($len,16*6);
14002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jc	(&label("xts_dec_short"));
14012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
14022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&shr	($rounds,1);
14032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds_,$rounds);
14042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jmp	(&label("xts_dec_loop6"));
14052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
14062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_dec_loop6",16);
	# Six-blocks-per-pass loop.  The tweaks for the six blocks are parked
	# at 16*0..16*5(%esp) so they can be XORed in both before and after
	# the 6-way decrypt.
	#
	# Tweak update idiom (repeated below): $twtmp holds a per-dword sign
	# broadcast of $tweak (pcmpgtd against zero).  pshufd 0x13 routes the
	# sign of dword 3 to dword 0 and the sign of dword 1 to dword 2; pand
	# with $twmask (the 0x87/0/1/0 constant kept at 16*6(%esp)) leaves the
	# residue and the carry; paddq doubles both 64-bit halves; pxor folds
	# carry and residue back in.
	#
	# The Perl loop runs at generation time, i.e. it emits four unrolled
	# copies that store tweaks 0..3.
14072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	for ($i=0;$i<4;$i++) {
14082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	    &pshufd	($twres,$twtmp,0x13);
14092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	    &pxor	($twtmp,$twtmp);
14102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	    &movdqa	(&QWP(16*$i,"esp"),$tweak);
14112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	    &paddq	($tweak,$tweak);	# &psllq($tweak,1);
14122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	    &pand	($twres,$twmask);	# isolate carry and residue
14132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	    &pcmpgtd	($twtmp,$tweak);	# broadcast upper bits
14142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	    &pxor	($tweak,$twres);
14152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	}
	# Tweak 4 is stored at 16*4(%esp) ($i is 4 here, 5 afterwards); the
	# sixth tweak is built directly in $inout5.
14162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd	($inout5,$twtmp,0x13);
14172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	(&QWP(16*$i++,"esp"),$tweak);
14182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&paddq	($tweak,$tweak);		# &psllq($tweak,1);
14192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &$movekey	($rndkey0,&QWP(0,$key_));
14202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pand	($inout5,$twmask);		# isolate carry and residue
14212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &movups	($inout0,&QWP(0,$inp));	# load input
14222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout5,$tweak);
14232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
14242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	# inline _aesni_decrypt6 prologue and flip xor with tweak and key[0]
	# (each block ends up as input ^ rndkey[0] ^ tweak before the first
	# aesdec; the routine is then entered at _aesni_decrypt6_enter)
14252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqu	($inout1,&QWP(16*1,$inp));
14262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &xorps		($inout0,$rndkey0);	# input^=rndkey[0]
14272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqu	($inout2,&QWP(16*2,$inp));
14282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &pxor		($inout1,$rndkey0);
14292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqu	($inout3,&QWP(16*3,$inp));
14302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &pxor		($inout2,$rndkey0);
14312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqu	($inout4,&QWP(16*4,$inp));
14322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &pxor		($inout3,$rndkey0);
	# The sixth input block passes through $rndkey1, which is reloaded
	# with the round key at 16($key_) a few lines below.
14332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqu	($rndkey1,&QWP(16*5,$inp));
14342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &pxor		($inout4,$rndkey0);
14352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($inp,&DWP(16*6,$inp));
14362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout0,&QWP(16*0,"esp"));	# input^=tweak
14372c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	(&QWP(16*$i,"esp"),$inout5);	# save last tweak (at 16*5)
14382c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout5,$rndkey1);		# tweak^=sixth input block
14392c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
14402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &$movekey	($rndkey1,&QWP(16,$key_));
14412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &lea		($key,&DWP(32,$key_));
14422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout1,&QWP(16*1,"esp"));
14432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &aesdec	($inout0,$rndkey1);
14442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout2,&QWP(16*2,"esp"));
14452c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &aesdec	($inout1,$rndkey1);
14462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout3,&QWP(16*3,"esp"));
14472c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &dec		($rounds);
14482c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &aesdec	($inout2,$rndkey1);
14492c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout4,&QWP(16*4,"esp"));
14502c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &aesdec	($inout3,$rndkey1);
14512c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor		($inout5,$rndkey0);
14522c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &aesdec	($inout4,$rndkey1);
14532c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &$movekey	($rndkey0,&QWP(0,$key));
14542c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	 &aesdec	($inout5,$rndkey1);
14552c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("_aesni_decrypt6_enter"));
14562c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
	# Post-process: XOR the parked tweaks into the six outputs, store them,
	# and interleave the next tweak update.  $twtmp, $twres and $twmask
	# are all rebuilt/reloaded here -- NOTE(review): presumably because
	# they share registers with the data/key operands used above; confirm
	# against the register assignments earlier in the file.
14572c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($tweak,&QWP(16*5,"esp"));	# last tweak
14582c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org       &pxor	($twtmp,$twtmp);
14592c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,&QWP(16*0,"esp"));	# output^=tweak
14602c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org       &pcmpgtd	($twtmp,$tweak);		# broadcast upper bits
14612c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout1,&QWP(16*1,"esp"));
14622c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*0,$out),$inout0);	# write output
14632c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout2,&QWP(16*2,"esp"));
14642c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*1,$out),$inout1);
14652c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout3,&QWP(16*3,"esp"));
14662c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*2,$out),$inout2);
14672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout4,&QWP(16*4,"esp"));
14682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*3,$out),$inout3);
14692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout5,$tweak);
14702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*4,$out),$inout4);
14712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org       &pshufd	($twres,$twtmp,0x13);
14722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*5,$out),$inout5);
14732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($out,&DWP(16*6,$out));
14742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org       &movdqa	($twmask,&QWP(16*6,"esp"));	# 0x0...010...87
14752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
	# Advance $tweak once more so it is ready for the next pass (or for
	# the tail code); $twtmp is left holding its sign broadcast.
14762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($twtmp,$twtmp);
14772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&paddq	($tweak,$tweak);		# &psllq($tweak,1);
14782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pand	($twres,$twmask);		# isolate carry and residue
14792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pcmpgtd($twtmp,$tweak);		# broadcast upper bits
14802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds,$rounds_);		# restore $rounds
14812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($tweak,$twres);
14822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
14832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&sub	($len,16*6);
14842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jnc	(&label("xts_dec_loop6"));
14852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
	# Loop exit: $rounds = 2*$rounds+1 reverses the shr ($rounds,1) done
	# before entering the loop (exact only for an odd original count --
	# TODO confirm against the key schedule), then falls into the tail.
14862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($rounds,&DWP(1,"",$rounds,2));	# restore $rounds
14872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($key,$key_);			# restore $key
14882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds_,$rounds);
14892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
14902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_dec_short");
	# Tail dispatcher for fewer than six whole blocks.  $len arrives
	# biased by -16*6; the add removes the bias and a zero result means
	# there are no whole blocks left.  Tweaks are generated one at a
	# time and parked in $inout3, $inout4, $inout5 (and on the stack for
	# the four/five block cases) while $len selects the handler.
14912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&add	($len,16*6);
14922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jz	(&label("xts_dec_done6x"));
14932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
14942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($inout3,$tweak);		# put aside previous tweak
14952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&cmp	($len,0x20);
14962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jb	(&label("xts_dec_one"));
14972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
14982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd	($twres,$twtmp,0x13);
14992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($twtmp,$twtmp);
15002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&paddq	($tweak,$tweak);		# &psllq($tweak,1);
15012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pand	($twres,$twmask);		# isolate carry and residue
15022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pcmpgtd($twtmp,$tweak);		# broadcast upper bits
15032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($tweak,$twres);
	# The branch below still tests the flags of cmp ($len,0x20) above;
	# only SSE instructions sit in between.
15042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&je	(&label("xts_dec_two"));
15052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
15062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd	($twres,$twtmp,0x13);
15072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($twtmp,$twtmp);
15082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($inout4,$tweak);		# put aside previous tweak
15092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&paddq	($tweak,$tweak);		# &psllq($tweak,1);
15102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pand	($twres,$twmask);		# isolate carry and residue
15112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pcmpgtd($twtmp,$tweak);		# broadcast upper bits
15122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($tweak,$twres);
15132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&cmp	($len,0x40);
15142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jb	(&label("xts_dec_three"));
15152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
15162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd	($twres,$twtmp,0x13);
15172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($twtmp,$twtmp);
15182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($inout5,$tweak);		# put aside previous tweak
15192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&paddq	($tweak,$tweak);		# &psllq($tweak,1);
15202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pand	($twres,$twmask);		# isolate carry and residue
15212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pcmpgtd($twtmp,$tweak);		# broadcast upper bits
15222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($tweak,$twres);
	# First two tweaks go to the stack; xts_dec_four and the five-block
	# path below both read them from 16*0/16*1(%esp).
15232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	(&QWP(16*0,"esp"),$inout3);
15242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	(&QWP(16*1,"esp"),$inout4);
	# Flags are still those of cmp ($len,0x40) above.
15252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&je	(&label("xts_dec_four"));
15262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
	# Five blocks: park tweaks 2 and 3 as well and build the fifth tweak
	# in $inout5.
15272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	(&QWP(16*2,"esp"),$inout5);
15282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd	($inout5,$twtmp,0x13);
15292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	(&QWP(16*3,"esp"),$tweak);
15302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&paddq	($tweak,$tweak);		# &psllq($tweak,1);
15312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pand	($inout5,$twmask);		# isolate carry and residue
15322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout5,$tweak);
15332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
15342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqu	($inout0,&QWP(16*0,$inp));	# load input
15352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqu	($inout1,&QWP(16*1,$inp));
15362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqu	($inout2,&QWP(16*2,$inp));
15372c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout0,&QWP(16*0,"esp"));	# input^=tweak
15382c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqu	($inout3,&QWP(16*3,$inp));
15392c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout1,&QWP(16*1,"esp"));
15402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqu	($inout4,&QWP(16*4,$inp));
15412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout2,&QWP(16*2,"esp"));
15422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($inp,&DWP(16*5,$inp));
15432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout3,&QWP(16*3,"esp"));
15442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	(&QWP(16*4,"esp"),$inout5);	# save last tweak
15452c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout4,$inout5);
15462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
	# The 6-way routine is used for five blocks; the result it leaves in
	# $inout5 is never stored.
15472c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call	("_aesni_decrypt6");
15482c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
15492c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movaps	($tweak,&QWP(16*4,"esp"));	# last tweak
15502c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,&QWP(16*0,"esp"));	# output^=tweak
15512c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout1,&QWP(16*1,"esp"));
15522c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout2,&QWP(16*2,"esp"));
15532c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*0,$out),$inout0);	# write output
15542c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout3,&QWP(16*3,"esp"));
15552c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*1,$out),$inout1);
15562c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout4,$tweak);
15572c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*2,$out),$inout2);
15582c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*3,$out),$inout3);
15592c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*4,$out),$inout4);
15602c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($out,&DWP(16*5,$out));
15612c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jmp	(&label("xts_dec_done"));
15622c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
15632c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_dec_one",16);
	# Single remaining whole block: its tweak was put aside in $inout3 by
	# xts_dec_short.  Decrypts one block (inline or via _aesni_decrypt1,
	# depending on $inline at generation time) and hands the tweak back
	# in $tweak for xts_dec_done.
15642c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout0,&QWP(16*0,$inp));	# load input
15652c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($inp,&DWP(16*1,$inp));
15662c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,$inout3);		# input^=tweak
15672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	if ($inline)
15682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	{   &aesni_inline_generate1("dec");	}
15692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	else
15702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	{   &call	("_aesni_decrypt1");	}
15712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,$inout3);		# output^=tweak
15722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*0,$out),$inout0);	# write output
15732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($out,&DWP(16*1,$out));
15742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
15752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($tweak,$inout3);		# last tweak
15762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jmp	(&label("xts_dec_done"));
15772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
15782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_dec_two",16);
	# Two remaining whole blocks: first tweak is in $inout3 (set aside by
	# xts_dec_short), second is the current $tweak.  The 3-way decrypt
	# routine is called; only $inout0 and $inout1 are loaded and stored
	# here.
15792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movaps	($inout4,$tweak);		# put aside last tweak
15802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
15812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout0,&QWP(16*0,$inp));	# load input
15822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout1,&QWP(16*1,$inp));
15832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($inp,&DWP(16*2,$inp));
15842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,$inout3);		# input^=tweak
15852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout1,$inout4);
15862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
15872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call	("_aesni_decrypt3");
15882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
15892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,$inout3);		# output^=tweak
15902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout1,$inout4);
15912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*0,$out),$inout0);	# write output
15922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*1,$out),$inout1);
15932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($out,&DWP(16*2,$out));
15942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
15952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($tweak,$inout4);		# last tweak
15962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jmp	(&label("xts_dec_done"));
15972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
15982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_dec_three",16);
	# Three remaining whole blocks: tweaks are $inout3 and $inout4 (set
	# aside by xts_dec_short) plus the current $tweak, which is copied to
	# $inout5 so all three stay in registers around the 3-way decrypt.
15992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movaps	($inout5,$tweak);		# put aside last tweak
16002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout0,&QWP(16*0,$inp));	# load input
16012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout1,&QWP(16*1,$inp));
16022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout2,&QWP(16*2,$inp));
16032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($inp,&DWP(16*3,$inp));
16042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,$inout3);		# input^=tweak
16052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout1,$inout4);
16062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout2,$inout5);
16072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
16082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call	("_aesni_decrypt3");
16092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
16102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,$inout3);		# output^=tweak
16112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout1,$inout4);
16122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout2,$inout5);
16132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*0,$out),$inout0);	# write output
16142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*1,$out),$inout1);
16152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*2,$out),$inout2);
16162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($out,&DWP(16*3,$out));
16172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
16182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($tweak,$inout5);		# last tweak
16192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jmp	(&label("xts_dec_done"));
16202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
16212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_dec_four",16);
	# Four remaining whole blocks.  On entry (from xts_dec_short) the
	# first two tweaks are at 16*0/16*1(%esp), the third is in $inout5
	# and the fourth is the current $tweak.  $inout3 is about to be
	# overwritten with input, which is why the first tweak is taken from
	# the stack copy rather than from the register.
16222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movaps	($inout4,$tweak);		# put aside last tweak
16232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
16242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout0,&QWP(16*0,$inp));	# load input
16252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout1,&QWP(16*1,$inp));
16262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout2,&QWP(16*2,$inp));
16272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,&QWP(16*0,"esp"));	# input^=tweak
16282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout3,&QWP(16*3,$inp));
16292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($inp,&DWP(16*4,$inp));
16302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout1,&QWP(16*1,"esp"));
16312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout2,$inout5);
16322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout3,$inout4);
16332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
16342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call	("_aesni_decrypt4");
16352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
16362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,&QWP(16*0,"esp"));	# output^=tweak
16372c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout1,&QWP(16*1,"esp"));
16382c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout2,$inout5);
16392c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*0,$out),$inout0);	# write output
16402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout3,$inout4);
16412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*1,$out),$inout1);
16422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*2,$out),$inout2);
16432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(16*3,$out),$inout3);
16442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($out,&DWP(16*4,$out));
16452c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
16462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($tweak,$inout4);		# last tweak
16472c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jmp	(&label("xts_dec_done"));
16482c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
16492c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_dec_done6x",16);		# $tweak is pre-calculated
	# Reached from xts_dec_short when no whole blocks remain.  Unlike
	# xts_dec_done, $tweak is not advanced here before continuing.  If
	# the saved length has a sub-block remainder, keep just that
	# remainder at 16*7+0(%esp) and go handle the final partial block;
	# otherwise return.
16502c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($len,&DWP(16*7+0,"esp"));	# restore original $len
16512c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&and	($len,15);
16522c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jz	(&label("xts_dec_ret"));
16532c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(16*7+0,"esp"),$len);	# save $len%16
16542c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jmp	(&label("xts_dec_only_one_more"));
16552c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
16562c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_dec_done",16);
	# Common exit of the one..five block handlers, which leave the last
	# tweak they used in $tweak.  Returns if the saved length is a
	# multiple of 16; otherwise stores the remainder at 16*7+0(%esp),
	# advances $tweak by one step and falls through to
	# xts_dec_only_one_more with $twtmp holding the sign broadcast of
	# the new $tweak.
16572c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($len,&DWP(16*7+0,"esp"));	# restore original $len
16582c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($twtmp,$twtmp);
16592c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&and	($len,15);
16602c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jz	(&label("xts_dec_ret"));
16612c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
16622c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pcmpgtd($twtmp,$tweak);		# broadcast upper bits
16632c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&DWP(16*7+0,"esp"),$len);	# save $len%16
16642c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd	($twres,$twtmp,0x13);
16652c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($twtmp,$twtmp);
	# $twmask is reloaded from its stack copy -- NOTE(review): presumably
	# its register was reused by the block handlers; confirm against the
	# register assignments earlier in the file.
16662c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($twmask,&QWP(16*6,"esp"));
16672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&paddq	($tweak,$tweak);		# &psllq($tweak,1);
16682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pand	($twres,$twmask);		# isolate carry and residue
16692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pcmpgtd($twtmp,$tweak);		# broadcast upper bits
16702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($tweak,$twres);
16712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
16722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_dec_only_one_more");
	# Last whole block before a partial tail.  Expects $tweak to be the
	# next unused tweak and $twtmp its sign broadcast.  $inout4 keeps
	# that tweak, $inout3 receives the one after it.  The block at $inp
	# is decrypted with the LATER tweak ($inout3); the earlier one
	# ($inout4) is used after the byte swap in xts_dec_steal.
16732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd	($inout3,$twtmp,0x13);
16742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa	($inout4,$tweak);		# put aside previous tweak
16752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&paddq	($tweak,$tweak);		# &psllq($tweak,1);
16762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pand	($inout3,$twmask);		# isolate carry and residue
16772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor	($inout3,$tweak);
16782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
16792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($key,$key_);			# restore $key
16802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds,$rounds_);		# restore $rounds
16812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
16822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout0,&QWP(0,$inp));		# load input
16832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,$inout3);		# input^=tweak
16842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	if ($inline)
16852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	{   &aesni_inline_generate1("dec");	}
16862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	else
16872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	{   &call	("_aesni_decrypt1");	}
16882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,$inout3);		# output^=tweak
	# $inp and $out are not advanced here: the steal loop below addresses
	# the tail relative to this block (offsets 0 and 16).
16892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0,$out),$inout0);		# write output
16902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
16912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_dec_steal");
	# Byte-wise swap loop, run $len (= saved length % 16) times.  Each
	# pass copies input byte 16(inp) into 0(out) and moves the byte that
	# was at 0(out) to 16(out), then steps both pointers.  $rounds and
	# $key serve as byte scratch registers and are restored afterwards.
16922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movz	($rounds,&BP(16,$inp));
16932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movz	($key,&BP(0,$out));
16942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($inp,&DWP(1,$inp));
16952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&BP(0,$out),&LB($rounds));
16962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	(&BP(16,$out),&LB($key));
16972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($out,&DWP(1,$out));
16982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&sub	($len,1);
16992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jnz	(&label("xts_dec_steal"));
17002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
	# Step $out back by the remainder stored at 16*7+0(%esp) so it points
	# at the start of the block again, then decrypt that (now patched)
	# block in place with the tweak kept in $inout4.
17012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&sub	($out,&DWP(16*7+0,"esp"));	# rewind $out
17022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($key,$key_);			# restore $key
17032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	($rounds,$rounds_);		# restore $rounds
17042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
17052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	($inout0,&QWP(0,$out));		# load input
17062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,$inout4);		# input^=tweak
17072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	if ($inline)
17082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	{   &aesni_inline_generate1("dec");	}
17092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	else
17102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	{   &call	("_aesni_decrypt1");	}
17112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	($inout0,$inout4);		# output^=tweak
17122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	(&QWP(0,$out),$inout0);		# write output
17132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
17142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("xts_dec_ret");
17152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	("esp",&DWP(16*7+4,"esp"));	# restore %esp
17162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&function_end("aesni_xts_decrypt");
17172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org}
17182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org}
17192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
######################################################################
# void $PREFIX_cbc_encrypt (const void *inp, void *out,
#                           size_t length, const AES_KEY *key,
#                           unsigned char *ivp,const int enc);
#
# Emits the CBC bulk routine. Arguments are fetched with &wparam(0..5)
# in the order of the prototype above.
#
#   enc != 0	encrypt, one block per iteration: each input block is
#		xored into the previous ciphertext block (initially the
#		IV) and run through the cipher. A trailing partial block
#		is copied to the output, zero-padded to 16 bytes and
#		encrypted in place.
#   enc == 0	decrypt, six blocks per iteration through
#		_aesni_decrypt6 while more than 0x50 bytes remain, then
#		a dedicated path for the remaining 1..5 blocks.
#
# length == 0 returns immediately and leaves *ivp untouched; otherwise
# the chaining value left in $ivec is written back to *ivp on exit.
#
# Scratch frame (16-byte aligned, carved below the caller's %esp):
#	 0(%esp)	16-byte spill slot (saved IV / last output block)
#	16(%esp)	caller's %esp, reloaded at cbc_ret
&function_begin("${PREFIX}_cbc_encrypt");
	&mov	($inp,&wparam(0));
	&mov	($rounds_,"esp");		# start computing the frame address
	&mov	($out,&wparam(1));
	&sub	($rounds_,24);
	&mov	($len,&wparam(2));
	&and	($rounds_,-16);			# align frame to 16 bytes
	&mov	($key,&wparam(3));
	&mov	($key_,&wparam(4));		# ivp, temporarily in $key_
	&test	($len,$len);
	&jz	(&label("cbc_abort"));		# nothing to do, %esp not switched yet

	# The flags set by this compare are consumed by the je at the end
	# of the prologue; only mov/xchg/movups sit in between and none of
	# them modifies flags.
	&cmp	(&wparam(5),0);
	&xchg	($rounds_,"esp");		# alloca
	&movups	($ivec,&QWP(0,$key_));		# load IV
	&mov	($rounds,&DWP(240,$key));
	&mov	($key_,$key);			# backup $key
	&mov	(&DWP(16,"esp"),$rounds_);	# save original %esp
	&mov	($rounds_,$rounds);		# backup $rounds
	&je	(&label("cbc_decrypt"));	# enc == 0

	# ---- encrypt -------------------------------------------------
	&movaps	($inout0,$ivec);		# chaining value lives in $inout0
	&cmp	($len,16);
	&jb	(&label("cbc_enc_tail"));	# less than one full block
	&sub	($len,16);			# $len is kept biased by -16 in the loop
	&jmp	(&label("cbc_enc_loop"));

&set_label("cbc_enc_loop",16);
	&movups	($ivec,&QWP(0,$inp));		# input actually
	&lea	($inp,&DWP(16,$inp));
	# Non-inline path: xor the input block into the chaining value and
	# encrypt the result. The inline generator is handed the same two
	# registers; presumably it folds in the same xor (helper is defined
	# outside this block -- confirm there).
	if ($inline)
	{   &aesni_inline_generate1("enc",$inout0,$ivec);	}
	else
	{   &xorps($inout0,$ivec); &call("_aesni_encrypt1");	}
	&mov	($rounds,$rounds_);	# restore $rounds
	&mov	($key,$key_);		# restore $key
	&movups	(&QWP(0,$out),$inout0);	# store output
	&lea	($out,&DWP(16,$out));
	&sub	($len,16);
	&jnc	(&label("cbc_enc_loop"));	# no borrow: another full block left
	&add	($len,16);			# undo the bias
	&jnz	(&label("cbc_enc_tail"));	# 1..15 bytes left over
	&movaps	($ivec,$inout0);		# last ciphertext block is the new IV
	&jmp	(&label("cbc_ret"));

	# Partial final block: copy the $len leftover bytes to the output,
	# zero-fill up to 16 bytes, then run the loop body once more with
	# the input pointer aimed at that padded block. $len is zero by
	# then, so the loop exits after that single iteration.
	# The string instructions are emitted as raw words; in each word
	# the two low bytes (89 F6, mov %esi,%esi) are filler in front of
	# the F3 A4 / F3 AA opcode. They operate on esi/edi/ecx, which is
	# presumably what $inp/$out/$rounds map to -- the sequence relies
	# on it.
&set_label("cbc_enc_tail");
	&mov	("ecx",$len);		# zaps $rounds
	&data_word(0xA4F3F689);		# rep movsb
	&mov	("ecx",16);		# zero tail
	&sub	("ecx",$len);
	&xor	("eax","eax");		# zaps $len
	&data_word(0xAAF3F689);		# rep stosb
	&lea	($out,&DWP(-16,$out));	# rewind $out by 1 block
	&mov	($rounds,$rounds_);	# restore $rounds
	&mov	($inp,$out);		# $inp and $out are the same
	&mov	($key,$key_);		# restore $key
	&jmp	(&label("cbc_enc_loop"));
######################################################################
	# ---- decrypt -------------------------------------------------
&set_label("cbc_decrypt",16);
	&cmp	($len,0x50);
	&jbe	(&label("cbc_dec_tail"));	# at most 5 blocks: tail code only
	&movaps	(&QWP(0,"esp"),$ivec);		# save IV
	&sub	($len,0x50);			# $len is kept biased by -0x50 in the loop
	&jmp	(&label("cbc_dec_loop6_enter"));

	# Six blocks per iteration. Only five results are stored inside
	# the iteration; the sixth stays in $inout5 and is written either
	# at the top of the next iteration or by the exit code below.
&set_label("cbc_dec_loop6",16);
	&movaps	(&QWP(0,"esp"),$rndkey0);	# save IV
	&movups	(&QWP(0,$out),$inout5);		# sixth block of previous iteration
	&lea	($out,&DWP(0x10,$out));
&set_label("cbc_dec_loop6_enter");
	&movdqu	($inout0,&QWP(0,$inp));
	&movdqu	($inout1,&QWP(0x10,$inp));
	&movdqu	($inout2,&QWP(0x20,$inp));
	&movdqu	($inout3,&QWP(0x30,$inp));
	&movdqu	($inout4,&QWP(0x40,$inp));
	&movdqu	($inout5,&QWP(0x50,$inp));

	&call	("_aesni_decrypt6");

	# CBC chaining: block i is xored with ciphertext block i-1, which
	# is re-read from the input through $rndkey0/$rndkey1 in turn;
	# block 0 uses the IV saved in the spill slot.
	&movups	($rndkey1,&QWP(0,$inp));
	&movups	($rndkey0,&QWP(0x10,$inp));
	&xorps	($inout0,&QWP(0,"esp"));	# ^=IV
	&xorps	($inout1,$rndkey1);
	&movups	($rndkey1,&QWP(0x20,$inp));
	&xorps	($inout2,$rndkey0);
	&movups	($rndkey0,&QWP(0x30,$inp));
	&xorps	($inout3,$rndkey1);
	&movups	($rndkey1,&QWP(0x40,$inp));
	&xorps	($inout4,$rndkey0);
	&movups	($rndkey0,&QWP(0x50,$inp));	# IV
	&xorps	($inout5,$rndkey1);
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&lea	($inp,&DWP(0x60,$inp));
	&movups	(&QWP(0x20,$out),$inout2);
	&mov	($rounds,$rounds_);		# restore $rounds
	&movups	(&QWP(0x30,$out),$inout3);
	&mov	($key,$key_);			# restore $key
	&movups	(&QWP(0x40,$out),$inout4);
	&lea	($out,&DWP(0x50,$out));		# only five blocks stored so far
	&sub	($len,0x60);
	&ja	(&label("cbc_dec_loop6"));	# more than 0x50 bytes still left

	&movaps	($inout0,$inout5);		# pending sixth block
	&movaps	($ivec,$rndkey0);		# last ciphertext block read is the new IV
	&add	($len,0x50);			# undo the bias
	&jle	(&label("cbc_dec_tail_collected"));	# no further input
	&movups	(&QWP(0,$out),$inout0);
	&lea	($out,&DWP(0x10,$out));

	# Tail: 1..5 blocks remain. Blocks are loaded one at a time,
	# comparing $len against each threshold to pick the handler.
	# Every handler leaves its last output block in $inout0 (not yet
	# stored), the next IV in $ivec, and subtracts the number of bytes
	# it covered from $len before reaching cbc_dec_tail_collected.
&set_label("cbc_dec_tail");
	&movups	($inout0,&QWP(0,$inp));
	&movaps	($in0,$inout0);			# keep ciphertext block 0 for chaining
	&cmp	($len,0x10);
	&jbe	(&label("cbc_dec_one"));

	&movups	($inout1,&QWP(0x10,$inp));
	&movaps	($in1,$inout1);			# keep ciphertext block 1 for chaining
	&cmp	($len,0x20);
	&jbe	(&label("cbc_dec_two"));

	&movups	($inout2,&QWP(0x20,$inp));
	&cmp	($len,0x30);
	&jbe	(&label("cbc_dec_three"));

	&movups	($inout3,&QWP(0x30,$inp));
	&cmp	($len,0x40);
	&jbe	(&label("cbc_dec_four"));

	# Five blocks: go through the six-block routine with the sixth
	# lane zeroed; chaining blocks are re-read from the input.
	&movups	($inout4,&QWP(0x40,$inp));
	&movaps	(&QWP(0,"esp"),$ivec);		# save IV
	&movups	($inout0,&QWP(0,$inp));
	&xorps	($inout5,$inout5);
	&call	("_aesni_decrypt6");
	&movups	($rndkey1,&QWP(0,$inp));
	&movups	($rndkey0,&QWP(0x10,$inp));
	&xorps	($inout0,&QWP(0,"esp"));	# ^= IV
	&xorps	($inout1,$rndkey1);
	&movups	($rndkey1,&QWP(0x20,$inp));
	&xorps	($inout2,$rndkey0);
	&movups	($rndkey0,&QWP(0x30,$inp));
	&xorps	($inout3,$rndkey1);
	&movups	($ivec,&QWP(0x40,$inp));	# IV
	&xorps	($inout4,$rndkey0);
	&movups	(&QWP(0,$out),$inout0);
	&movups	(&QWP(0x10,$out),$inout1);
	&movups	(&QWP(0x20,$out),$inout2);
	&movups	(&QWP(0x30,$out),$inout3);
	&lea	($out,&DWP(0x40,$out));
	&movaps	($inout0,$inout4);		# fifth block is stored at the join point
	&sub	($len,0x50);
	&jmp	(&label("cbc_dec_tail_collected"));

&set_label("cbc_dec_one",16);
	if ($inline)
	{   &aesni_inline_generate1("dec");	}
	else
	{   &call	("_aesni_decrypt1");	}
	&xorps	($inout0,$ivec);
	&movaps	($ivec,$in0);			# ciphertext block 0 is the new IV
	&sub	($len,0x10);
	&jmp	(&label("cbc_dec_tail_collected"));

&set_label("cbc_dec_two",16);
	&xorps	($inout2,$inout2);		# third lane unused, feed it zeros
	&call	("_aesni_decrypt3");
	&xorps	($inout0,$ivec);
	&xorps	($inout1,$in0);
	&movups	(&QWP(0,$out),$inout0);
	&movaps	($inout0,$inout1);
	&lea	($out,&DWP(0x10,$out));
	&movaps	($ivec,$in1);			# ciphertext block 1 is the new IV
	&sub	($len,0x20);
	&jmp	(&label("cbc_dec_tail_collected"));

&set_label("cbc_dec_three",16);
	&call	("_aesni_decrypt3");
	&xorps	($inout0,$ivec);
	&xorps	($inout1,$in0);
	&xorps	($inout2,$in1);
	&movups	(&QWP(0,$out),$inout0);
	&movaps	($inout0,$inout2);
	&movups	(&QWP(0x10,$out),$inout1);
	&lea	($out,&DWP(0x20,$out));
	&movups	($ivec,&QWP(0x20,$inp));	# ciphertext block 2 is the new IV
	&sub	($len,0x30);
	&jmp	(&label("cbc_dec_tail_collected"));

&set_label("cbc_dec_four",16);
	&call	("_aesni_decrypt4");
	&movups	($rndkey1,&QWP(0x10,$inp));	# ciphertext blocks 1 and 2 are
	&movups	($rndkey0,&QWP(0x20,$inp));	# re-read from the input
	&xorps	($inout0,$ivec);
	&movups	($ivec,&QWP(0x30,$inp));	# ciphertext block 3 is the new IV
	&xorps	($inout1,$in0);
	&movups	(&QWP(0,$out),$inout0);
	&xorps	($inout2,$rndkey1);
	&movups	(&QWP(0x10,$out),$inout1);
	&xorps	($inout3,$rndkey0);
	&movups	(&QWP(0x20,$out),$inout2);
	&lea	($out,&DWP(0x30,$out));
	&movaps	($inout0,$inout3);
	&sub	($len,0x40);
	# falls through

	# Join point: $inout0 holds the final output block. When $len is
	# a multiple of 16 it is stored whole.
&set_label("cbc_dec_tail_collected");
	&and	($len,15);
	&jnz	(&label("cbc_dec_tail_partial"));
	&movups	(&QWP(0,$out),$inout0);
	&jmp	(&label("cbc_ret"));

	# Ragged length: spill the block to the scratch slot and copy
	# 16-($len&15) bytes of it to the output with rep movsb.
	# NOTE(review): at this point $len&15 appears to equal the number
	# of valid bytes in the final block, so the count copied is
	# 16 minus that number -- confirm this is the intended amount.
&set_label("cbc_dec_tail_partial",16);
	&movaps	(&QWP(0,"esp"),$inout0);
	&mov	("ecx",16);
	&mov	($inp,"esp");			# copy source is the spill slot
	&sub	("ecx",$len);
	&data_word(0xA4F3F689);		# rep movsb

&set_label("cbc_ret");
	&mov	("esp",&DWP(16,"esp"));	# pull original %esp
	&mov	($key_,&wparam(4));	# reload ivp, valid again after %esp restore
	&movups	(&QWP(0,$key_),$ivec);	# output IV
&set_label("cbc_abort");
&function_end("${PREFIX}_cbc_encrypt");
19462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
19472c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org######################################################################
19482c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org# Mechanical port from aesni-x86_64.pl.
19492c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org#
# _aesni_set_encrypt_key is a private interface.
# input:
#	"eax"	const unsigned char *userKey
#	$rounds	int bits
#	$key	AES_KEY *key
# output:
#	"eax"	return code
#	$rounds	rounds
19582c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
19592c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&function_begin_B("_aesni_set_encrypt_key");
19602c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&test	("eax","eax");
19612c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jz	(&label("bad_pointer"));
19622c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&test	($key,$key);
19632c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jz	(&label("bad_pointer"));
19642c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
19652c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups	("xmm0",&QWP(0,"eax"));	# pull first 128 bits of *userKey
19662c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps	("xmm4","xmm4");	# low dword of xmm4 is assumed 0
19672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea	($key,&DWP(16,$key));
19682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&cmp	($rounds,256);
19692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&je	(&label("14rounds"));
19702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&cmp	($rounds,192);
19712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&je	(&label("12rounds"));
19722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&cmp	($rounds,128);
19732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jne	(&label("bad_keybits"));
19742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
19752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("10rounds",16);
19762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov		($rounds,9);
19772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&$movekey	(&QWP(-16,$key),"xmm0");	# round 0
19782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm0",0x01);		# round 1
19792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_128_cold"));
19802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm0",0x2);		# round 2
19812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_128"));
19822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm0",0x04);		# round 3
19832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_128"));
19842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm0",0x08);		# round 4
19852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_128"));
19862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm0",0x10);		# round 5
19872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_128"));
19882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm0",0x20);		# round 6
19892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_128"));
19902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm0",0x40);		# round 7
19912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_128"));
19922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm0",0x80);		# round 8
19932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_128"));
19942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm0",0x1b);		# round 9
19952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_128"));
19962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm0",0x36);		# round 10
19972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_128"));
19982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&$movekey	(&QWP(0,$key),"xmm0");
19992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov		(&DWP(80,$key),$rounds);
20002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xor		("eax","eax");
20012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&ret();
20022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
20032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("key_128",16);
20042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&$movekey	(&QWP(0,$key),"xmm0");
20052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea		($key,&DWP(16,$key));
20062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("key_128_cold");
20072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&shufps		("xmm4","xmm0",0b00010000);
20082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps		("xmm0","xmm4");
20092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&shufps		("xmm4","xmm0",0b10001100);
20102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps		("xmm0","xmm4");
20112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&shufps		("xmm1","xmm1",0b11111111);	# critical path
20122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps		("xmm0","xmm1");
20132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&ret();
20142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
20152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("12rounds",16);
20162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movq		("xmm2",&QWP(16,"eax"));	# remaining 1/3 of *userKey
20172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov		($rounds,11);
20182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&$movekey	(&QWP(-16,$key),"xmm0")		# round 0
20192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm2",0x01);		# round 1,2
20202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_192a_cold"));
20212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm2",0x02);		# round 2,3
20222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_192b"));
20232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm2",0x04);		# round 4,5
20242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_192a"));
20252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm2",0x08);		# round 5,6
20262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_192b"));
20272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm2",0x10);		# round 7,8
20282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_192a"));
20292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm2",0x20);		# round 8,9
20302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_192b"));
20312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm2",0x40);		# round 10,11
20322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_192a"));
20332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm2",0x80);		# round 11,12
20342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_192b"));
20352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&$movekey	(&QWP(0,$key),"xmm0");
20362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov		(&DWP(48,$key),$rounds);
20372c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xor		("eax","eax");
20382c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&ret();
20392c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
20402c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("key_192a",16);
20412c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&$movekey	(&QWP(0,$key),"xmm0");
20422c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea		($key,&DWP(16,$key));
20432c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("key_192a_cold",16);
20442c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movaps		("xmm5","xmm2");
20452c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("key_192b_warm");
20462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&shufps		("xmm4","xmm0",0b00010000);
20472c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movdqa		("xmm3","xmm2");
20482c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps		("xmm0","xmm4");
20492c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&shufps		("xmm4","xmm0",0b10001100);
20502c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pslldq		("xmm3",4);
20512c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps		("xmm0","xmm4");
20522c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd		("xmm1","xmm1",0b01010101);	# critical path
20532c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor		("xmm2","xmm3");
20542c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor		("xmm0","xmm1");
20552c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pshufd		("xmm3","xmm0",0b11111111);
20562c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&pxor		("xmm2","xmm3");
20572c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&ret();
20582c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
20592c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("key_192b",16);
20602c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movaps		("xmm3","xmm0");
20612c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&shufps		("xmm5","xmm0",0b01000100);
20622c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&$movekey	(&QWP(0,$key),"xmm5");
20632c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&shufps		("xmm3","xmm2",0b01001110);
20642c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&$movekey	(&QWP(16,$key),"xmm3");
20652c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea		($key,&DWP(32,$key));
20662c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&jmp		(&label("key_192b_warm"));
20672c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
20682c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("14rounds",16);
20692c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&movups		("xmm2",&QWP(16,"eax"));	# remaining half of *userKey
20702c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov		($rounds,13);
20712c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea		($key,&DWP(16,$key));
20722c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&$movekey	(&QWP(-32,$key),"xmm0");	# round 0
20732c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&$movekey	(&QWP(-16,$key),"xmm2");	# round 1
20742c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm2",0x01);		# round 2
20752c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_256a_cold"));
20762c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm0",0x01);		# round 3
20772c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_256b"));
20782c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm2",0x02);		# round 4
20792c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_256a"));
20802c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm0",0x02);		# round 5
20812c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_256b"));
20822c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm2",0x04);		# round 6
20832c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_256a"));
20842c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm0",0x04);		# round 7
20852c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_256b"));
20862c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm2",0x08);		# round 8
20872c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_256a"));
20882c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm0",0x08);		# round 9
20892c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_256b"));
20902c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm2",0x10);		# round 10
20912c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_256a"));
20922c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm0",0x10);		# round 11
20932c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_256b"));
20942c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm2",0x20);		# round 12
20952c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_256a"));
20962c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm0",0x20);		# round 13
20972c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_256b"));
20982c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&aeskeygenassist("xmm1","xmm2",0x40);		# round 14
20992c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&call		(&label("key_256a"));
21002c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&$movekey	(&QWP(0,$key),"xmm0");
21012c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov		(&DWP(16,$key),$rounds);
21022c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xor		("eax","eax");
21032c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&ret();
21042c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
21052c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("key_256a",16);
21062c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&$movekey	(&QWP(0,$key),"xmm2");
21072c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea		($key,&DWP(16,$key));
21082c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("key_256a_cold");
21092c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&shufps		("xmm4","xmm0",0b00010000);
21102c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps		("xmm0","xmm4");
21112c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&shufps		("xmm4","xmm0",0b10001100);
21122c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps		("xmm0","xmm4");
21132c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&shufps		("xmm1","xmm1",0b11111111);	# critical path
21142c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps		("xmm0","xmm1");
21152c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&ret();
21162c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
21172c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("key_256b",16);
21182c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&$movekey	(&QWP(0,$key),"xmm0");
21192c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&lea		($key,&DWP(16,$key));
21202c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
21212c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&shufps		("xmm4","xmm2",0b00010000);
21222c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps		("xmm2","xmm4");
21232c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&shufps		("xmm4","xmm2",0b10001100);
21242c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps		("xmm2","xmm4");
21252c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&shufps		("xmm1","xmm1",0b10101010);	# critical path
21262c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&xorps		("xmm2","xmm1");
21272c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&ret();
21282c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
21292c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("bad_pointer",4);
21302c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	("eax",-1);
21312c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&ret	();
21322c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&set_label("bad_keybits",4);
21332c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&mov	("eax",-2);
21342c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org	&ret	();
21352c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org&function_end_B("_aesni_set_encrypt_key");
21362c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
# int $PREFIX_set_encrypt_key (const unsigned char *userKey, int bits,
#                              AES_KEY *key)
#
# Public entry point for encryption key setup.  It only marshals the three
# arguments into the registers the internal worker reads (userKey in eax,
# bits in $rounds, key in $key), calls _aesni_set_encrypt_key and returns
# with eax exactly as the worker left it.  The worker exits visible in this
# file leave 0 in eax on success, -1 at "bad_pointer" and -2 at
# "bad_keybits".
&function_begin_B("${PREFIX}_set_encrypt_key");
	&mov	("eax",&wparam(0));		# userKey
	&mov	($rounds,&wparam(1));		# bits
	&mov	($key,&wparam(2));		# AES_KEY to fill in
	&call	("_aesni_set_encrypt_key");
	&ret	();				# result code is already in eax
&function_end_B("${PREFIX}_set_encrypt_key");
21462c4508dfe2bc5b6296c01114ed11ddc64b7718c6digit@chromium.org
# int $PREFIX_set_decrypt_key (const unsigned char *userKey, int bits,
#                              AES_KEY *key)
#
# Builds the encryption key schedule with _aesni_set_encrypt_key and, when
# the worker left 0 in eax, converts that schedule in place for decryption:
# the round keys are reversed end-for-end, and every round key except the
# first and the last is additionally run through aesimc.  When the worker
# left a non-zero value in eax, that value is returned unchanged and no
# conversion is attempted.
&function_begin_B("${PREFIX}_set_decrypt_key");
	&mov	("eax",&wparam(0));		# userKey
	&mov	($rounds,&wparam(1));		# bits
	&mov	($key,&wparam(2));		# AES_KEY to fill in
	&call	("_aesni_set_encrypt_key");
	&mov	($key,&wparam(2));		# reload: the worker advanced $key
	&shl	($rounds,4);	# rounds-1 after _aesni_set_encrypt_key
	&test	("eax","eax");			# sets the flags the jnz below acts on
	&jnz	(&label("dec_key_ret"));	# propagate the worker's error code
	&lea	("eax",&DWP(16,$key,$rounds));	# end of key schedule

	# The outermost pair is exchanged as is, without aesimc.
	&$movekey	("xmm0",&QWP(0,$key));	# just swap
	&$movekey	("xmm1",&QWP(0,"eax"));
	&$movekey	(&QWP(0,"eax"),"xmm0");
	&$movekey	(&QWP(0,$key),"xmm1");
	&lea		($key,&DWP(16,$key));
	&lea		("eax",&DWP(-16,"eax"));

	# Walk inwards from both ends: $key moves up, eax moves down.
&set_label("dec_key_inverse");
	&$movekey	("xmm0",&QWP(0,$key));	# swap and inverse
	&$movekey	("xmm1",&QWP(0,"eax"));
	&aesimc		("xmm0","xmm0");
	&aesimc		("xmm1","xmm1");
	&lea		($key,&DWP(16,$key));
	&lea		("eax",&DWP(-16,"eax"));
	# Both pointers have already been stepped, hence the +16/-16 offsets.
	&$movekey	(&QWP(16,"eax"),"xmm0");
	&$movekey	(&QWP(-16,$key),"xmm1");
	&cmp		("eax",$key);
	&ja		(&label("dec_key_inverse"));	# unsigned: eax still above $key

	# The remaining round key at $key has no partner; transform it in place.
	&$movekey	("xmm0",&QWP(0,$key));	# inverse middle
	&aesimc		("xmm0","xmm0");
	&$movekey	(&QWP(0,$key),"xmm0");

	&xor		("eax","eax");		# return success
&set_label("dec_key_ret");
	&ret	();
&function_end_B("${PREFIX}_set_decrypt_key");
# Identification string placed into the generated module.
&asciz("AES for Intel AES-NI, CRYPTOGAMS by <appro\@openssl.org>");

# Last call of the script.
&asm_finish();
2190