#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for Intel AES-NI extension. In
# OpenSSL context it's used with Intel engine, but can also be used as
# drop-in replacement for crypto/aes/asm/aes-x86_64.pl [see below for
# details].
#
# Performance.
#
# Given aes(enc|dec) instructions' latency, asymptotic performance for
# non-parallelizable modes such as CBC encrypt is 3.75 cycles per byte
# processed with 128-bit key.
# And given their throughput, asymptotic
# performance for parallelizable modes is 1.25 cycles per byte. Being
# an asymptotic limit it's not something you commonly achieve in reality,
# but how close does one get? Below are results collected for
# different modes and block sizes. Pairs of numbers are for en-/
# decryption.
#
#		16-byte     64-byte     256-byte    1-KB        8-KB
# ECB		4.25/4.25   1.38/1.38   1.28/1.28   1.26/1.26   1.26/1.26
# CTR		5.42/5.42   1.92/1.92   1.44/1.44   1.28/1.28   1.26/1.26
# CBC		4.38/4.43   4.15/1.43   4.07/1.32   4.07/1.29   4.06/1.28
# CCM		5.66/9.42   4.42/5.41   4.16/4.40   4.09/4.15   4.06/4.07
# OFB		5.42/5.42   4.64/4.64   4.44/4.44   4.39/4.39   4.38/4.38
# CFB		5.73/5.85   5.56/5.62   5.48/5.56   5.47/5.55   5.47/5.55
#
# ECB, CTR, CBC and CCM results are free from EVP overhead. This means
# that otherwise used 'openssl speed -evp aes-128-??? -engine aesni
# [-decrypt]' will exhibit 10-15% worse results for smaller blocks.
# The results were collected with specially crafted speed.c benchmark
# in order to compare them with results reported in "Intel Advanced
# Encryption Standard (AES) New Instruction Set" White Paper Revision
# 3.0 dated May 2010. All above results are consistently better. This
# module also provides better performance for block sizes smaller than
# 128 bytes in points *not* represented in the above table.
#
# Looking at the results for 8-KB buffer.
#
# CFB and OFB results are far from the limit, because implementation
# uses "generic" CRYPTO_[c|o]fb128_encrypt interfaces relying on
# single-block aesni_encrypt, which is not the most optimal way to go.
# CBC encrypt result is unexpectedly high and there is no documented
# explanation for it. Seemingly there is a small penalty for feeding
# the result back to AES unit the way it's done in CBC mode. There is
# nothing one can do and the result appears optimal.
# CCM result is
# identical to CBC, because CBC-MAC is essentially CBC encrypt without
# saving output. CCM CTR "stays invisible," because it's neatly
# interleaved with CBC-MAC. This provides ~30% improvement over
# "straightforward" CCM implementation with CTR and CBC-MAC performed
# disjointly. Parallelizable modes practically achieve the theoretical
# limit.
#
# Looking at how results vary with buffer size.
#
# Curves are practically saturated at 1-KB buffer size. In most cases
# "256-byte" performance is >95%, and "64-byte" is ~90% of "8-KB" one.
# CTR curve doesn't follow this pattern and is "slowest" changing one
# with "256-byte" result being 87% of "8-KB." This is because overhead
# in CTR mode is most computationally intensive. Small-block CCM
# decrypt is slower than encrypt, because first CTR and last CBC-MAC
# iterations can't be interleaved.
#
# Results for 192- and 256-bit keys.
#
# EVP-free results were observed to scale perfectly with number of
# rounds for larger block sizes, i.e. 192-bit result being 10/12 times
# lower and 256-bit one - 10/14. Well, in CBC encrypt case differences
# are a tad smaller, because the above mentioned penalty biases all
# results by same constant value. In similar way function call
# overhead affects small-block performance, as well as OFB and CFB
# results. Differences are not large, most common coefficients are
# 10/11.7 and 10/13.4 (as opposed to 10/12.0 and 10/14.0), but one
# can observe even 10/11.2 and 10/12.4 (CTR, OFB, CFB)...

# January 2011
#
# While Westmere processor features 6 cycles latency for aes[enc|dec]
# instructions, which can be scheduled every second cycle, Sandy
# Bridge spends 8 cycles per instruction, but it can schedule them
# every cycle. This means that code targeting Westmere would perform
# suboptimally on Sandy Bridge. Therefore this update.
#
# In addition, non-parallelizable CBC encrypt (as well as CCM) is
# optimized. Relative improvement might appear modest, 8% on Westmere,
# but in absolute terms it's 3.77 cycles per byte encrypted with
# 128-bit key on Westmere, and 5.07 - on Sandy Bridge. These numbers
# should be compared to asymptotic limits of 3.75 for Westmere and
# 5.00 for Sandy Bridge. Actually, the fact that they get this close
# to asymptotic limits is quite amazing. Indeed, the limit is
# calculated as latency times number of rounds, 10 for 128-bit key,
# and divided by 16, the number of bytes in block, or in other words
# it accounts *solely* for aesenc instructions. But there are extra
# instructions, and numbers so close to the asymptotic limits mean
# that it's as if it takes as little as *one* additional cycle to
# execute all of them. How is it possible?
# It is possible thanks to
# out-of-order execution logic, which manages to overlap post-
# processing of previous block, things like saving the output, with
# actual encryption of current block, as well as pre-processing of
# current block, things like fetching input and xor-ing it with
# 0-round element of the key schedule, with actual encryption of
# previous block. Keep this in mind...
#
# For parallelizable modes, such as ECB, CBC decrypt, CTR, higher
# performance is achieved by interleaving instructions working on
# independent blocks. In which case asymptotic limit for such modes
# can be obtained by dividing above mentioned numbers by AES
# instructions' interleave factor. Westmere can execute at most 3
# instructions at a time, meaning that optimal interleave factor is 3,
# and that's where the "magic" number of 1.25 comes from. "Optimal
# interleave factor" means that increase of interleave factor does
# not improve performance.
# The formula has proven to reflect reality
# pretty well on Westmere... Sandy Bridge on the other hand can
# execute up to 8 AES instructions at a time, so how does varying
# interleave factor affect the performance? Here is a table for ECB
# (numbers are cycles per byte processed with 128-bit key):
#
# instruction interleave factor         3x      6x      8x
# theoretical asymptotic limit          1.67    0.83    0.625
# measured performance for 8KB block    1.05    0.86    0.84
#
# "as if" interleave factor             4.7x    5.8x    6.0x
#
# Further data for other parallelizable modes:
#
# CBC decrypt                           1.16    0.93    0.93
# CTR                                   1.14    0.91    n/a
#
# Well, given 3x column it's probably inappropriate to call the limit
# asymptotic, if it can be surpassed, isn't it? What happens there?
# Rewind to CBC paragraph for the answer. Yes, out-of-order execution
# magic is responsible for this.
# Processor overlaps not only the
# additional instructions with AES ones, but even AES instructions
# processing adjacent triplets of independent blocks. In the 6x case
# additional instructions still claim disproportionally small amount
# of additional cycles, but in 8x case number of instructions must be
# a tad too high for out-of-order logic to cope with, and AES unit
# remains underutilized... As you can see 8x interleave is hardly
# justifiable, so there is no need to feel bad that 32-bit aesni-x86.pl
# utilizes 6x interleave because of limited register bank capacity.
#
# Higher interleave factors do have negative impact on Westmere
# performance. While for ECB mode it's negligible ~1.5%, other
# parallelizables perform ~5% worse, which is outweighed by ~25%
# improvement on Sandy Bridge. To balance regression on Westmere
# CTR mode was implemented with 6x aesenc interleave factor.

# April 2011
#
# Add aesni_xts_[en|de]crypt.
# Westmere spends 1.33 cycles processing
# one byte out of 8KB with 128-bit key, Sandy Bridge - 0.97. Just like
# in CTR mode AES instruction interleave factor was chosen to be 6x.

$PREFIX="aesni";	# if $PREFIX is set to "AES", the script
			# generates drop-in replacement for
			# crypto/aes/asm/aes-x86_64.pl:-)

# Command line is "[flavour] [output]". A lone first argument that
# contains a dot is taken as the output file name and the flavour is
# left undefined.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 argument order is used for nasm/masm/mingw64 flavours or when
# the output file name ends in .asm.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for the translator next to this script first, then under
# ../../perlasm/. $dir is set to "" explicitly when $0 carries no
# directory component, instead of relying on whatever $1 happens to hold
# after a failed match.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the
# translator. The translator path is quoted so that directories with
# spaces survive the shell, and a failure to start the pipe is fatal
# rather than silently producing no output.
open OUT,"| \"$^X\" \"$xlate\" $flavour $output"
	or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Instruction used to load round keys.
# NOTE(review): both arms of the conditional are identical, so $PREFIX
# currently has no influence on $movkey.
$movkey = $PREFIX eq "aesni" ? "movups" : "movups";
@_4args=$win64?	("%rcx","%rdx","%r8", "%r9") :	# Win64 order
		("%rdi","%rsi","%rdx","%rcx");	# Unix order

# Generated assembly is accumulated in $code.
$code=".text\n";

$rounds="%eax";	# input to and changed by aesni_[en|de]cryptN !!!
# this is natural Unix argument order for public $PREFIX_[ecb|cbc]_encrypt ...
$inp="%rdi";
$out="%rsi";
$len="%rdx";
$key="%rcx";	# input to and changed by aesni_[en|de]cryptN !!!
$ivp="%r8";	# cbc, ctr, ...

$rnds_="%r10d";	# backup copy for $rounds
$key_="%r11";	# backup copy for $key

# %xmm register layout
$rndkey0="%xmm0";	$rndkey1="%xmm1";
$inout0="%xmm2";	$inout1="%xmm3";
$inout2="%xmm4";	$inout3="%xmm5";
$inout4="%xmm6";	$inout5="%xmm7";
$inout6="%xmm8";	$inout7="%xmm9";

# Note that $in2..$iv alias $inout4..$inout7 (same physical registers).
$in2="%xmm6";		$in1="%xmm7";	# used in CBC decrypt, CTR, ...
$in0="%xmm8";		$iv="%xmm9";

# Inline version of internal aesni_[en|de]crypt1.
#
# Why folded loop? Because aes[enc|dec] is slow enough to accommodate
# cycles which take care of loop variables...
{ my $seq;	# expansion counter, makes every inlined loop label unique

# aesni_generate1(OP, KEYREG, ROUNDSREG [, BLOCKREG [, IVECREG]])
#
# Appends to $code an inline single-block en-/decryption sequence.
# OP is "enc" or "dec" and selects aes<OP>/aes<OP>last. KEYREG and
# ROUNDSREG name the registers holding the key schedule pointer and the
# round counter; the emitted code advances the former and decrements
# the latter. BLOCKREG defaults to $inout0. When IVECREG is given, round
# key 0 is folded into it first and the result is xor-ed into the block;
# otherwise round key 0 is xor-ed into the block directly.
sub aesni_generate1 {
my ($op,$kptr,$cnt,$blk,$chain)=@_;

$blk=$inout0 unless defined($blk);
my $label=".Loop_${op}1_".(++$seq);

# preload round keys 0 and 1
$code.=<<___;
	$movkey	($kptr),$rndkey0
	$movkey	16($kptr),$rndkey1
___
if (defined($chain)) {
$code.=<<___;
	xorps	$rndkey0,$chain
	lea	32($kptr),$kptr
	xorps	$chain,$blk
___
} else {
$code.=<<___;
	lea	32($kptr),$kptr
	xorps	$rndkey0,$blk
___
}
# one round per iteration, the last round is applied after the loop
$code.=<<___;
${label}:
	aes${op}	$rndkey1,$blk
	dec	$cnt
	$movkey	($kptr),$rndkey1
	lea	16($kptr),$kptr
	jnz	$label	# loop body is 16 bytes
	aes${op}last	$rndkey1,$blk
___
}}
# void $PREFIX_[en|de]crypt (const void *inp,void *out,const AES_KEY *key);
#
# Public single-block entry points. Arguments are taken straight from
# the first three ABI argument registers in @_4args.
{ my ($src,$dst,$sched) = @_4args;

# ${PREFIX}_encrypt
$code.=<<___;
.globl	${PREFIX}_encrypt
.type	${PREFIX}_encrypt,\@abi-omnipotent
.align	16
${PREFIX}_encrypt:
	movups	($src),$inout0		# load input
	mov	240($sched),$rounds	# key->rounds
___
aesni_generate1("enc",$sched,$rounds);
$code.=<<___;
	movups	$inout0,($dst)		# output
	ret
.size	${PREFIX}_encrypt,.-${PREFIX}_encrypt

___

# ${PREFIX}_decrypt
$code.=<<___;
.globl	${PREFIX}_decrypt
.type	${PREFIX}_decrypt,\@abi-omnipotent
.align	16
${PREFIX}_decrypt:
	movups	($src),$inout0		# load input
	mov	240($sched),$rounds	# key->rounds
___
aesni_generate1("dec",$sched,$rounds);
$code.=<<___;
	movups	$inout0,($dst)		# output
	ret
.size	${PREFIX}_decrypt, .-${PREFIX}_decrypt
___
}

# _aesni_[en|de]cryptN are private interfaces, N denotes interleave
# factor. Why were 3x subroutines originally used in loops? Even though
# aes[enc|dec] latency was originally 6, it could be scheduled only
# every *2nd* cycle. Thus 3x interleave was the one providing optimal
# utilization, i.e. when subroutine's throughput is virtually same as
# of non-interleaved subroutine [for number of input blocks up to 3].
# This is why it makes no sense to implement 2x subroutine.
# aes[enc|dec] latency in next processor generation is 8, but the
# instructions can be scheduled every cycle.
# Optimal interleave for
# new processor is therefore 8x...

# aesni_generate3(OP)
#
# Appends to $code the private subroutine _aesni_<OP>rypt3 (OP is "enc"
# or "dec"), which processes $inout0..$inout2 in an interleaved fashion.
# The emitted code consumes $key and $rounds, which are *not* preserved:
# $rounds is halved up front because every loop iteration applies two
# round keys ($rndkey1, then $rndkey0) to each of the three blocks.
sub aesni_generate3 {
my $op=shift;
my $fn="_aesni_${op}rypt3";
my $loop=".L${op}_loop3";

my $asm=<<___;
.type	$fn,\@abi-omnipotent
.align	16
${fn}:
	$movkey	($key),$rndkey0
	shr	\$1,$rounds
	$movkey	16($key),$rndkey1
	lea	32($key),$key
	xorps	$rndkey0,$inout0
	xorps	$rndkey0,$inout1
	xorps	$rndkey0,$inout2
	$movkey	($key),$rndkey0

${loop}:
	aes${op}	$rndkey1,$inout0
	aes${op}	$rndkey1,$inout1
	dec	$rounds
	aes${op}	$rndkey1,$inout2
	$movkey	16($key),$rndkey1
	aes${op}	$rndkey0,$inout0
	aes${op}	$rndkey0,$inout1
	lea	32($key),$key
	aes${op}	$rndkey0,$inout2
	$movkey	($key),$rndkey0
	jnz	$loop

	aes${op}	$rndkey1,$inout0
	aes${op}	$rndkey1,$inout1
	aes${op}	$rndkey1,$inout2
	aes${op}last	$rndkey0,$inout0
	aes${op}last	$rndkey0,$inout1
	aes${op}last	$rndkey0,$inout2
	ret
.size	$fn,.-$fn
___
$code.=$asm;
}
# 4x interleave is implemented to improve small block performance,
# most notably [and naturally] 4 block by ~30%. One can argue that one
# should have implemented 5x as well, but improvement would be <20%,
# so it's not worth it...
# aesni_generate4($dir) appends the private routine _aesni_${dir}rypt4 to
# $code.  $dir is spliced into the mnemonics (aes${dir}, aes${dir}last) and
# into the symbol/label names; the callers further down pass "enc" or "dec".
#
# Shape of the emitted routine:
#   * prologue: loads the round key at ($key) and the one at 16($key),
#     advances $key by 32, XORs the first key into $inout0..$inout3 and
#     then preloads the next key into $rndkey0;
#   * $rounds is halved up front (shr \$1) because every pass through
#     .L${dir}_loop4 applies two round keys ($rndkey1, then $rndkey0) to
#     each of the four blocks;
#   * the "dec $rounds" sits in the middle of the loop body and its result
#     is only tested by the "jnz" at the bottom, so nothing placed between
#     them may alter the flags ($movkey is defined outside this excerpt --
#     presumably a plain SSE move; confirm before reordering);
#   * epilogue: one more round with $rndkey1, then aes${dir}last with
#     $rndkey0 on every block.
sub aesni_generate4 {
my $dir=shift;
# As already mentioned it takes in $key and $rounds, which are *not*
# preserved. $inout[0-3] is cipher/clear text...
$code.=<<___;
.type	_aesni_${dir}rypt4,\@abi-omnipotent
.align	16
_aesni_${dir}rypt4:
	$movkey	($key),$rndkey0
	shr	\$1,$rounds
	$movkey	16($key),$rndkey1
	lea	32($key),$key
	xorps	$rndkey0,$inout0
	xorps	$rndkey0,$inout1
	xorps	$rndkey0,$inout2
	xorps	$rndkey0,$inout3
	$movkey	($key),$rndkey0

.L${dir}_loop4:
	aes${dir}	$rndkey1,$inout0
	aes${dir}	$rndkey1,$inout1
	dec	$rounds
	aes${dir}	$rndkey1,$inout2
	aes${dir}	$rndkey1,$inout3
	$movkey	16($key),$rndkey1
	aes${dir}	$rndkey0,$inout0
	aes${dir}	$rndkey0,$inout1
	lea	32($key),$key
	aes${dir}	$rndkey0,$inout2
	aes${dir}	$rndkey0,$inout3
	$movkey	($key),$rndkey0
	jnz	.L${dir}_loop4

	aes${dir}	$rndkey1,$inout0
	aes${dir}	$rndkey1,$inout1
	aes${dir}	$rndkey1,$inout2
	aes${dir}	$rndkey1,$inout3
	aes${dir}last	$rndkey0,$inout0
	aes${dir}last	$rndkey0,$inout1
	aes${dir}last	$rndkey0,$inout2
	aes${dir}last	$rndkey0,$inout3
	ret
.size	_aesni_${dir}rypt4,.-_aesni_${dir}rypt4
___
}
# aesni_generate6($dir) appends _aesni_${dir}rypt6 to $code: the same
# two-rounds-per-iteration scheme as the 4-block routine, widened to
# $inout0..$inout5.
#
# Differences visible in the emitted code:
#   * the prologue interleaves the initial key XOR of blocks 1..5 with the
#     first $rndkey1 round of the preceding blocks, and already performs
#     the first "dec $rounds";
#   * because that first $rndkey1 round is done in the prologue, control
#     jumps to .L${dir}_loop6_enter, i.e. into the middle of the loop body,
#     so the first pass executes only the $rndkey0 half;
#   * as in the 4-block routine, the flags produced by "dec" must survive
#     until the "jnz" at the bottom of the loop.
sub aesni_generate6 {
my $dir=shift;
# As already mentioned it takes in $key and $rounds, which are *not*
# preserved. $inout[0-5] is cipher/clear text...
$code.=<<___;
.type	_aesni_${dir}rypt6,\@abi-omnipotent
.align	16
_aesni_${dir}rypt6:
	$movkey	($key),$rndkey0
	shr	\$1,$rounds
	$movkey	16($key),$rndkey1
	lea	32($key),$key
	xorps	$rndkey0,$inout0
	pxor	$rndkey0,$inout1
	aes${dir}	$rndkey1,$inout0
	pxor	$rndkey0,$inout2
	aes${dir}	$rndkey1,$inout1
	pxor	$rndkey0,$inout3
	aes${dir}	$rndkey1,$inout2
	pxor	$rndkey0,$inout4
	aes${dir}	$rndkey1,$inout3
	pxor	$rndkey0,$inout5
	dec	$rounds
	aes${dir}	$rndkey1,$inout4
	$movkey	($key),$rndkey0
	aes${dir}	$rndkey1,$inout5
	jmp	.L${dir}_loop6_enter
.align	16
.L${dir}_loop6:
	aes${dir}	$rndkey1,$inout0
	aes${dir}	$rndkey1,$inout1
	dec	$rounds
	aes${dir}	$rndkey1,$inout2
	aes${dir}	$rndkey1,$inout3
	aes${dir}	$rndkey1,$inout4
	aes${dir}	$rndkey1,$inout5
.L${dir}_loop6_enter:				# happens to be 16-byte aligned
	$movkey	16($key),$rndkey1
	aes${dir}	$rndkey0,$inout0
	aes${dir}	$rndkey0,$inout1
	lea	32($key),$key
	aes${dir}	$rndkey0,$inout2
	aes${dir}	$rndkey0,$inout3
	aes${dir}	$rndkey0,$inout4
	aes${dir}	$rndkey0,$inout5
	$movkey	($key),$rndkey0
	jnz	.L${dir}_loop6

	aes${dir}	$rndkey1,$inout0
	aes${dir}	$rndkey1,$inout1
	aes${dir}	$rndkey1,$inout2
	aes${dir}	$rndkey1,$inout3
	aes${dir}	$rndkey1,$inout4
	aes${dir}	$rndkey1,$inout5
	aes${dir}last	$rndkey0,$inout0
	aes${dir}last	$rndkey0,$inout1
	aes${dir}last	$rndkey0,$inout2
	aes${dir}last	$rndkey0,$inout3
	aes${dir}last	$rndkey0,$inout4
	aes${dir}last	$rndkey0,$inout5
	ret
.size	_aesni_${dir}rypt6,.-_aesni_${dir}rypt6
___
}
# aesni_generate8($dir) appends _aesni_${dir}rypt8 to $code: the 8-block
# variant operating on $inout0..$inout7.
#
# It follows the 6-block layout (prologue overlaps the initial key XOR with
# the first $rndkey1 round, then jumps into the loop at
# .L${dir}_loop8_enter), with one difference: the reload of $rndkey1 from
# 16($key) is issued *before* the entry label -- once at the end of the
# prologue and once at the end of the loop's first half -- so the entry
# label lands directly on the $rndkey0 half of the loop body.
sub aesni_generate8 {
my $dir=shift;
# As already mentioned it takes in $key and $rounds, which are *not*
# preserved. $inout[0-7] is cipher/clear text...
$code.=<<___;
.type	_aesni_${dir}rypt8,\@abi-omnipotent
.align	16
_aesni_${dir}rypt8:
	$movkey	($key),$rndkey0
	shr	\$1,$rounds
	$movkey	16($key),$rndkey1
	lea	32($key),$key
	xorps	$rndkey0,$inout0
	xorps	$rndkey0,$inout1
	aes${dir}	$rndkey1,$inout0
	pxor	$rndkey0,$inout2
	aes${dir}	$rndkey1,$inout1
	pxor	$rndkey0,$inout3
	aes${dir}	$rndkey1,$inout2
	pxor	$rndkey0,$inout4
	aes${dir}	$rndkey1,$inout3
	pxor	$rndkey0,$inout5
	dec	$rounds
	aes${dir}	$rndkey1,$inout4
	pxor	$rndkey0,$inout6
	aes${dir}	$rndkey1,$inout5
	pxor	$rndkey0,$inout7
	$movkey	($key),$rndkey0
	aes${dir}	$rndkey1,$inout6
	aes${dir}	$rndkey1,$inout7
	$movkey	16($key),$rndkey1
	jmp	.L${dir}_loop8_enter
.align	16
.L${dir}_loop8:
	aes${dir}	$rndkey1,$inout0
	aes${dir}	$rndkey1,$inout1
	dec	$rounds
	aes${dir}	$rndkey1,$inout2
	aes${dir}	$rndkey1,$inout3
	aes${dir}	$rndkey1,$inout4
	aes${dir}	$rndkey1,$inout5
	aes${dir}	$rndkey1,$inout6
	aes${dir}	$rndkey1,$inout7
	$movkey	16($key),$rndkey1
.L${dir}_loop8_enter:				# happens to be 16-byte aligned
	aes${dir}	$rndkey0,$inout0
	aes${dir}	$rndkey0,$inout1
	lea	32($key),$key
	aes${dir}	$rndkey0,$inout2
	aes${dir}	$rndkey0,$inout3
	aes${dir}	$rndkey0,$inout4
	aes${dir}	$rndkey0,$inout5
	aes${dir}	$rndkey0,$inout6
	aes${dir}	$rndkey0,$inout7
	$movkey	($key),$rndkey0
	jnz	.L${dir}_loop8

	aes${dir}	$rndkey1,$inout0
	aes${dir}	$rndkey1,$inout1
	aes${dir}	$rndkey1,$inout2
	aes${dir}	$rndkey1,$inout3
	aes${dir}	$rndkey1,$inout4
	aes${dir}	$rndkey1,$inout5
	aes${dir}	$rndkey1,$inout6
	aes${dir}	$rndkey1,$inout7
	aes${dir}last	$rndkey0,$inout0
	aes${dir}last	$rndkey0,$inout1
	aes${dir}last	$rndkey0,$inout2
	aes${dir}last	$rndkey0,$inout3
	aes${dir}last	$rndkey0,$inout4
	aes${dir}last	$rndkey0,$inout5
	aes${dir}last	$rndkey0,$inout6
	aes${dir}last	$rndkey0,$inout7
	ret
.size	_aesni_${dir}rypt8,.-_aesni_${dir}rypt8
___
}
# Emit the interleaved helpers.  The "dec" flavours are generated
# unconditionally; the "enc" flavours only when $PREFIX is "aesni".
&aesni_generate3("enc") if ($PREFIX eq "aesni");
&aesni_generate3("dec");
&aesni_generate4("enc") if ($PREFIX eq "aesni");
&aesni_generate4("dec");
&aesni_generate6("enc") if ($PREFIX eq "aesni");
&aesni_generate6("dec");
&aesni_generate8("enc") if ($PREFIX eq "aesni");
&aesni_generate8("dec");

if ($PREFIX eq "aesni") {
########################################################################
# void aesni_ecb_encrypt (const void *in, void *out,
#			  size_t length, const AES_KEY *key,
#			  int enc);
$code.=<<___;
.globl	aesni_ecb_encrypt
.type	aesni_ecb_encrypt,\@function,5
.align	16
aesni_ecb_encrypt:
	and	\$-16,$len
jz .Lecb_ret 530392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 531392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 240($key),$rounds # key->rounds 532392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey ($key),$rndkey0 533392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $key,$key_ # backup $key 534392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $rounds,$rnds_ # backup $rounds 535392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom test %r8d,%r8d # 5th argument 536392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jz .Lecb_decrypt 537392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom#--------------------------- ECB ENCRYPT ------------------------------# 538392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$0x80,$len 539392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jb .Lecb_enc_tail 540392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 541392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu ($inp),$inout0 542392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x10($inp),$inout1 543392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x20($inp),$inout2 544392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x30($inp),$inout3 545392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x40($inp),$inout4 546392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x50($inp),$inout5 547392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x60($inp),$inout6 548392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x70($inp),$inout7 549392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x80($inp),$inp 550392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$0x80,$len 551392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lecb_enc_loop8_enter 552392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 553392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lecb_enc_loop8: 
554392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 555392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $key_,$key # restore $key 556392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu ($inp),$inout0 557392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $rnds_,$rounds # restore $rounds 558392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,0x10($out) 559392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x10($inp),$inout1 560392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,0x20($out) 561392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x20($inp),$inout2 562392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout3,0x30($out) 563392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x30($inp),$inout3 564392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout4,0x40($out) 565392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x40($inp),$inout4 566392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout5,0x50($out) 567392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x50($inp),$inout5 568392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout6,0x60($out) 569392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x60($inp),$inout6 570392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout7,0x70($out) 571392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x80($out),$out 572392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x70($inp),$inout7 573392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x80($inp),$inp 574392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lecb_enc_loop8_enter: 575392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 576392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_encrypt8 577392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 
578392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$0x80,$len 579392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jnc .Lecb_enc_loop8 580392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 581392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 582392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $key_,$key # restore $key 583392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,0x10($out) 584392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $rnds_,$rounds # restore $rounds 585392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,0x20($out) 586392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout3,0x30($out) 587392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout4,0x40($out) 588392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout5,0x50($out) 589392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout6,0x60($out) 590392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout7,0x70($out) 591392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x80($out),$out 592392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom add \$0x80,$len 593392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jz .Lecb_ret 594392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 595392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lecb_enc_tail: 596392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($inp),$inout0 597392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$0x20,$len 598392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jb .Lecb_enc_one 599392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x10($inp),$inout1 600392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom je .Lecb_enc_two 601392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x20($inp),$inout2 602392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$0x40,$len 
603392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jb .Lecb_enc_three 604392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x30($inp),$inout3 605392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom je .Lecb_enc_four 606392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x40($inp),$inout4 607392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$0x60,$len 608392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jb .Lecb_enc_five 609392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x50($inp),$inout5 610392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom je .Lecb_enc_six 611392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x60($inp),$inout6 612392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_encrypt8 613392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 614392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,0x10($out) 615392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,0x20($out) 616392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout3,0x30($out) 617392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout4,0x40($out) 618392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout5,0x50($out) 619392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout6,0x60($out) 620392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lecb_ret 621392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 622392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lecb_enc_one: 623392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 624392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom &aesni_generate1("enc",$key,$rounds); 625392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 626392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 627392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lecb_ret 
628392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 629392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lecb_enc_two: 630392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout2,$inout2 631392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_encrypt3 632392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 633392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,0x10($out) 634392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lecb_ret 635392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 636392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lecb_enc_three: 637392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_encrypt3 638392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 639392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,0x10($out) 640392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,0x20($out) 641392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lecb_ret 642392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 643392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lecb_enc_four: 644392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_encrypt4 645392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 646392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,0x10($out) 647392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,0x20($out) 648392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout3,0x30($out) 649392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lecb_ret 650392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 651392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lecb_enc_five: 652392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout5,$inout5 653392aa7cc7d2b122614c5393c3e357da07fd07af3Brian 
Carlstrom call _aesni_encrypt6 654392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 655392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,0x10($out) 656392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,0x20($out) 657392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout3,0x30($out) 658392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout4,0x40($out) 659392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lecb_ret 660392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 661392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lecb_enc_six: 662392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_encrypt6 663392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 664392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,0x10($out) 665392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,0x20($out) 666392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout3,0x30($out) 667392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout4,0x40($out) 668392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout5,0x50($out) 669392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lecb_ret 670392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom#--------------------------- ECB DECRYPT ------------------------------# 671392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 672392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lecb_decrypt: 673392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$0x80,$len 674392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jb .Lecb_dec_tail 675392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 676392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu ($inp),$inout0 677392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x10($inp),$inout1 
678392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x20($inp),$inout2 679392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x30($inp),$inout3 680392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x40($inp),$inout4 681392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x50($inp),$inout5 682392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x60($inp),$inout6 683392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x70($inp),$inout7 684392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x80($inp),$inp 685392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$0x80,$len 686392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lecb_dec_loop8_enter 687392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 688392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lecb_dec_loop8: 689392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 690392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $key_,$key # restore $key 691392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu ($inp),$inout0 692392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $rnds_,$rounds # restore $rounds 693392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,0x10($out) 694392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x10($inp),$inout1 695392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,0x20($out) 696392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x20($inp),$inout2 697392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout3,0x30($out) 698392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x30($inp),$inout3 699392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout4,0x40($out) 700392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x40($inp),$inout4 701392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 
$inout5,0x50($out) 702392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x50($inp),$inout5 703392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout6,0x60($out) 704392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x60($inp),$inout6 705392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout7,0x70($out) 706392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x80($out),$out 707392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x70($inp),$inout7 708392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x80($inp),$inp 709392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lecb_dec_loop8_enter: 710392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 711392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_decrypt8 712392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 713392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey ($key_),$rndkey0 714392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$0x80,$len 715392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jnc .Lecb_dec_loop8 716392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 717392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 718392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $key_,$key # restore $key 719392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,0x10($out) 720392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $rnds_,$rounds # restore $rounds 721392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,0x20($out) 722392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout3,0x30($out) 723392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout4,0x40($out) 724392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout5,0x50($out) 725392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout6,0x60($out) 
726392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout7,0x70($out) 727392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x80($out),$out 728392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom add \$0x80,$len 729392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jz .Lecb_ret 730392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 731392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lecb_dec_tail: 732392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($inp),$inout0 733392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$0x20,$len 734392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jb .Lecb_dec_one 735392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x10($inp),$inout1 736392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom je .Lecb_dec_two 737392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x20($inp),$inout2 738392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$0x40,$len 739392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jb .Lecb_dec_three 740392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x30($inp),$inout3 741392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom je .Lecb_dec_four 742392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x40($inp),$inout4 743392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$0x60,$len 744392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jb .Lecb_dec_five 745392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x50($inp),$inout5 746392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom je .Lecb_dec_six 747392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x60($inp),$inout6 748392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey ($key),$rndkey0 749392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_decrypt8 750392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 
751392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,0x10($out) 752392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,0x20($out) 753392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout3,0x30($out) 754392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout4,0x40($out) 755392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout5,0x50($out) 756392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout6,0x60($out) 757392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lecb_ret 758392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 759392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lecb_dec_one: 760392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 761392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom &aesni_generate1("dec",$key,$rounds); 762392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 763392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 764392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lecb_ret 765392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 766392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lecb_dec_two: 767392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout2,$inout2 768392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_decrypt3 769392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 770392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,0x10($out) 771392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lecb_ret 772392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 773392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lecb_dec_three: 774392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_decrypt3 775392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 
776392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,0x10($out) 777392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,0x20($out) 778392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lecb_ret 779392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 780392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lecb_dec_four: 781392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_decrypt4 782392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 783392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,0x10($out) 784392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,0x20($out) 785392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout3,0x30($out) 786392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lecb_ret 787392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 788392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lecb_dec_five: 789392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout5,$inout5 790392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_decrypt6 791392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 792392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,0x10($out) 793392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,0x20($out) 794392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout3,0x30($out) 795392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout4,0x40($out) 796392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lecb_ret 797392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 798392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lecb_dec_six: 799392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_decrypt6 800392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 
801392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,0x10($out) 802392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,0x20($out) 803392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout3,0x30($out) 804392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout4,0x40($out) 805392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout5,0x50($out) 806392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 807392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lecb_ret: 808392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom ret 809392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size aesni_ecb_encrypt,.-aesni_ecb_encrypt 810392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 811392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 812392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom{ 813392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom###################################################################### 814392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# void aesni_ccm64_[en|de]crypt_blocks (const void *in, void *out, 815392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# size_t blocks, const AES_KEY *key, 816392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# const char *ivec,char *cmac); 817392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# 818392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# Handles only complete blocks, operates on 64-bit counter and 819392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# does not update *ivec! 
# Nor does it finalize CMAC value
# (see engine/eng_aesni.c for details)
#
# Register roles in both routines, as established by the code below:
#   $inout0 - counter block that is encrypted into key stream,
#   $inout1 - running CMAC value, loaded from and stored back to *cmac,
#   $iv     - counter kept byte-swapped (pshufb) so paddq can step it,
#   $in0    - current input block, turned into the output block in place.
# Each pass of .Lccm64_enc2_loop/.Lccm64_dec2_loop applies two AES rounds
# to both states, which is why the round count is halved with shr.
# The jnz closing each loop consumes flags set by an earlier dec; the
# lea/SSE/AES instructions scheduled in between leave flags untouched,
# so the instruction order must be preserved.
{
my $cmac="%r9";	# 6th argument

my $increment="%xmm6";
my $bswap_mask="%xmm7";

$code.=<<___;
.globl	aesni_ccm64_encrypt_blocks
.type	aesni_ccm64_encrypt_blocks,\@function,6
.align	16
aesni_ccm64_encrypt_blocks:
___
# Win64 only: reserve 0x58 bytes of stack and save %xmm6-%xmm9 (XMM6 and
# up are callee-saved in the Microsoft x64 convention); the matching
# restore is emitted right before ret.
$code.=<<___ if ($win64);
	lea	-0x58(%rsp),%rsp
	movaps	%xmm6,(%rsp)
	movaps	%xmm7,0x10(%rsp)
	movaps	%xmm8,0x20(%rsp)
	movaps	%xmm9,0x30(%rsp)
.Lccm64_enc_body:
___
# Encrypt path: $rnds_ keeps the already halved round count and $key_ the
# start of the key schedule; both are reloaded at the top of every outer
# iteration.  One outer iteration handles one 16-byte block: the counter
# block and (cmac ^ input) are encrypted side by side, then the encrypted
# counter is xored into the input and stored to the output.
$code.=<<___;
	mov	240($key),$rounds		# key->rounds
	movdqu	($ivp),$iv
	movdqa	.Lincrement64(%rip),$increment
	movdqa	.Lbswap_mask(%rip),$bswap_mask

	shr	\$1,$rounds
	lea	0($key),$key_
	movdqu	($cmac),$inout1
	movdqa	$iv,$inout0
	mov	$rounds,$rnds_
	pshufb	$bswap_mask,$iv
	jmp	.Lccm64_enc_outer
.align	16
.Lccm64_enc_outer:
	$movkey	($key_),$rndkey0
	mov	$rnds_,$rounds
	movups	($inp),$in0			# load inp

	xorps	$rndkey0,$inout0		# counter
	$movkey	16($key_),$rndkey1
	xorps	$in0,$rndkey0
	lea	32($key_),$key
	xorps	$rndkey0,$inout1		# cmac^=inp
	$movkey	($key),$rndkey0

.Lccm64_enc2_loop:
	aesenc	$rndkey1,$inout0
	dec	$rounds
	aesenc	$rndkey1,$inout1
	$movkey	16($key),$rndkey1
	aesenc	$rndkey0,$inout0
	lea	32($key),$key
	aesenc	$rndkey0,$inout1
	$movkey	0($key),$rndkey0
	jnz	.Lccm64_enc2_loop
	aesenc	$rndkey1,$inout0
	aesenc	$rndkey1,$inout1
	paddq	$increment,$iv
	aesenclast	$rndkey0,$inout0
	aesenclast	$rndkey0,$inout1

	dec	$len
	lea	16($inp),$inp
	xorps	$inout0,$in0			# inp ^= E(iv)
	movdqa	$iv,$inout0
	movups	$in0,($out)			# save output
	lea	16($out),$out
	pshufb	$bswap_mask,$inout0
	jnz	.Lccm64_enc_outer

	movups	$inout1,($cmac)
___
$code.=<<___ if ($win64);
	movaps	(%rsp),%xmm6
	movaps	0x10(%rsp),%xmm7
	movaps	0x20(%rsp),%xmm8
	movaps	0x30(%rsp),%xmm9
	lea	0x58(%rsp),%rsp
.Lccm64_enc_ret:
___
$code.=<<___;
	ret
.size	aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
___
######################################################################
$code.=<<___;
.globl	aesni_ccm64_decrypt_blocks
.type	aesni_ccm64_decrypt_blocks,\@function,6
.align	16
aesni_ccm64_decrypt_blocks:
___
# Win64 only: same 0x58-byte frame and %xmm6-%xmm9 save as in the
# encrypt routine above.
$code.=<<___ if ($win64);
	lea	-0x58(%rsp),%rsp
	movaps	%xmm6,(%rsp)
	movaps	%xmm7,0x10(%rsp)
	movaps	%xmm8,0x20(%rsp)
	movaps	%xmm9,0x30(%rsp)
.Lccm64_dec_body:
___
# Decrypt path: unlike the encrypt routine, $rnds_ holds the UNHALVED
# round count here, so $rounds is reloaded and halved again inside the
# outer loop, and is still unhalved when .Lccm64_dec_break is reached.
$code.=<<___;
	mov	240($key),$rounds		# key->rounds
	movups	($ivp),$iv
	movdqu	($cmac),$inout1
	movdqa	.Lincrement64(%rip),$increment
	movdqa	.Lbswap_mask(%rip),$bswap_mask

	movaps	$iv,$inout0
	mov	$rounds,$rnds_
	mov	$key,$key_
	pshufb	$bswap_mask,$iv
___
	# The first counter block is processed before the loop is entered.
	# NOTE(review): aesni_generate1 is defined elsewhere in this file;
	# presumably it emits the single-block encryption of $inout0 when
	# called without explicit register arguments - confirm there.
	&aesni_generate1("enc",$key,$rounds);
# Outer loop: xor the pending encrypted counter into the input and store
# the result, then - unless this was the last block - encrypt the next
# counter block together with (cmac ^ output just produced), loading the
# next input block while the final rounds are in flight.
$code.=<<___;
	movups	($inp),$in0			# load inp
	paddq	$increment,$iv
	lea	16($inp),$inp
	jmp	.Lccm64_dec_outer
.align	16
.Lccm64_dec_outer:
	xorps	$inout0,$in0			# inp ^= E(iv)
	movdqa	$iv,$inout0
	mov	$rnds_,$rounds
	movups	$in0,($out)			# save output
	lea	16($out),$out
	pshufb	$bswap_mask,$inout0

	sub	\$1,$len
	jz	.Lccm64_dec_break

	$movkey	($key_),$rndkey0
	shr	\$1,$rounds
	$movkey	16($key_),$rndkey1
	xorps	$rndkey0,$in0
	lea	32($key_),$key
	xorps	$rndkey0,$inout0
	xorps	$in0,$inout1			# cmac^=out
	$movkey	($key),$rndkey0

.Lccm64_dec2_loop:
	aesenc	$rndkey1,$inout0
	dec	$rounds
	aesenc	$rndkey1,$inout1
	$movkey	16($key),$rndkey1
	aesenc	$rndkey0,$inout0
	lea	32($key),$key
	aesenc	$rndkey0,$inout1
	$movkey	0($key),$rndkey0
	jnz	.Lccm64_dec2_loop
	movups	($inp),$in0			# load inp
	paddq	$increment,$iv
	aesenc	$rndkey1,$inout0
	aesenc	$rndkey1,$inout1
	lea	16($inp),$inp
	aesenclast	$rndkey0,$inout0
	aesenclast	$rndkey0,$inout1
	jmp	.Lccm64_dec_outer

.align	16
.Lccm64_dec_break:
	#xorps	$in0,$inout1			# cmac^=out
___
	# Last block: only the CMAC update is left.  The explicit xorps above
	# is commented out in the original source.
	# NOTE(review): the extra ($inout1,$in0) arguments presumably make
	# aesni_generate1 operate on $inout1 and fold $in0 into it before
	# encrypting - confirm against its definition.
	&aesni_generate1("enc",$key_,$rounds,$inout1,$in0);
$code.=<<___;
	movups	$inout1,($cmac)
___
$code.=<<___ if ($win64);
	movaps	(%rsp),%xmm6
	movaps	0x10(%rsp),%xmm7
	movaps	0x20(%rsp),%xmm8
	movaps	0x30(%rsp),%xmm9
	lea	0x58(%rsp),%rsp
.Lccm64_dec_ret:
___
$code.=<<___;
	ret
.size	aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
___
}
######################################################################
# void aesni_ctr32_encrypt_blocks (const void *in, void *out,
#                         size_t blocks, const AES_KEY *key,
#                         const char *ivec);
#
# Handles only complete blocks, operates on 32-bit counter and
# does not update *ivec!
(see engine/eng_aesni.c for details) 1008392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# 1009392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom{ 1010392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrommy $reserved = $win64?0:-0x28; 1011392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrommy ($in0,$in1,$in2,$in3)=map("%xmm$_",(8..11)); 1012392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrommy ($iv0,$iv1,$ivec)=("%xmm12","%xmm13","%xmm14"); 1013392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrommy $bswap_mask="%xmm15"; 1014392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1015392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 1016392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.globl aesni_ctr32_encrypt_blocks 1017392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type aesni_ctr32_encrypt_blocks,\@function,5 1018392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 1019392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstromaesni_ctr32_encrypt_blocks: 1020392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 1021392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($win64); 1022392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea -0xc8(%rsp),%rsp 1023392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm6,0x20(%rsp) 1024392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm7,0x30(%rsp) 1025392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm8,0x40(%rsp) 1026392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm9,0x50(%rsp) 1027392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm10,0x60(%rsp) 1028392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm11,0x70(%rsp) 1029392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm12,0x80(%rsp) 1030392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm13,0x90(%rsp) 1031392aa7cc7d2b122614c5393c3e357da07fd07af3Brian 
Carlstrom movaps %xmm14,0xa0(%rsp) 1032392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm15,0xb0(%rsp) 1033392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lctr32_body: 1034392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 1035392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 1036392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$1,$len 1037392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom je .Lctr32_one_shortcut 1038392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1039392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu ($ivp),$ivec 1040392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa .Lbswap_mask(%rip),$bswap_mask 1041392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xor $rounds,$rounds 1042392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pextrd \$3,$ivec,$rnds_ # pull 32-bit counter 1043392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pinsrd \$3,$rounds,$ivec # wipe 32-bit counter 1044392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1045392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 240($key),$rounds # key->rounds 1046392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom bswap $rnds_ 1047392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $iv0,$iv0 # vector of 3 32-bit counters 1048392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $iv1,$iv1 # vector of 3 32-bit counters 1049392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pinsrd \$0,$rnds_,$iv0 1050392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 3($rnds_),$key_ 1051392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pinsrd \$0,$key_,$iv1 1052392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom inc $rnds_ 1053392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pinsrd \$1,$rnds_,$iv0 1054392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom inc $key_ 1055392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pinsrd 
\$1,$key_,$iv1 1056392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom inc $rnds_ 1057392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pinsrd \$2,$rnds_,$iv0 1058392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom inc $key_ 1059392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pinsrd \$2,$key_,$iv1 1060392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa $iv0,$reserved(%rsp) 1061392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb $bswap_mask,$iv0 1062392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa $iv1,`$reserved+0x10`(%rsp) 1063392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb $bswap_mask,$iv1 1064392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1065392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$`3<<6`,$iv0,$inout0 # place counter to upper dword 1066392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$`2<<6`,$iv0,$inout1 1067392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$`1<<6`,$iv0,$inout2 1068392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$6,$len 1069392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jb .Lctr32_tail 1070392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom shr \$1,$rounds 1071392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $key,$key_ # backup $key 1072392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $rounds,$rnds_ # backup $rounds 1073392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$6,$len 1074392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lctr32_loop6 1075392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1076392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 1077392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lctr32_loop6: 1078392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$`3<<6`,$iv1,$inout3 1079392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom por $ivec,$inout0 # merge counter-less ivec 
1080392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey ($key_),$rndkey0 1081392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$`2<<6`,$iv1,$inout4 1082392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom por $ivec,$inout1 1083392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey 16($key_),$rndkey1 1084392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$`1<<6`,$iv1,$inout5 1085392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom por $ivec,$inout2 1086392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom por $ivec,$inout3 1087392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $rndkey0,$inout0 1088392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom por $ivec,$inout4 1089392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom por $ivec,$inout5 1090392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1091392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # inline _aesni_encrypt6 and interleave last rounds 1092392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # with own code... 
1093392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1094392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $rndkey0,$inout1 1095392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout0 1096392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 32($key_),$key 1097392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $rndkey0,$inout2 1098392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout1 1099392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa .Lincrement32(%rip),$iv1 1100392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $rndkey0,$inout3 1101392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout2 1102392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa $reserved(%rsp),$iv0 1103392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $rndkey0,$inout4 1104392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout3 1105392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $rndkey0,$inout5 1106392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey ($key),$rndkey0 1107392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom dec $rounds 1108392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout4 1109392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout5 1110392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lctr32_enc_loop6_enter 1111392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 1112392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lctr32_enc_loop6: 1113392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout0 1114392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout1 1115392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom dec $rounds 1116392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout2 1117392aa7cc7d2b122614c5393c3e357da07fd07af3Brian 
Carlstrom aesenc $rndkey1,$inout3 1118392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout4 1119392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout5 1120392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lctr32_enc_loop6_enter: 1121392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey 16($key),$rndkey1 1122392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey0,$inout0 1123392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey0,$inout1 1124392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 32($key),$key 1125392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey0,$inout2 1126392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey0,$inout3 1127392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey0,$inout4 1128392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey0,$inout5 1129392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey ($key),$rndkey0 1130392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jnz .Lctr32_enc_loop6 1131392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1132392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout0 1133392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom paddd $iv1,$iv0 # increment counter vector 1134392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout1 1135392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom paddd `$reserved+0x10`(%rsp),$iv1 1136392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout2 1137392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa $iv0,$reserved(%rsp) # save counter vector 1138392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout3 1139392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa $iv1,`$reserved+0x10`(%rsp) 1140392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc 
$rndkey1,$inout4 1141392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb $bswap_mask,$iv0 # byte swap 1142392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout5 1143392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb $bswap_mask,$iv1 1144392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1145392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenclast $rndkey0,$inout0 1146392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($inp),$in0 # load input 1147392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenclast $rndkey0,$inout1 1148392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x10($inp),$in1 1149392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenclast $rndkey0,$inout2 1150392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x20($inp),$in2 1151392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenclast $rndkey0,$inout3 1152392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x30($inp),$in3 1153392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenclast $rndkey0,$inout4 1154392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x40($inp),$rndkey1 1155392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenclast $rndkey0,$inout5 1156392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x50($inp),$rndkey0 1157392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x60($inp),$inp 1158392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1159392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout0,$in0 # xor 1160392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$`3<<6`,$iv0,$inout0 1161392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout1,$in1 1162392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$`2<<6`,$iv0,$inout1 1163392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $in0,($out) # store output 
1164392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout2,$in2 1165392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$`1<<6`,$iv0,$inout2 1166392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $in1,0x10($out) 1167392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout3,$in3 1168392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $in2,0x20($out) 1169392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout4,$rndkey1 1170392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $in3,0x30($out) 1171392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout5,$rndkey0 1172392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $rndkey1,0x40($out) 1173392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $rndkey0,0x50($out) 1174392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x60($out),$out 1175392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $rnds_,$rounds 1176392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$6,$len 1177392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jnc .Lctr32_loop6 1178392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1179392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom add \$6,$len 1180392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jz .Lctr32_done 1181392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $key_,$key # restore $key 1182392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 1($rounds,$rounds),$rounds # restore original value 1183392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1184392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lctr32_tail: 1185392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom por $ivec,$inout0 1186392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($inp),$in0 1187392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$2,$len 1188392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jb .Lctr32_one 
1189392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1190392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom por $ivec,$inout1 1191392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x10($inp),$in1 1192392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom je .Lctr32_two 1193392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1194392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$`3<<6`,$iv1,$inout3 1195392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom por $ivec,$inout2 1196392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x20($inp),$in2 1197392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$4,$len 1198392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jb .Lctr32_three 1199392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1200392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$`2<<6`,$iv1,$inout4 1201392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom por $ivec,$inout3 1202392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x30($inp),$in3 1203392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom je .Lctr32_four 1204392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1205392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom por $ivec,$inout4 1206392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout5,$inout5 1207392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1208392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_encrypt6 1209392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1210392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x40($inp),$rndkey1 1211392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout0,$in0 1212392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout1,$in1 1213392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $in0,($out) 1214392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout2,$in2 
1215392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $in1,0x10($out) 1216392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout3,$in3 1217392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $in2,0x20($out) 1218392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout4,$rndkey1 1219392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $in3,0x30($out) 1220392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $rndkey1,0x40($out) 1221392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lctr32_done 1222392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1223392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 1224392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lctr32_one_shortcut: 1225392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($ivp),$inout0 1226392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($inp),$in0 1227392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 240($key),$rounds # key->rounds 1228392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lctr32_one: 1229392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 1230392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom &aesni_generate1("enc",$key,$rounds); 1231392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 1232392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout0,$in0 1233392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $in0,($out) 1234392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lctr32_done 1235392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1236392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 1237392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lctr32_two: 1238392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout2,$inout2 1239392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_encrypt3 1240392aa7cc7d2b122614c5393c3e357da07fd07af3Brian 
Carlstrom xorps $inout0,$in0 1241392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout1,$in1 1242392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $in0,($out) 1243392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $in1,0x10($out) 1244392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lctr32_done 1245392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1246392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 1247392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lctr32_three: 1248392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_encrypt3 1249392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout0,$in0 1250392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout1,$in1 1251392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $in0,($out) 1252392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout2,$in2 1253392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $in1,0x10($out) 1254392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $in2,0x20($out) 1255392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lctr32_done 1256392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1257392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 1258392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lctr32_four: 1259392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_encrypt4 1260392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout0,$in0 1261392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout1,$in1 1262392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $in0,($out) 1263392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout2,$in2 1264392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $in1,0x10($out) 1265392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout3,$in3 
1266392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $in2,0x20($out) 1267392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $in3,0x30($out) 1268392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1269392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lctr32_done: 1270392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 1271392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($win64); 1272392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x20(%rsp),%xmm6 1273392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x30(%rsp),%xmm7 1274392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x40(%rsp),%xmm8 1275392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x50(%rsp),%xmm9 1276392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x60(%rsp),%xmm10 1277392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x70(%rsp),%xmm11 1278392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x80(%rsp),%xmm12 1279392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x90(%rsp),%xmm13 1280392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0xa0(%rsp),%xmm14 1281392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0xb0(%rsp),%xmm15 1282392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0xc8(%rsp),%rsp 1283392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lctr32_ret: 1284392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 1285392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 1286392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom ret 1287392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks 1288392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 1289392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom} 1290392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1291392aa7cc7d2b122614c5393c3e357da07fd07af3Brian 
Carlstrom###################################################################### 1292392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# void aesni_xts_[en|de]crypt(const char *inp,char *out,size_t len, 1293392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# const AES_KEY *key1, const AES_KEY *key2, 1294392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# const unsigned char iv[16]); 1295392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# 1296392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom{ 1297392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrommy @tweak=map("%xmm$_",(10..15)); 1298392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrommy ($twmask,$twres,$twtmp)=("%xmm8","%xmm9",@tweak[4]); 1299392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrommy ($key2,$ivp,$len_)=("%r8","%r9","%r9"); 1300392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrommy $frame_size = 0x68 + ($win64?160:0); 1301392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1302392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 1303392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.globl aesni_xts_encrypt 1304392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type aesni_xts_encrypt,\@function,6 1305392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 1306392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstromaesni_xts_encrypt: 1307392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea -$frame_size(%rsp),%rsp 1308392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 1309392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($win64); 1310392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm6,0x60(%rsp) 1311392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm7,0x70(%rsp) 1312392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm8,0x80(%rsp) 1313392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm9,0x90(%rsp)
1314392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm10,0xa0(%rsp) 1315392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm11,0xb0(%rsp) 1316392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm12,0xc0(%rsp) 1317392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm13,0xd0(%rsp) 1318392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm14,0xe0(%rsp) 1319392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm15,0xf0(%rsp) 1320392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_enc_body: 1321392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 1322392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 1323392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($ivp),@tweak[5] # load clear-text tweak 1324392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 240(%r8),$rounds # key2->rounds 1325392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 240($key),$rnds_ # key1->rounds 1326392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 1327392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # generate the tweak 1328392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom &aesni_generate1("enc",$key2,$rounds,@tweak[5]); 1329392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 1330392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $key,$key_ # backup $key 1331392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $rnds_,$rounds # backup $rounds 1332392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $len,$len_ # backup $len 1333392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom and \$-16,$len 1334392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1335392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa .Lxts_magic(%rip),$twmask 1336392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twtmp,$twtmp 1337392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pcmpgtd 
@tweak[5],$twtmp # broadcast upper bits 1338392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 1339392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom for ($i=0;$i<4;$i++) { 1340392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $code.=<<___; 1341392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0x13,$twtmp,$twres 1342392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twtmp,$twtmp 1343392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[5],@tweak[$i] 1344392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom paddq @tweak[5],@tweak[5] # psllq 1,$tweak 1345392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pand $twmask,$twres # isolate carry and residue 1346392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pcmpgtd @tweak[5],$twtmp # broadcat upper bits 1347392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twres,@tweak[5] 1348392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 1349392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom } 1350392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 1351392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$16*6,$len 1352392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jc .Lxts_enc_short 1353392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1354392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom shr \$1,$rounds 1355392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$1,$rounds 1356392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $rounds,$rnds_ 1357392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lxts_enc_grandloop 1358392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1359392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 1360392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_enc_grandloop: 1361392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0x13,$twtmp,$twres 1362392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 
movdqa @tweak[5],@tweak[4] 1363392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom paddq @tweak[5],@tweak[5] # psllq 1,$tweak 1364392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu `16*0`($inp),$inout0 # load input 1365392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pand $twmask,$twres # isolate carry and residue 1366392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu `16*1`($inp),$inout1 1367392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twres,@tweak[5] 1368392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1369392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu `16*2`($inp),$inout2 1370392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor @tweak[0],$inout0 # input^=tweak 1371392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu `16*3`($inp),$inout3 1372392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor @tweak[1],$inout1 1373392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu `16*4`($inp),$inout4 1374392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor @tweak[2],$inout2 1375392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu `16*5`($inp),$inout5 1376392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea `16*6`($inp),$inp 1377392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor @tweak[3],$inout3 1378392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey ($key_),$rndkey0 1379392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor @tweak[4],$inout4 1380392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor @tweak[5],$inout5 1381392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1382392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # inline _aesni_encrypt6 and interleave first and last rounds 1383392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # with own code... 
1384392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey 16($key_),$rndkey1 1385392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $rndkey0,$inout0 1386392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $rndkey0,$inout1 1387392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[0],`16*0`(%rsp) # put aside tweaks 1388392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout0 1389392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 32($key_),$key 1390392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $rndkey0,$inout2 1391392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[1],`16*1`(%rsp) 1392392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout1 1393392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $rndkey0,$inout3 1394392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[2],`16*2`(%rsp) 1395392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout2 1396392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $rndkey0,$inout4 1397392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[3],`16*3`(%rsp) 1398392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout3 1399392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $rndkey0,$inout5 1400392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey ($key),$rndkey0 1401392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom dec $rounds 1402392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[4],`16*4`(%rsp) 1403392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout4 1404392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[5],`16*5`(%rsp) 1405392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout5 1406392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twtmp,$twtmp 
1407392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pcmpgtd @tweak[5],$twtmp 1408392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lxts_enc_loop6_enter 1409392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1410392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 1411392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_enc_loop6: 1412392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout0 1413392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout1 1414392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom dec $rounds 1415392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout2 1416392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout3 1417392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout4 1418392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout5 1419392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_enc_loop6_enter: 1420392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey 16($key),$rndkey1 1421392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey0,$inout0 1422392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey0,$inout1 1423392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 32($key),$key 1424392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey0,$inout2 1425392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey0,$inout3 1426392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey0,$inout4 1427392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey0,$inout5 1428392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey ($key),$rndkey0 1429392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jnz .Lxts_enc_loop6 1430392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1431392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd 
\$0x13,$twtmp,$twres 1432392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twtmp,$twtmp 1433392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom paddq @tweak[5],@tweak[5] # psllq 1,$tweak 1434392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout0 1435392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pand $twmask,$twres # isolate carry and residue 1436392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout1 1437392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pcmpgtd @tweak[5],$twtmp # broadcast upper bits 1438392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout2 1439392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twres,@tweak[5] 1440392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout3 1441392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout4 1442392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout5 1443392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey 16($key),$rndkey1 1444392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1445392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0x13,$twtmp,$twres 1446392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twtmp,$twtmp 1447392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[5],@tweak[0] 1448392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom paddq @tweak[5],@tweak[5] # psllq 1,$tweak 1449392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey0,$inout0 1450392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pand $twmask,$twres # isolate carry and residue 1451392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey0,$inout1 1452392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pcmpgtd @tweak[5],$twtmp # broadcat upper bits 1453392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey0,$inout2 
1454392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twres,@tweak[5] 1455392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey0,$inout3 1456392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey0,$inout4 1457392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey0,$inout5 1458392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey 32($key),$rndkey0 1459392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1460392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0x13,$twtmp,$twres 1461392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twtmp,$twtmp 1462392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[5],@tweak[1] 1463392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom paddq @tweak[5],@tweak[5] # psllq 1,$tweak 1464392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout0 1465392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pand $twmask,$twres # isolate carry and residue 1466392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout1 1467392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pcmpgtd @tweak[5],$twtmp # broadcat upper bits 1468392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout2 1469392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twres,@tweak[5] 1470392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout3 1471392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout4 1472392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenc $rndkey1,$inout5 1473392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1474392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0x13,$twtmp,$twres 1475392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twtmp,$twtmp 1476392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[5],@tweak[2] 
1477392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom paddq @tweak[5],@tweak[5] # psllq 1,$tweak 1478392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenclast $rndkey0,$inout0 1479392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pand $twmask,$twres # isolate carry and residue 1480392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenclast $rndkey0,$inout1 1481392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pcmpgtd @tweak[5],$twtmp # broadcat upper bits 1482392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenclast $rndkey0,$inout2 1483392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twres,@tweak[5] 1484392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenclast $rndkey0,$inout3 1485392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenclast $rndkey0,$inout4 1486392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesenclast $rndkey0,$inout5 1487392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1488392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0x13,$twtmp,$twres 1489392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twtmp,$twtmp 1490392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[5],@tweak[3] 1491392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom paddq @tweak[5],@tweak[5] # psllq 1,$tweak 1492392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps `16*0`(%rsp),$inout0 # output^=tweak 1493392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pand $twmask,$twres # isolate carry and residue 1494392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps `16*1`(%rsp),$inout1 1495392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pcmpgtd @tweak[5],$twtmp # broadcat upper bits 1496392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twres,@tweak[5] 1497392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1498392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps `16*2`(%rsp),$inout2 
1499392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,`16*0`($out) # write output 1500392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps `16*3`(%rsp),$inout3 1501392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,`16*1`($out) 1502392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps `16*4`(%rsp),$inout4 1503392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,`16*2`($out) 1504392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps `16*5`(%rsp),$inout5 1505392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout3,`16*3`($out) 1506392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $rnds_,$rounds # restore $rounds 1507392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout4,`16*4`($out) 1508392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout5,`16*5`($out) 1509392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea `16*6`($out),$out 1510392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$16*6,$len 1511392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jnc .Lxts_enc_grandloop 1512392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1513392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 3($rounds,$rounds),$rounds # restore original value 1514392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $key_,$key # restore $key 1515392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $rounds,$rnds_ # backup $rounds 1516392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1517392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_enc_short: 1518392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom add \$16*6,$len 1519392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jz .Lxts_enc_done 1520392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1521392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$0x20,$len 1522392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jb 
.Lxts_enc_one 1523392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom je .Lxts_enc_two 1524392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1525392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$0x40,$len 1526392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jb .Lxts_enc_three 1527392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom je .Lxts_enc_four 1528392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1529392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0x13,$twtmp,$twres 1530392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[5],@tweak[4] 1531392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom paddq @tweak[5],@tweak[5] # psllq 1,$tweak 1532392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu ($inp),$inout0 1533392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pand $twmask,$twres # isolate carry and residue 1534392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 16*1($inp),$inout1 1535392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twres,@tweak[5] 1536392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1537392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 16*2($inp),$inout2 1538392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor @tweak[0],$inout0 1539392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 16*3($inp),$inout3 1540392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor @tweak[1],$inout1 1541392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 16*4($inp),$inout4 1542392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16*5($inp),$inp 1543392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor @tweak[2],$inout2 1544392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor @tweak[3],$inout3 1545392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor @tweak[4],$inout4 1546392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 
1547392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_encrypt6 1548392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1549392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[0],$inout0 1550392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[5],@tweak[0] 1551392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[1],$inout1 1552392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[2],$inout2 1553392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu $inout0,($out) 1554392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[3],$inout3 1555392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu $inout1,16*1($out) 1556392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[4],$inout4 1557392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu $inout2,16*2($out) 1558392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu $inout3,16*3($out) 1559392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu $inout4,16*4($out) 1560392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16*5($out),$out 1561392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lxts_enc_done 1562392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1563392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 1564392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_enc_one: 1565392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($inp),$inout0 1566392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16*1($inp),$inp 1567392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[0],$inout0 1568392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 1569392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom &aesni_generate1("enc",$key,$rounds); 1570392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 1571392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[0],$inout0 
1572392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[1],@tweak[0] 1573392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 1574392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16*1($out),$out 1575392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lxts_enc_done 1576392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1577392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 1578392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_enc_two: 1579392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($inp),$inout0 1580392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 16($inp),$inout1 1581392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 32($inp),$inp 1582392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[0],$inout0 1583392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[1],$inout1 1584392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1585392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_encrypt3 1586392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1587392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[0],$inout0 1588392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[2],@tweak[0] 1589392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[1],$inout1 1590392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 1591392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,16*1($out) 1592392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16*2($out),$out 1593392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lxts_enc_done 1594392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1595392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 1596392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_enc_three: 1597392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 
($inp),$inout0 1598392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 16*1($inp),$inout1 1599392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 16*2($inp),$inout2 1600392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16*3($inp),$inp 1601392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[0],$inout0 1602392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[1],$inout1 1603392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[2],$inout2 1604392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1605392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_encrypt3 1606392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1607392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[0],$inout0 1608392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[3],@tweak[0] 1609392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[1],$inout1 1610392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[2],$inout2 1611392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 1612392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,16*1($out) 1613392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,16*2($out) 1614392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16*3($out),$out 1615392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lxts_enc_done 1616392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1617392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 1618392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_enc_four: 1619392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($inp),$inout0 1620392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 16*1($inp),$inout1 1621392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 16*2($inp),$inout2 1622392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps 
@tweak[0],$inout0 1623392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 16*3($inp),$inout3 1624392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16*4($inp),$inp 1625392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[1],$inout1 1626392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[2],$inout2 1627392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[3],$inout3 1628392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1629392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_encrypt4 1630392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1631392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[0],$inout0 1632392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[5],@tweak[0] 1633392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[1],$inout1 1634392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[2],$inout2 1635392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 1636392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[3],$inout3 1637392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,16*1($out) 1638392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,16*2($out) 1639392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout3,16*3($out) 1640392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16*4($out),$out 1641392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lxts_enc_done 1642392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1643392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 1644392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_enc_done: 1645392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom and \$15,$len_ 1646392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jz .Lxts_enc_ret 1647392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $len_,$len 
1648392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1649392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_enc_steal: 1650392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movzb ($inp),%eax # borrow $rounds ... 1651392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movzb -16($out),%ecx # ... and $key 1652392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 1($inp),$inp 1653392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov %al,-16($out) 1654392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov %cl,0($out) 1655392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 1($out),$out 1656392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$1,$len 1657392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jnz .Lxts_enc_steal 1658392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1659392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub $len_,$out # rewind $out 1660392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $key_,$key # restore $key 1661392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $rnds_,$rounds # restore $rounds 1662392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1663392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups -16($out),$inout0 1664392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[0],$inout0 1665392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 1666392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom &aesni_generate1("enc",$key,$rounds); 1667392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 1668392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[0],$inout0 1669392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,-16($out) 1670392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1671392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_enc_ret: 1672392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 
1673392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($win64); 1674392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x60(%rsp),%xmm6 1675392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x70(%rsp),%xmm7 1676392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x80(%rsp),%xmm8 1677392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x90(%rsp),%xmm9 1678392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0xa0(%rsp),%xmm10 1679392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0xb0(%rsp),%xmm11 1680392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0xc0(%rsp),%xmm12 1681392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0xd0(%rsp),%xmm13 1682392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0xe0(%rsp),%xmm14 1683392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0xf0(%rsp),%xmm15 1684392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 1685392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 1686392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea $frame_size(%rsp),%rsp 1687392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_enc_epilogue: 1688392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom ret 1689392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size aesni_xts_encrypt,.-aesni_xts_encrypt 1690392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 1691392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1692392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 1693392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.globl aesni_xts_decrypt 1694392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type aesni_xts_decrypt,\@function,6 1695392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 1696392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstromaesni_xts_decrypt: 1697392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 
-$frame_size(%rsp),%rsp 1698392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 1699392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($win64); 1700392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm6,0x60(%rsp) 1701392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm7,0x70(%rsp) 1702392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm8,0x80(%rsp) 1703392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm9,0x90(%rsp) 1704392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm10,0xa0(%rsp) 1705392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm11,0xb0(%rsp) 1706392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm12,0xc0(%rsp) 1707392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm13,0xd0(%rsp) 1708392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm14,0xe0(%rsp) 1709392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm15,0xf0(%rsp) 1710392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_dec_body: 1711392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 1712392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 1713392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($ivp),@tweak[5] # load clear-text tweak 1714392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 240($key2),$rounds # key2->rounds 1715392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 240($key),$rnds_ # key1->rounds 1716392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 1717392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # generate the tweak 1718392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom &aesni_generate1("enc",$key2,$rounds,@tweak[5]); 1719392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 1720392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xor %eax,%eax # if ($len%16) len-=16; 
1721392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom test \$15,$len 1722392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom setnz %al 1723392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom shl \$4,%rax 1724392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub %rax,$len 1725392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1726392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $key,$key_ # backup $key 1727392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $rnds_,$rounds # backup $rounds 1728392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $len,$len_ # backup $len 1729392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom and \$-16,$len 1730392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1731392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa .Lxts_magic(%rip),$twmask 1732392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twtmp,$twtmp 1733392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pcmpgtd @tweak[5],$twtmp # broadcast upper bits 1734392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 1735392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom for ($i=0;$i<4;$i++) { 1736392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $code.=<<___; 1737392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0x13,$twtmp,$twres 1738392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twtmp,$twtmp 1739392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[5],@tweak[$i] 1740392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom paddq @tweak[5],@tweak[5] # psllq 1,$tweak 1741392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pand $twmask,$twres # isolate carry and residue 1742392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pcmpgtd @tweak[5],$twtmp # broadcast upper bits 1743392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twres,@tweak[5] 1744392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 
1745392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom } 1746392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 1747392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$16*6,$len 1748392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jc .Lxts_dec_short 1749392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1750392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom shr \$1,$rounds 1751392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$1,$rounds 1752392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $rounds,$rnds_ 1753392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lxts_dec_grandloop 1754392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1755392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 1756392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_dec_grandloop: 1757392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0x13,$twtmp,$twres 1758392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[5],@tweak[4] 1759392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom paddq @tweak[5],@tweak[5] # psllq 1,$tweak 1760392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu `16*0`($inp),$inout0 # load input 1761392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pand $twmask,$twres # isolate carry and residue 1762392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu `16*1`($inp),$inout1 1763392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twres,@tweak[5] 1764392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1765392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu `16*2`($inp),$inout2 1766392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor @tweak[0],$inout0 # input^=tweak 1767392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu `16*3`($inp),$inout3 1768392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor @tweak[1],$inout1 1769392aa7cc7d2b122614c5393c3e357da07fd07af3Brian 
Carlstrom movdqu `16*4`($inp),$inout4 1770392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor @tweak[2],$inout2 1771392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu `16*5`($inp),$inout5 1772392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea `16*6`($inp),$inp 1773392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor @tweak[3],$inout3 1774392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey ($key_),$rndkey0 1775392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor @tweak[4],$inout4 1776392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor @tweak[5],$inout5 1777392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1778392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # inline _aesni_decrypt6 and interleave first and last rounds 1779392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # with own code... 1780392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey 16($key_),$rndkey1 1781392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $rndkey0,$inout0 1782392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $rndkey0,$inout1 1783392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[0],`16*0`(%rsp) # put aside tweaks 1784392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout0 1785392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 32($key_),$key 1786392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $rndkey0,$inout2 1787392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[1],`16*1`(%rsp) 1788392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout1 1789392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $rndkey0,$inout3 1790392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[2],`16*2`(%rsp) 1791392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout2 1792392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor 
$rndkey0,$inout4 1793392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[3],`16*3`(%rsp) 1794392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout3 1795392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $rndkey0,$inout5 1796392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey ($key),$rndkey0 1797392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom dec $rounds 1798392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[4],`16*4`(%rsp) 1799392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout4 1800392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[5],`16*5`(%rsp) 1801392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout5 1802392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twtmp,$twtmp 1803392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pcmpgtd @tweak[5],$twtmp 1804392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lxts_dec_loop6_enter 1805392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1806392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 1807392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_dec_loop6: 1808392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout0 1809392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout1 1810392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom dec $rounds 1811392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout2 1812392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout3 1813392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout4 1814392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout5 1815392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_dec_loop6_enter: 1816392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey 16($key),$rndkey1 
1817392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey0,$inout0 1818392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey0,$inout1 1819392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 32($key),$key 1820392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey0,$inout2 1821392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey0,$inout3 1822392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey0,$inout4 1823392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey0,$inout5 1824392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey ($key),$rndkey0 1825392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jnz .Lxts_dec_loop6 1826392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1827392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0x13,$twtmp,$twres 1828392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twtmp,$twtmp 1829392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom paddq @tweak[5],@tweak[5] # psllq 1,$tweak 1830392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout0 1831392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pand $twmask,$twres # isolate carry and residue 1832392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout1 1833392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pcmpgtd @tweak[5],$twtmp # broadcast upper bits 1834392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout2 1835392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twres,@tweak[5] 1836392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout3 1837392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout4 1838392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout5 1839392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey 16($key),$rndkey1 
1840392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1841392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0x13,$twtmp,$twres 1842392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twtmp,$twtmp 1843392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[5],@tweak[0] 1844392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom paddq @tweak[5],@tweak[5] # psllq 1,$tweak 1845392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey0,$inout0 1846392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pand $twmask,$twres # isolate carry and residue 1847392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey0,$inout1 1848392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pcmpgtd @tweak[5],$twtmp # broadcast upper bits 1849392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey0,$inout2 1850392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twres,@tweak[5] 1851392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey0,$inout3 1852392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey0,$inout4 1853392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey0,$inout5 1854392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey 32($key),$rndkey0 1855392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1856392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0x13,$twtmp,$twres 1857392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twtmp,$twtmp 1858392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[5],@tweak[1] 1859392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom paddq @tweak[5],@tweak[5] # psllq 1,$tweak 1860392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout0 1861392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pand $twmask,$twres # isolate carry and residue 1862392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout1 
1863392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pcmpgtd @tweak[5],$twtmp # broadcast upper bits 1864392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout2 1865392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twres,@tweak[5] 1866392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout3 1867392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout4 1868392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout5 1869392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1870392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0x13,$twtmp,$twres 1871392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twtmp,$twtmp 1872392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[5],@tweak[2] 1873392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom paddq @tweak[5],@tweak[5] # psllq 1,$tweak 1874392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdeclast $rndkey0,$inout0 1875392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pand $twmask,$twres # isolate carry and residue 1876392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdeclast $rndkey0,$inout1 1877392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pcmpgtd @tweak[5],$twtmp # broadcast upper bits 1878392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdeclast $rndkey0,$inout2 1879392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twres,@tweak[5] 1880392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdeclast $rndkey0,$inout3 1881392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdeclast $rndkey0,$inout4 1882392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdeclast $rndkey0,$inout5 1883392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1884392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0x13,$twtmp,$twres 1885392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twtmp,$twtmp 
1886392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[5],@tweak[3] 1887392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom paddq @tweak[5],@tweak[5] # psllq 1,$tweak 1888392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps `16*0`(%rsp),$inout0 # output^=tweak 1889392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pand $twmask,$twres # isolate carry and residue 1890392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps `16*1`(%rsp),$inout1 1891392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pcmpgtd @tweak[5],$twtmp # broadcast upper bits 1892392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twres,@tweak[5] 1893392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1894392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps `16*2`(%rsp),$inout2 1895392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,`16*0`($out) # write output 1896392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps `16*3`(%rsp),$inout3 1897392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,`16*1`($out) 1898392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps `16*4`(%rsp),$inout4 1899392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,`16*2`($out) 1900392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps `16*5`(%rsp),$inout5 1901392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout3,`16*3`($out) 1902392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $rnds_,$rounds # restore $rounds 1903392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout4,`16*4`($out) 1904392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout5,`16*5`($out) 1905392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea `16*6`($out),$out 1906392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$16*6,$len 1907392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jnc .Lxts_dec_grandloop 
1908392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1909392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 3($rounds,$rounds),$rounds # restore original value 1910392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $key_,$key # restore $key 1911392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $rounds,$rnds_ # backup $rounds 1912392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1913392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_dec_short: 1914392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom add \$16*6,$len 1915392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jz .Lxts_dec_done 1916392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1917392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$0x20,$len 1918392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jb .Lxts_dec_one 1919392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom je .Lxts_dec_two 1920392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1921392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$0x40,$len 1922392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jb .Lxts_dec_three 1923392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom je .Lxts_dec_four 1924392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1925392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0x13,$twtmp,$twres 1926392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[5],@tweak[4] 1927392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom paddq @tweak[5],@tweak[5] # psllq 1,$tweak 1928392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu ($inp),$inout0 1929392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pand $twmask,$twres # isolate carry and residue 1930392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 16*1($inp),$inout1 1931392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twres,@tweak[5] 1932392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 
1933392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 16*2($inp),$inout2 1934392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor @tweak[0],$inout0 1935392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 16*3($inp),$inout3 1936392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor @tweak[1],$inout1 1937392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 16*4($inp),$inout4 1938392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16*5($inp),$inp 1939392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor @tweak[2],$inout2 1940392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor @tweak[3],$inout3 1941392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor @tweak[4],$inout4 1942392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1943392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_decrypt6 1944392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1945392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[0],$inout0 1946392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[1],$inout1 1947392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[2],$inout2 1948392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu $inout0,($out) 1949392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[3],$inout3 1950392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu $inout1,16*1($out) 1951392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[4],$inout4 1952392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu $inout2,16*2($out) 1953392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twtmp,$twtmp 1954392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu $inout3,16*3($out) 1955392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pcmpgtd @tweak[5],$twtmp 1956392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu $inout4,16*4($out) 
1957392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16*5($out),$out 1958392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0x13,$twtmp,@tweak[1] # $twres 1959392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom and \$15,$len_ 1960392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jz .Lxts_dec_ret 1961392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1962392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[5],@tweak[0] 1963392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom paddq @tweak[5],@tweak[5] # psllq 1,$tweak 1964392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pand $twmask,@tweak[1] # isolate carry and residue 1965392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor @tweak[5],@tweak[1] 1966392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lxts_dec_done2 1967392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1968392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 1969392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_dec_one: 1970392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($inp),$inout0 1971392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16*1($inp),$inp 1972392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[0],$inout0 1973392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 1974392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom &aesni_generate1("dec",$key,$rounds); 1975392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 1976392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[0],$inout0 1977392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[1],@tweak[0] 1978392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 1979392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[2],@tweak[1] 1980392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16*1($out),$out 
1981392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lxts_dec_done 1982392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1983392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 1984392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_dec_two: 1985392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($inp),$inout0 1986392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 16($inp),$inout1 1987392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 32($inp),$inp 1988392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[0],$inout0 1989392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[1],$inout1 1990392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1991392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_decrypt3 1992392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 1993392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[0],$inout0 1994392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[2],@tweak[0] 1995392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[1],$inout1 1996392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[3],@tweak[1] 1997392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 1998392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,16*1($out) 1999392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16*2($out),$out 2000392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lxts_dec_done 2001392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2002392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2003392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_dec_three: 2004392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($inp),$inout0 2005392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 16*1($inp),$inout1 2006392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 
movups 16*2($inp),$inout2 2007392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16*3($inp),$inp 2008392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[0],$inout0 2009392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[1],$inout1 2010392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[2],$inout2 2011392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2012392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_decrypt3 2013392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2014392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[0],$inout0 2015392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[3],@tweak[0] 2016392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[1],$inout1 2017392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[5],@tweak[1] 2018392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[2],$inout2 2019392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 2020392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,16*1($out) 2021392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,16*2($out) 2022392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16*3($out),$out 2023392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lxts_dec_done 2024392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2025392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2026392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_dec_four: 2027392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0x13,$twtmp,$twres 2028392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[5],@tweak[4] 2029392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom paddq @tweak[5],@tweak[5] # psllq 1,$tweak 2030392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($inp),$inout0 
2031392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pand $twmask,$twres # isolate carry and residue 2032392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 16*1($inp),$inout1 2033392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $twres,@tweak[5] 2034392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2035392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 16*2($inp),$inout2 2036392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[0],$inout0 2037392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 16*3($inp),$inout3 2038392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16*4($inp),$inp 2039392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[1],$inout1 2040392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[2],$inout2 2041392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[3],$inout3 2042392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2043392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_decrypt4 2044392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2045392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[0],$inout0 2046392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[4],@tweak[0] 2047392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[1],$inout1 2048392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa @tweak[5],@tweak[1] 2049392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[2],$inout2 2050392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 2051392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[3],$inout3 2052392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,16*1($out) 2053392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,16*2($out) 2054392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout3,16*3($out) 
2055392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16*4($out),$out 2056392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lxts_dec_done 2057392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2058392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2059392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_dec_done: 2060392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom and \$15,$len_ 2061392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jz .Lxts_dec_ret 2062392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_dec_done2: 2063392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $len_,$len 2064392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $key_,$key # restore $key 2065392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $rnds_,$rounds # restore $rounds 2066392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2067392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($inp),$inout0 2068392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[1],$inout0 2069392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 2070392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom &aesni_generate1("dec",$key,$rounds); 2071392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 2072392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[1],$inout0 2073392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 2074392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2075392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_dec_steal: 2076392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movzb 16($inp),%eax # borrow $rounds ... 2077392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movzb ($out),%ecx # ... 
and $key 2078392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 1($inp),$inp 2079392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov %al,($out) 2080392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov %cl,16($out) 2081392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 1($out),$out 2082392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$1,$len 2083392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jnz .Lxts_dec_steal 2084392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2085392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub $len_,$out # rewind $out 2086392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $key_,$key # restore $key 2087392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $rnds_,$rounds # restore $rounds 2088392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2089392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($out),$inout0 2090392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[0],$inout0 2091392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 2092392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom &aesni_generate1("dec",$key,$rounds); 2093392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 2094392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps @tweak[0],$inout0 2095392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 2096392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2097392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_dec_ret: 2098392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 2099392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($win64); 2100392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x60(%rsp),%xmm6 2101392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x70(%rsp),%xmm7 2102392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x80(%rsp),%xmm8 
2103392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x90(%rsp),%xmm9 2104392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0xa0(%rsp),%xmm10 2105392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0xb0(%rsp),%xmm11 2106392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0xc0(%rsp),%xmm12 2107392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0xd0(%rsp),%xmm13 2108392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0xe0(%rsp),%xmm14 2109392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0xf0(%rsp),%xmm15 2110392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 2111392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 2112392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea $frame_size(%rsp),%rsp 2113392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_dec_epilogue: 2114392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom ret 2115392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size aesni_xts_decrypt,.-aesni_xts_decrypt 2116392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 2117392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom} }} 2118392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2119392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom######################################################################## 2120392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# void $PREFIX_cbc_encrypt (const void *inp, void *out, 2121392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# size_t length, const AES_KEY *key, 2122392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# unsigned char *ivp,const int enc); 2123392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom{ 2124392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrommy $reserved = $win64?0x40:-0x18; # used in decrypt 2125392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 2126392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.globl 
${PREFIX}_cbc_encrypt 2127392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type ${PREFIX}_cbc_encrypt,\@function,6 2128392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2129392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom${PREFIX}_cbc_encrypt: 2130392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom test $len,$len # check length 2131392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jz .Lcbc_ret 2132392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2133392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 240($key),$rnds_ # key->rounds 2134392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $key,$key_ # backup $key 2135392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom test %r9d,%r9d # 6th argument 2136392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jz .Lcbc_decrypt 2137392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom#--------------------------- CBC ENCRYPT ------------------------------# 2138392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($ivp),$inout0 # load iv as initial state 2139392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $rnds_,$rounds 2140392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$16,$len 2141392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jb .Lcbc_enc_tail 2142392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$16,$len 2143392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lcbc_enc_loop 2144392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2145392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lcbc_enc_loop: 2146392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($inp),$inout1 # load input 2147392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16($inp),$inp 2148392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom #xorps $inout1,$inout0 2149392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 2150392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 
&aesni_generate1("enc",$key,$rounds,$inout0,$inout1); 2151392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 2152392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $rnds_,$rounds # restore $rounds 2153392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $key_,$key # restore $key 2154392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,0($out) # store output 2155392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16($out),$out 2156392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$16,$len 2157392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jnc .Lcbc_enc_loop 2158392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom add \$16,$len 2159392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jnz .Lcbc_enc_tail 2160392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($ivp) 2161392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lcbc_ret 2162392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2163392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lcbc_enc_tail: 2164392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $len,%rcx # zaps $key 2165392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xchg $inp,$out # $inp is %rsi and $out is %rdi now 2166392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .long 0x9066A4F3 # rep movsb 2167392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov \$16,%ecx # zero tail 2168392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub $len,%rcx 2169392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xor %eax,%eax 2170392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .long 0x9066AAF3 # rep stosb 2171392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea -16(%rdi),%rdi # rewind $out by 1 block 2172392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $rnds_,$rounds # restore $rounds 2173392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov %rdi,%rsi # $inp and $out are the same 
2174392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $key_,$key # restore $key 2175392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xor $len,$len # len=16 2176392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lcbc_enc_loop # one more spin 2177392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom#--------------------------- CBC DECRYPT ------------------------------# 2178392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2179392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lcbc_decrypt: 2180392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 2181392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($win64); 2182392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea -0x58(%rsp),%rsp 2183392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm6,(%rsp) 2184392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm7,0x10(%rsp) 2185392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm8,0x20(%rsp) 2186392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm9,0x30(%rsp) 2187392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lcbc_decrypt_body: 2188392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 2189392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 2190392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($ivp),$iv 2191392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $rnds_,$rounds 2192392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$0x70,$len 2193392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jbe .Lcbc_dec_tail 2194392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom shr \$1,$rnds_ 2195392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$0x70,$len 2196392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $rnds_,$rounds 2197392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps $iv,$reserved(%rsp) 2198392aa7cc7d2b122614c5393c3e357da07fd07af3Brian 
Carlstrom jmp .Lcbc_dec_loop8_enter 2199392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2200392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lcbc_dec_loop8: 2201392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps $rndkey0,$reserved(%rsp) # save IV 2202392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout7,($out) 2203392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x10($out),$out 2204392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lcbc_dec_loop8_enter: 2205392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey ($key),$rndkey0 2206392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($inp),$inout0 # load input 2207392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x10($inp),$inout1 2208392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey 16($key),$rndkey1 2209392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2210392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 32($key),$key 2211392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x20($inp),$inout2 2212392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $rndkey0,$inout0 2213392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x30($inp),$inout3 2214392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $rndkey0,$inout1 2215392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x40($inp),$inout4 2216392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout0 2217392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $rndkey0,$inout2 2218392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x50($inp),$inout5 2219392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout1 2220392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $rndkey0,$inout3 2221392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x60($inp),$inout6 2222392aa7cc7d2b122614c5393c3e357da07fd07af3Brian 
Carlstrom aesdec $rndkey1,$inout2 2223392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $rndkey0,$inout4 2224392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 0x70($inp),$inout7 2225392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout3 2226392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $rndkey0,$inout5 2227392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom dec $rounds 2228392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout4 2229392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $rndkey0,$inout6 2230392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout5 2231392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor $rndkey0,$inout7 2232392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey ($key),$rndkey0 2233392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout6 2234392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesdec $rndkey1,$inout7 2235392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey 16($key),$rndkey1 2236392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2237392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Ldec_loop8_enter 2238392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2239392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($inp),$rndkey1 # re-load input 2240392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x10($inp),$rndkey0 2241392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $reserved(%rsp),$inout0 # ^= IV 2242392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $rndkey1,$inout1 2243392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x20($inp),$rndkey1 2244392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $rndkey0,$inout2 2245392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x30($inp),$rndkey0 2246392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 
xorps $rndkey1,$inout3 2247392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x40($inp),$rndkey1 2248392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $rndkey0,$inout4 2249392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x50($inp),$rndkey0 2250392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $rndkey1,$inout5 2251392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x60($inp),$rndkey1 2252392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $rndkey0,$inout6 2253392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x70($inp),$rndkey0 # IV 2254392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $rndkey1,$inout7 2255392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 2256392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,0x10($out) 2257392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,0x20($out) 2258392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout3,0x30($out) 2259392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $rnds_,$rounds # restore $rounds 2260392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout4,0x40($out) 2261392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $key_,$key # restore $key 2262392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout5,0x50($out) 2263392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x80($inp),$inp 2264392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout6,0x60($out) 2265392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x70($out),$out 2266392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$0x80,$len 2267392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom ja .Lcbc_dec_loop8 2268392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2269392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps $inout7,$inout0 
2270392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps $rndkey0,$iv 2271392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom add \$0x70,$len 2272392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jle .Lcbc_dec_tail_collected 2273392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 2274392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 1($rnds_,$rnds_),$rounds 2275392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x10($out),$out 2276392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lcbc_dec_tail: 2277392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($inp),$inout0 2278392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps $inout0,$in0 2279392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$0x10,$len 2280392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jbe .Lcbc_dec_one 2281392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2282392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x10($inp),$inout1 2283392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps $inout1,$in1 2284392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$0x20,$len 2285392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jbe .Lcbc_dec_two 2286392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2287392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x20($inp),$inout2 2288392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps $inout2,$in2 2289392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$0x30,$len 2290392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jbe .Lcbc_dec_three 2291392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2292392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x30($inp),$inout3 2293392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$0x40,$len 2294392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jbe .Lcbc_dec_four 2295392aa7cc7d2b122614c5393c3e357da07fd07af3Brian 
Carlstrom 2296392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x40($inp),$inout4 2297392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$0x50,$len 2298392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jbe .Lcbc_dec_five 2299392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2300392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x50($inp),$inout5 2301392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$0x60,$len 2302392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jbe .Lcbc_dec_six 2303392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2304392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x60($inp),$inout6 2305392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps $iv,$reserved(%rsp) # save IV 2306392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_decrypt8 2307392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($inp),$rndkey1 2308392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x10($inp),$rndkey0 2309392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $reserved(%rsp),$inout0 # ^= IV 2310392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $rndkey1,$inout1 2311392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x20($inp),$rndkey1 2312392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $rndkey0,$inout2 2313392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x30($inp),$rndkey0 2314392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $rndkey1,$inout3 2315392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x40($inp),$rndkey1 2316392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $rndkey0,$inout4 2317392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x50($inp),$rndkey0 2318392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $rndkey1,$inout5 2319392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x60($inp),$iv # IV 
2320392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $rndkey0,$inout6 2321392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 2322392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,0x10($out) 2323392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,0x20($out) 2324392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout3,0x30($out) 2325392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout4,0x40($out) 2326392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout5,0x50($out) 2327392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x60($out),$out 2328392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps $inout6,$inout0 2329392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$0x70,$len 2330392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lcbc_dec_tail_collected 2331392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2332392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lcbc_dec_one: 2333392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 2334392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom &aesni_generate1("dec",$key,$rounds); 2335392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 2336392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $iv,$inout0 2337392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps $in0,$iv 2338392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$0x10,$len 2339392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lcbc_dec_tail_collected 2340392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2341392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lcbc_dec_two: 2342392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout2,$inout2 2343392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_decrypt3 2344392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps 
$iv,$inout0 2345392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $in0,$inout1 2346392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 2347392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps $in1,$iv 2348392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps $inout1,$inout0 2349392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x10($out),$out 2350392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$0x20,$len 2351392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lcbc_dec_tail_collected 2352392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2353392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lcbc_dec_three: 2354392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_decrypt3 2355392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $iv,$inout0 2356392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $in0,$inout1 2357392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 2358392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $in1,$inout2 2359392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,0x10($out) 2360392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps $in2,$iv 2361392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps $inout2,$inout0 2362392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x20($out),$out 2363392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$0x30,$len 2364392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lcbc_dec_tail_collected 2365392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2366392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lcbc_dec_four: 2367392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_decrypt4 2368392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $iv,$inout0 2369392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x30($inp),$iv 
2370392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $in0,$inout1 2371392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 2372392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $in1,$inout2 2373392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,0x10($out) 2374392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $in2,$inout3 2375392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,0x20($out) 2376392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps $inout3,$inout0 2377392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x30($out),$out 2378392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$0x40,$len 2379392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lcbc_dec_tail_collected 2380392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2381392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lcbc_dec_five: 2382392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $inout5,$inout5 2383392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_decrypt6 2384392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x10($inp),$rndkey1 2385392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x20($inp),$rndkey0 2386392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $iv,$inout0 2387392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $in0,$inout1 2388392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $rndkey1,$inout2 2389392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x30($inp),$rndkey1 2390392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $rndkey0,$inout3 2391392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x40($inp),$iv 2392392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $rndkey1,$inout4 2393392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 
2394392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,0x10($out) 2395392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,0x20($out) 2396392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout3,0x30($out) 2397392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x40($out),$out 2398392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps $inout4,$inout0 2399392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$0x50,$len 2400392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lcbc_dec_tail_collected 2401392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2402392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lcbc_dec_six: 2403392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _aesni_decrypt6 2404392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x10($inp),$rndkey1 2405392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x20($inp),$rndkey0 2406392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $iv,$inout0 2407392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $in0,$inout1 2408392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $rndkey1,$inout2 2409392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x30($inp),$rndkey1 2410392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $rndkey0,$inout3 2411392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x40($inp),$rndkey0 2412392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $rndkey1,$inout4 2413392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 0x50($inp),$iv 2414392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps $rndkey0,$inout5 2415392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 2416392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout1,0x10($out) 2417392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout2,0x20($out) 
2418392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout3,0x30($out) 2419392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout4,0x40($out) 2420392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x50($out),$out 2421392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps $inout5,$inout0 2422392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$0x60,$len 2423392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lcbc_dec_tail_collected 2424392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2425392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lcbc_dec_tail_collected: 2426392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom and \$15,$len 2427392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $iv,($ivp) 2428392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jnz .Lcbc_dec_tail_partial 2429392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups $inout0,($out) 2430392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lcbc_dec_ret 2431392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2432392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lcbc_dec_tail_partial: 2433392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps $inout0,$reserved(%rsp) 2434392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov \$16,%rcx 2435392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $out,%rdi 2436392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub $len,%rcx 2437392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea $reserved(%rsp),%rsi 2438392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .long 0x9066A4F3 # rep movsb 2439392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2440392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lcbc_dec_ret: 2441392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 2442392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($win64); 
2443392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps (%rsp),%xmm6 2444392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x10(%rsp),%xmm7 2445392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x20(%rsp),%xmm8 2446392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x30(%rsp),%xmm9 2447392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x58(%rsp),%rsp 2448392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 2449392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 2450392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lcbc_ret: 2451392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom ret 2452392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt 2453392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 2454392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom} 2455392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# int $PREFIX_set_[en|de]crypt_key (const unsigned char *userKey, 2456392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# int bits, AES_KEY *key) 2457392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom{ my ($inp,$bits,$key) = @_4args; 2458392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $bits =~ s/%r/%e/; 2459392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2460392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 2461392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.globl ${PREFIX}_set_decrypt_key 2462392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type ${PREFIX}_set_decrypt_key,\@abi-omnipotent 2463392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2464392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom${PREFIX}_set_decrypt_key: 2465392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .byte 0x48,0x83,0xEC,0x08 # sub rsp,8 2466392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call __aesni_set_encrypt_key 
2467392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom shl \$4,$bits # rounds-1 after _aesni_set_encrypt_key 2468392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom test %eax,%eax 2469392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jnz .Ldec_key_ret 2470392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16($key,$bits),$inp # points at the end of key schedule 2471392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2472392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey ($key),%xmm0 # just swap 2473392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey ($inp),%xmm1 2474392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey %xmm0,($inp) 2475392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey %xmm1,($key) 2476392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16($key),$key 2477392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea -16($inp),$inp 2478392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2479392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Ldec_key_inverse: 2480392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey ($key),%xmm0 # swap and inverse 2481392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey ($inp),%xmm1 2482392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesimc %xmm0,%xmm0 2483392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesimc %xmm1,%xmm1 2484392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16($key),$key 2485392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea -16($inp),$inp 2486392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey %xmm0,16($inp) 2487392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey %xmm1,-16($key) 2488392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp $key,$inp 2489392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom ja .Ldec_key_inverse 2490392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 
2491392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey ($key),%xmm0 # inverse middle 2492392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aesimc %xmm0,%xmm0 2493392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey %xmm0,($inp) 2494392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Ldec_key_ret: 2495392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom add \$8,%rsp 2496392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom ret 2497392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.LSEH_end_set_decrypt_key: 2498392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size ${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key 2499392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 2500392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2501392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# This is based on submission by 2502392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# 2503392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# Huang Ying <ying.huang@intel.com> 2504392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# Vinodh Gopal <vinodh.gopal@intel.com> 2505392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# Kahraman Akdemir 2506392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# 2507392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# Aggressively optimized in respect to aeskeygenassist's critical path 2508392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# and is contained in %xmm0-5 to meet Win64 ABI requirement. 
#
# Key-setup summary (describes the assembly emitted by this block):
#
# ${PREFIX}_set_encrypt_key and __aesni_set_encrypt_key are two labels on
# the same entry point. Result is returned in %rax:
#	-1	userKey ($inp) or key ($key) pointer is NULL;
#	-2	bits is not one of 128, 192 or 256;
#	 0	success.
# On success $bits is left holding 9, 11 or 13 ("rounds-1") and the same
# value is stored at byte offset 240 of the key structure (the per-size
# displacements 80/48/16 off %rax all resolve to that offset). %rax is
# used as the running write pointer into the key schedule, starting at
# 16($key); each .Lkey_expansion_* helper stores the previous round key
# and advances it, while the *_cold entry points skip that store for the
# first call.
#
# ${PREFIX}_set_decrypt_key calls __aesni_set_encrypt_key, returns its
# result unchanged if it is non-zero, and otherwise reverses the schedule
# in place: the first and last round keys are swapped as-is, every other
# pair is swapped with aesimc applied to both, and the middle key gets
# aesimc applied on its own.
2509392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# 2510392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 2511392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.globl ${PREFIX}_set_encrypt_key 2512392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type ${PREFIX}_set_encrypt_key,\@abi-omnipotent 2513392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2514392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom${PREFIX}_set_encrypt_key: 2515392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom__aesni_set_encrypt_key: 2516392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .byte 0x48,0x83,0xEC,0x08 # sub rsp,8 2517392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov \$-1,%rax 2518392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom test $inp,$inp 2519392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jz .Lenc_key_ret 2520392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom test $key,$key 2521392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jz .Lenc_key_ret 2522392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2523392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups ($inp),%xmm0 # pull first 128 bits of *userKey 2524392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps %xmm4,%xmm4 # low dword of xmm4 is assumed 0 2525392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16($key),%rax 2526392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$256,$bits 2527392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom je .L14rounds 2528392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$192,$bits 2529392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom je .L12rounds 2530392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$128,$bits 2531392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jne .Lbad_keybits 2532392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2533392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.L10rounds: 
2534392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov \$9,$bits # 10 rounds for 128-bit key 2535392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey %xmm0,($key) # round 0 2536392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x1,%xmm0,%xmm1 # round 1 2537392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_128_cold 2538392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x2,%xmm0,%xmm1 # round 2 2539392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_128 2540392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x4,%xmm0,%xmm1 # round 3 2541392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_128 2542392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x8,%xmm0,%xmm1 # round 4 2543392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_128 2544392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x10,%xmm0,%xmm1 # round 5 2545392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_128 2546392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x20,%xmm0,%xmm1 # round 6 2547392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_128 2548392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x40,%xmm0,%xmm1 # round 7 2549392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_128 2550392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x80,%xmm0,%xmm1 # round 8 2551392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_128 2552392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x1b,%xmm0,%xmm1 # round 9 2553392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_128 2554392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x36,%xmm0,%xmm1 
# round 10 2555392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_128 2556392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey %xmm0,(%rax) 2557392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $bits,80(%rax) # 240(%rdx) 2558392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xor %eax,%eax 2559392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lenc_key_ret 2560392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2561392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2562392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.L12rounds: 2563392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movq 16($inp),%xmm2 # remaining 1/3 of *userKey 2564392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov \$11,$bits # 12 rounds for 192 2565392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey %xmm0,($key) # round 0 2566392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x1,%xmm2,%xmm1 # round 1,2 2567392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_192a_cold 2568392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x2,%xmm2,%xmm1 # round 2,3 2569392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_192b 2570392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x4,%xmm2,%xmm1 # round 4,5 2571392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_192a 2572392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x8,%xmm2,%xmm1 # round 5,6 2573392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_192b 2574392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x10,%xmm2,%xmm1 # round 7,8 2575392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_192a 2576392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x20,%xmm2,%xmm1 # round 8,9 
2577392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_192b 2578392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x40,%xmm2,%xmm1 # round 10,11 2579392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_192a 2580392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x80,%xmm2,%xmm1 # round 11,12 2581392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_192b 2582392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey %xmm0,(%rax) 2583392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $bits,48(%rax) # 240(%rdx) 2584392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xor %rax, %rax 2585392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lenc_key_ret 2586392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2587392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2588392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.L14rounds: 2589392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movups 16($inp),%xmm2 # remaning half of *userKey 2590392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov \$13,$bits # 14 rounds for 256 2591392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16(%rax),%rax 2592392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey %xmm0,($key) # round 0 2593392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey %xmm2,16($key) # round 1 2594392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x1,%xmm2,%xmm1 # round 2 2595392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_256a_cold 2596392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x1,%xmm0,%xmm1 # round 3 2597392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_256b 2598392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x2,%xmm2,%xmm1 # round 4 
2599392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_256a 2600392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x2,%xmm0,%xmm1 # round 5 2601392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_256b 2602392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x4,%xmm2,%xmm1 # round 6 2603392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_256a 2604392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x4,%xmm0,%xmm1 # round 7 2605392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_256b 2606392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x8,%xmm2,%xmm1 # round 8 2607392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_256a 2608392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x8,%xmm0,%xmm1 # round 9 2609392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_256b 2610392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x10,%xmm2,%xmm1 # round 10 2611392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_256a 2612392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x10,%xmm0,%xmm1 # round 11 2613392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_256b 2614392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x20,%xmm2,%xmm1 # round 12 2615392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_256a 2616392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x20,%xmm0,%xmm1 # round 13 2617392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_256b 2618392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom aeskeygenassist \$0x40,%xmm2,%xmm1 # round 14 2619392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call .Lkey_expansion_256a 
2620392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey %xmm0,(%rax) 2621392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov $bits,16(%rax) # 240(%rdx) 2622392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xor %rax,%rax 2623392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lenc_key_ret 2624392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2625392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2626392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lbad_keybits: 2627392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov \$-2,%rax 2628392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lenc_key_ret: 2629392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom add \$8,%rsp 2630392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom ret 2631392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.LSEH_end_set_encrypt_key: 2632392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2633392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2634392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lkey_expansion_128: 2635392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey %xmm0,(%rax) 2636392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16(%rax),%rax 2637392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lkey_expansion_128_cold: 2638392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom shufps \$0b00010000,%xmm0,%xmm4 2639392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps %xmm4, %xmm0 2640392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom shufps \$0b10001100,%xmm0,%xmm4 2641392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps %xmm4, %xmm0 2642392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom shufps \$0b11111111,%xmm1,%xmm1 # critical path 2643392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps %xmm1,%xmm0 2644392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom ret 2645392aa7cc7d2b122614c5393c3e357da07fd07af3Brian 
Carlstrom 2646392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2647392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lkey_expansion_192a: 2648392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey %xmm0,(%rax) 2649392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16(%rax),%rax 2650392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lkey_expansion_192a_cold: 2651392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm2, %xmm5 2652392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lkey_expansion_192b_warm: 2653392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom shufps \$0b00010000,%xmm0,%xmm4 2654392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm2,%xmm3 2655392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps %xmm4,%xmm0 2656392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom shufps \$0b10001100,%xmm0,%xmm4 2657392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pslldq \$4,%xmm3 2658392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps %xmm4,%xmm0 2659392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0b01010101,%xmm1,%xmm1 # critical path 2660392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm3,%xmm2 2661392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm1,%xmm0 2662392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0b11111111,%xmm0,%xmm3 2663392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm3,%xmm2 2664392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom ret 2665392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2666392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2667392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lkey_expansion_192b: 2668392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps %xmm0,%xmm3 2669392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom shufps \$0b01000100,%xmm0,%xmm5 2670392aa7cc7d2b122614c5393c3e357da07fd07af3Brian 
Carlstrom $movkey %xmm5,(%rax) 2671392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom shufps \$0b01001110,%xmm2,%xmm3 2672392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey %xmm3,16(%rax) 2673392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 32(%rax),%rax 2674392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lkey_expansion_192b_warm 2675392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2676392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2677392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lkey_expansion_256a: 2678392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey %xmm2,(%rax) 2679392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16(%rax),%rax 2680392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lkey_expansion_256a_cold: 2681392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom shufps \$0b00010000,%xmm0,%xmm4 2682392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps %xmm4,%xmm0 2683392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom shufps \$0b10001100,%xmm0,%xmm4 2684392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps %xmm4,%xmm0 2685392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom shufps \$0b11111111,%xmm1,%xmm1 # critical path 2686392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps %xmm1,%xmm0 2687392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom ret 2688392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2689392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2690392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lkey_expansion_256b: 2691392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom $movkey %xmm0,(%rax) 2692392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 16(%rax),%rax 2693392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2694392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom shufps \$0b00010000,%xmm2,%xmm4 2695392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 
xorps %xmm4,%xmm2 2696392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom shufps \$0b10001100,%xmm2,%xmm4 2697392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps %xmm4,%xmm2 2698392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom shufps \$0b10101010,%xmm1,%xmm1 # critical path 2699392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xorps %xmm1,%xmm2 2700392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom ret 2701392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size ${PREFIX}_set_encrypt_key,.-${PREFIX}_set_encrypt_key 2702392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key 2703392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 2704392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom} 2705392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2706392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 2707392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 64 2708392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lbswap_mask: 2709392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 2710392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lincrement32: 2711392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .long 6,6,6,0 2712392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lincrement64: 2713392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .long 1,0,0,0 2714392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lxts_magic: 2715392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .long 0x87,0,1,0 2716392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2717392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.asciz "AES for Intel AES-NI, CRYPTOGAMS by <appro\@openssl.org>" 2718392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 64 2719392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 2720392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 
2721392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, 2722392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# CONTEXT *context,DISPATCHER_CONTEXT *disp) 2723392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstromif ($win64) { 2724392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$rec="%rcx"; 2725392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$frame="%rdx"; 2726392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$context="%r8"; 2727392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$disp="%r9"; 2728392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2729392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 2730392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.extern __imp_RtlVirtualUnwind 2731392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 2732392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($PREFIX eq "aesni"); 2733392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type ecb_se_handler,\@abi-omnipotent 2734392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2735392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstromecb_se_handler: 2736392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %rsi 2737392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %rdi 2738392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %rbx 2739392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %rbp 2740392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %r12 2741392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %r13 2742392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %r14 2743392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %r15 2744392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pushfq 2745392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$64,%rsp 2746392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 
2747392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 152($context),%rax # pull context->Rsp 2748392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2749392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lcommon_seh_tail 2750392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size ecb_se_handler,.-ecb_se_handler 2751392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2752392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type ccm64_se_handler,\@abi-omnipotent 2753392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2754392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstromccm64_se_handler: 2755392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %rsi 2756392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %rdi 2757392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %rbx 2758392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %rbp 2759392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %r12 2760392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %r13 2761392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %r14 2762392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %r15 2763392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pushfq 2764392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$64,%rsp 2765392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2766392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 120($context),%rax # pull context->Rax 2767392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 248($context),%rbx # pull context->Rip 2768392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2769392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 8($disp),%rsi # disp->ImageBase 2770392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 56($disp),%r11 # disp->HandlerData 2771392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2772392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 
0(%r11),%r10d # HandlerData[0] 2773392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea (%rsi,%r10),%r10 # prologue label 2774392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp %r10,%rbx # context->Rip<prologue label 2775392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jb .Lcommon_seh_tail 2776392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2777392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 152($context),%rax # pull context->Rsp 2778392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2779392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 4(%r11),%r10d # HandlerData[1] 2780392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea (%rsi,%r10),%r10 # epilogue label 2781392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp %r10,%rbx # context->Rip>=epilogue label 2782392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jae .Lcommon_seh_tail 2783392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2784392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0(%rax),%rsi # %xmm save area 2785392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 512($context),%rdi # &context.Xmm6 2786392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov \$8,%ecx # 4*sizeof(%xmm0)/sizeof(%rax) 2787392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .long 0xa548f3fc # cld; rep movsq 2788392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x58(%rax),%rax # adjust stack pointer 2789392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2790392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lcommon_seh_tail 2791392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size ccm64_se_handler,.-ccm64_se_handler 2792392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2793392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type ctr32_se_handler,\@abi-omnipotent 2794392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2795392aa7cc7d2b122614c5393c3e357da07fd07af3Brian 
Carlstromctr32_se_handler: 2796392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %rsi 2797392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %rdi 2798392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %rbx 2799392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %rbp 2800392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %r12 2801392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %r13 2802392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %r14 2803392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %r15 2804392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pushfq 2805392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$64,%rsp 2806392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2807392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 120($context),%rax # pull context->Rax 2808392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 248($context),%rbx # pull context->Rip 2809392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2810392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea .Lctr32_body(%rip),%r10 2811392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp %r10,%rbx # context->Rip<"prologue" label 2812392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jb .Lcommon_seh_tail 2813392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2814392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 152($context),%rax # pull context->Rsp 2815392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2816392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea .Lctr32_ret(%rip),%r10 2817392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp %r10,%rbx 2818392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jae .Lcommon_seh_tail 2819392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2820392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x20(%rax),%rsi # %xmm save area 
2821392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 512($context),%rdi # &context.Xmm6 2822392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax) 2823392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .long 0xa548f3fc # cld; rep movsq 2824392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0xc8(%rax),%rax # adjust stack pointer 2825392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2826392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lcommon_seh_tail 2827392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size ctr32_se_handler,.-ctr32_se_handler 2828392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2829392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type xts_se_handler,\@abi-omnipotent 2830392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 2831392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstromxts_se_handler: 2832392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %rsi 2833392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %rdi 2834392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %rbx 2835392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %rbp 2836392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %r12 2837392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %r13 2838392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %r14 2839392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom push %r15 2840392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pushfq 2841392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$64,%rsp 2842392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2843392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 120($context),%rax # pull context->Rax 2844392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 248($context),%rbx # pull context->Rip 2845392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 
2846392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 8($disp),%rsi # disp->ImageBase 2847392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 56($disp),%r11 # disp->HandlerData 2848392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2849392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 0(%r11),%r10d # HandlerData[0] 2850392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea (%rsi,%r10),%r10 # prologue lable 2851392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp %r10,%rbx # context->Rip<prologue label 2852392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jb .Lcommon_seh_tail 2853392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2854392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 152($context),%rax # pull context->Rsp 2855392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2856392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 4(%r11),%r10d # HandlerData[1] 2857392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea (%rsi,%r10),%r10 # epilogue label 2858392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp %r10,%rbx # context->Rip>=epilogue label 2859392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jae .Lcommon_seh_tail 2860392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2861392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x60(%rax),%rsi # %xmm save area 2862392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 512($context),%rdi # & context.Xmm6 2863392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax) 2864392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom .long 0xa548f3fc # cld; rep movsq 2865392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0x68+160(%rax),%rax # adjust stack pointer 2866392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 2867392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lcommon_seh_tail 2868392aa7cc7d2b122614c5393c3e357da07fd07af3Brian 
.size	xts_se_handler,.-xts_se_handler
___
# SEH handler for the CBC routine, followed by .Lcommon_seh_tail, the
# epilogue that every *_se_handler in this file jumps to.
$code.=<<___;
.type	cbc_se_handler,\@abi-omnipotent
.align	16
cbc_se_handler:
	push	%rsi
	push	%rdi
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	pushfq
	sub	\$64,%rsp

	mov	152($context),%rax	# pull context->Rsp
	mov	248($context),%rbx	# pull context->Rip

	lea	.Lcbc_decrypt(%rip),%r10
	cmp	%r10,%rbx		# context->Rip<"prologue" label
	jb	.Lcommon_seh_tail

	lea	.Lcbc_decrypt_body(%rip),%r10
	cmp	%r10,%rbx		# context->Rip<cbc_decrypt_body
	jb	.Lrestore_cbc_rax

	lea	.Lcbc_ret(%rip),%r10
	cmp	%r10,%rbx		# context->Rip>="epilogue" label
	jae	.Lcommon_seh_tail

	lea	0(%rax),%rsi		# top of stack
	lea	512($context),%rdi	# &context.Xmm6
	mov	\$8,%ecx		# 4*sizeof(%xmm0)/sizeof(%rax)
	.long	0xa548f3fc		# cld; rep movsq
	lea	0x58(%rax),%rax		# adjust stack pointer
	jmp	.Lcommon_seh_tail

.Lrestore_cbc_rax:
	mov	120($context),%rax

.Lcommon_seh_tail:
	mov	8(%rax),%rdi
	mov	16(%rax),%rsi
	mov	%rax,152($context)	# restore context->Rsp
	mov	%rsi,168($context)	# restore context->Rsi
	mov	%rdi,176($context)	# restore context->Rdi

	mov	40($disp),%rdi		# disp->ContextRecord
	mov	$context,%rsi		# context
	mov	\$154,%ecx		# sizeof(CONTEXT)
	.long	0xa548f3fc		# cld; rep movsq

	mov	$disp,%rsi
	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
	mov	0(%rsi),%r8		# arg3, disp->ControlPc
	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
	mov	40(%rsi),%r10		# disp->ContextRecord
	lea	56(%rsi),%r11		# &disp->HandlerData
	lea	24(%rsi),%r12		# &disp->EstablisherFrame
	mov	%r10,32(%rsp)		# arg5
	mov	%r11,40(%rsp)		# arg6
	mov	%r12,48(%rsp)		# arg7
	mov	%rcx,56(%rsp)		# arg8, (NULL)
	call	*__imp_RtlVirtualUnwind(%rip)

	mov	\$1,%eax		# ExceptionContinueSearch
	add	\$64,%rsp
	popfq
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	pop	%rdi
	pop	%rsi
	ret
.size	cbc_se_handler,.-cbc_se_handler

.section	.pdata
.align	4
___
# .pdata records (begin, end, unwind info) that are emitted only when the
# module is generated with the "aesni" prefix.
$code.=<<___ if ($PREFIX eq "aesni");
	.rva	.LSEH_begin_aesni_ecb_encrypt
	.rva	.LSEH_end_aesni_ecb_encrypt
	.rva	.LSEH_info_ecb

	.rva	.LSEH_begin_aesni_ccm64_encrypt_blocks
	.rva	.LSEH_end_aesni_ccm64_encrypt_blocks
	.rva	.LSEH_info_ccm64_enc

	.rva	.LSEH_begin_aesni_ccm64_decrypt_blocks
	.rva	.LSEH_end_aesni_ccm64_decrypt_blocks
	.rva	.LSEH_info_ccm64_dec

	.rva	.LSEH_begin_aesni_ctr32_encrypt_blocks
	.rva	.LSEH_end_aesni_ctr32_encrypt_blocks
	.rva	.LSEH_info_ctr32

	.rva	.LSEH_begin_aesni_xts_encrypt
	.rva	.LSEH_end_aesni_xts_encrypt
	.rva	.LSEH_info_xts_enc

	.rva	.LSEH_begin_aesni_xts_decrypt
	.rva	.LSEH_end_aesni_xts_decrypt
	.rva	.LSEH_info_xts_dec
___
# .pdata records emitted for every prefix, then the switch to .xdata.
$code.=<<___;
	.rva	.LSEH_begin_${PREFIX}_cbc_encrypt
	.rva	.LSEH_end_${PREFIX}_cbc_encrypt
	.rva	.LSEH_info_cbc

	.rva	${PREFIX}_set_decrypt_key
	.rva	.LSEH_end_set_decrypt_key
	.rva	.LSEH_info_key

	.rva	${PREFIX}_set_encrypt_key
	.rva	.LSEH_end_set_encrypt_key
	.rva	.LSEH_info_key
.section	.xdata
.align	8
___
# .xdata unwind-info records matching the "aesni"-only .pdata records.
$code.=<<___ if ($PREFIX eq "aesni");
.LSEH_info_ecb:
	.byte	9,0,0,0
	.rva	ecb_se_handler
.LSEH_info_ccm64_enc:
	.byte	9,0,0,0
	.rva	ccm64_se_handler
	.rva	.Lccm64_enc_body,.Lccm64_enc_ret	# HandlerData[]
.LSEH_info_ccm64_dec:
	.byte	9,0,0,0
	.rva	ccm64_se_handler
	.rva	.Lccm64_dec_body,.Lccm64_dec_ret	# HandlerData[]
.LSEH_info_ctr32:
	.byte	9,0,0,0
	.rva	ctr32_se_handler
.LSEH_info_xts_enc:
	.byte	9,0,0,0
	.rva	xts_se_handler
	.rva	.Lxts_enc_body,.Lxts_enc_epilogue	# HandlerData[]
.LSEH_info_xts_dec:
	.byte	9,0,0,0
	.rva	xts_se_handler
	.rva	.Lxts_dec_body,.Lxts_dec_epilogue	# HandlerData[]
___
# .xdata unwind-info records emitted for every prefix.
$code.=<<___;
.LSEH_info_cbc:
	.byte	9,0,0,0
	.rva	cbc_se_handler
.LSEH_info_key:
	.byte	0x01,0x04,0x01,0x00
	.byte	0x04,0x02,0x00,0x00	# sub rsp,8
___
}	# closes a block opened earlier in the file

# rex(\@opcode, $dst, $src)
#
# Appends a REX prefix byte to the array referenced by the first argument
# when either register number is 8 or above: bit 0x04 is set for $dst,
# bit 0x01 for $src, and the byte is or-ed with 0x40.  Nothing is appended
# when both register numbers are below 8.
sub rex {
    my ($opcode,$dst,$src)=@_;
    my $rex=0;

    $rex|=0x04			if($dst>=8);
    $rex|=0x01			if($src>=8);
    push @$opcode,$rex|0x40	if($rex);
}

# aesni($line)
#
# Translates one AES-NI instruction with register-only operands into an
# equivalent ".byte" directive, so the output does not depend on the
# assembler knowing the mnemonic.  Two forms are recognised:
#
#   aeskeygenassist $imm,%xmmS,%xmmD
#   aesimc|aesenc|aesenclast|aesdec|aesdeclast %xmmS,%xmmD
#
# Returns the ".byte\t..." string (comma-separated decimal values) for a
# recognised instruction.  Anything else, including an unknown aes*
# mnemonic, is returned unchanged so that the assembler gets to see it;
# returning undef here would make the s///e caller replace the instruction
# with an empty string.
sub aesni {
    my $line=shift;
    my @opcode=(0x66);

    if ($line=~/(aeskeygenassist)\s+\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) {
	my ($imm,$src,$dst)=($2,$3,$4);

	rex(\@opcode,$dst,$src);
	push @opcode,0x0f,0x3a,0xdf;
	push @opcode,0xc0|($src&7)|(($dst&7)<<3);	# ModR/M
	# immediates with a leading 0 (0x.., 0..) go through oct()
	push @opcode,$imm=~/^0/?oct($imm):$imm;
	return ".byte\t".join(',',@opcode);
    }
    elsif ($line=~/(aes[a-z]+)\s+%xmm([0-9]+),\s*%xmm([0-9]+)/) {
	my ($mnemonic,$src,$dst)=($1,$2,$3);
	my %opcodelet = (
		"aesimc" => 0xdb,
		"aesenc" => 0xdc,	"aesenclast" => 0xdd,
		"aesdec" => 0xde,	"aesdeclast" => 0xdf
	);

	if (defined($opcodelet{$mnemonic})) {
	    rex(\@opcode,$dst,$src);
	    push @opcode,0x0f,0x38,$opcodelet{$mnemonic};
	    push @opcode,0xc0|($src&7)|(($dst&7)<<3);	# ModR/M
	    return ".byte\t".join(',',@opcode);
	}
	# unknown mnemonic: fall through and hand the line back as-is
    }
    return $line;
}

# Evaluate `...` expressions embedded in the generated code, then replace
# AES-NI instructions with their .byte encodings.
$code =~ s/\`([^\`]*)\`/eval($1)/gem;
$code =~ s/\b(aes.*%xmm[0-9]+).*$/aesni($1)/gem;

print $code;

# Buffered write errors only surface at close; fail loudly instead of
# leaving truncated assembly behind with a success exit status.
close STDOUT or die "error closing STDOUT: $!";