#!/usr/bin/env perl

######################################################################
## Constant-time SSSE3 AES core implementation.
## version 0.1
##
## By Mike Hamburg (Stanford University), 2009
## Public domain.
##
## For details see http://shiftleft.org/papers/vector_aes/ and
## http://crypto.stanford.edu/vpaes/.

######################################################################
# September 2011.
#
# Interface to OpenSSL as "almost" drop-in replacement for
# aes-x86_64.pl. "Almost" refers to the fact that AES_cbc_encrypt
# doesn't handle partial vectors (doesn't have to if called from
# EVP only). "Drop-in" implies that this module doesn't share key
# schedule structure with the original nor does it make assumptions
# about its alignment...
#
# Performance summary. aes-x86_64.pl column lists large-block CBC
# encrypt/decrypt/with-hyper-threading-off(*) results in cycles per
# byte processed with 128-bit key, and vpaes-x86_64.pl column -
# [also large-block CBC] encrypt/decrypt.
#
#		aes-x86_64.pl		vpaes-x86_64.pl
#
# Core 2(**)	30.5/43.7/14.3		21.8/25.7(***)
# Nehalem	30.5/42.2/14.6		 9.8/11.8
# Atom		63.9/79.0/32.1		64.0/84.8(***)
#
# (*)	"Hyper-threading" in this context refers to a cache shared
#	among multiple cores rather than specifically to Intel HTT. As
#	the vast majority of contemporary cores share cache, the slower
#	code path is commonplace. In other words "with-hyper-threading-off"
#	results are presented mostly for reference purposes.
#
# (**)	"Core 2" refers to initial 65nm design, a.k.a. Conroe.
#
# (***)	Less impressive improvement on Core 2 and Atom is due to slow
#	pshufb, yet it's a respectable +40%/78% improvement on Core 2
#	(as implied, over the "hyper-threading-safe" code path).
#
#						<appro@openssl.org>

# Command line: [flavour] [output].  A lone first argument containing a
# dot is taken to be the output file name, leaving $flavour undefined.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# $win64 is set when the flavour names nasm, masm or mingw64, or when the
# output file name ends in ".asm".
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Directory part of this script's own path, including the trailing
# separator, or "" when $0 carries no directory component.  The match
# result is tested explicitly so a stale $1 is never picked up.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# Look for the translator next to this script first, then in the shared
# perlasm directory two levels up.
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the
# translator, run with the same perl binary ($^X).  The translator and
# output paths are quoted so that paths containing spaces survive the
# shell; flavour and output are appended only when present, so the
# translator sees the same argument list as before.
my $xlate_cmd = "| \"$^X\" \"$xlate\"";
$xlate_cmd .= " $flavour"    if (defined($flavour) && $flavour ne "");
$xlate_cmd .= " \"$output\"" if (defined($output)  && $output  ne "");
# NOTE(review): a successful open only means the pipe was started; errors
# inside the translator presumably surface when STDOUT is closed - confirm
# that the end of this script checks close().
open OUT,$xlate_cmd or die "can't call $xlate: $!";
*STDOUT=*OUT;

$PREFIX="vpaes";

64392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 65392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.text 66392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 67392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 68392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## _aes_encrypt_core 69392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 70392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## AES-encrypt %xmm0. 71392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 72392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## Inputs: 73392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## %xmm0 = input 74392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## %xmm9-%xmm15 as in _vpaes_preheat 75392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## (%rdx) = scheduled keys 76392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 77392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## Output in %xmm0 78392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## Clobbers %xmm1-%xmm5, %r9, %r10, %r11, %rax 79392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## Preserves %xmm6 - %xmm8 so you get some local vectors 80392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 81392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 82392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type _vpaes_encrypt_core,\@abi-omnipotent 83392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 84392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom_vpaes_encrypt_core: 85392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov %rdx, %r9 86392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov \$16, %r11 87392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 240(%rdx),%eax 88392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm9, %xmm1 89392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa .Lk_ipt(%rip), %xmm2 # iptlo 
90392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pandn %xmm0, %xmm1 91392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu (%r9), %xmm5 # round0 key 92392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom psrld \$4, %xmm1 93392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pand %xmm9, %xmm0 94392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm0, %xmm2 95392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa .Lk_ipt+16(%rip), %xmm0 # ipthi 96392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm1, %xmm0 97392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm5, %xmm2 98392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm2, %xmm0 99392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom add \$16, %r9 100392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea .Lk_mc_backward(%rip),%r10 101392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lenc_entry 102392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 103392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 104392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lenc_loop: 105392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # middle of middle round 106392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm13, %xmm4 # 4 : sb1u 107392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm2, %xmm4 # 4 = sb1u 108392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm5, %xmm4 # 4 = sb1u + k 109392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm12, %xmm0 # 0 : sb1t 110392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm3, %xmm0 # 0 = sb1t 111392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm4, %xmm0 # 0 = A 112392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm15, %xmm5 # 4 : sb2u 113392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm2, %xmm5 # 4 = sb2u 
114392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[] 115392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm14, %xmm2 # 2 : sb2t 116392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm3, %xmm2 # 2 = sb2t 117392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm5, %xmm2 # 2 = 2A 118392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[] 119392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm0, %xmm3 # 3 = A 120392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm1, %xmm0 # 0 = B 121392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom add \$16, %r9 # next key 122392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm2, %xmm0 # 0 = 2A+B 123392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm4, %xmm3 # 3 = D 124392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom add \$16, %r11 # next mc 125392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm0, %xmm3 # 3 = 2A+B+D 126392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm1, %xmm0 # 0 = 2B+C 127392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom and \$0x30, %r11 # ... 
mod 4 128392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm3, %xmm0 # 0 = 2A+3B+C+D 129392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$1,%rax # nr-- 130392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 131392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lenc_entry: 132392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # top of round 133392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm9, %xmm1 # 1 : i 134392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pandn %xmm0, %xmm1 # 1 = i<<4 135392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom psrld \$4, %xmm1 # 1 = i 136392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pand %xmm9, %xmm0 # 0 = k 137392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm11, %xmm5 # 2 : a/k 138392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm0, %xmm5 # 2 = a/k 139392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm1, %xmm0 # 0 = j 140392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm10, %xmm3 # 3 : 1/i 141392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm1, %xmm3 # 3 = 1/i 142392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm5, %xmm3 # 3 = iak = 1/i + a/k 143392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm10, %xmm4 # 4 : 1/j 144392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm0, %xmm4 # 4 = 1/j 145392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm5, %xmm4 # 4 = jak = 1/j + a/k 146392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm10, %xmm2 # 2 : 1/iak 147392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm3, %xmm2 # 2 = 1/iak 148392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm0, %xmm2 # 2 = io 149392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm10, %xmm3 # 3 : 1/jak 150392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu (%r9), %xmm5 
151392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm4, %xmm3 # 3 = 1/jak 152392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm1, %xmm3 # 3 = jo 153392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jnz .Lenc_loop 154392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 155392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # middle of last round 156392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo 157392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16 158392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm2, %xmm4 # 4 = sbou 159392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm5, %xmm4 # 4 = sb1u + k 160392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm3, %xmm0 # 0 = sb1t 161392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[] 162392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm4, %xmm0 # 0 = A 163392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm1, %xmm0 164392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom ret 165392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size _vpaes_encrypt_core,.-_vpaes_encrypt_core 166392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 167392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 168392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## Decryption core 169392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 170392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## Same API as encryption core. 
171392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 172392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type _vpaes_decrypt_core,\@abi-omnipotent 173392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 174392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom_vpaes_decrypt_core: 175392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov %rdx, %r9 # load key 176392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov 240(%rdx),%eax 177392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm9, %xmm1 178392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa .Lk_dipt(%rip), %xmm2 # iptlo 179392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pandn %xmm0, %xmm1 180392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov %rax, %r11 181392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom psrld \$4, %xmm1 182392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu (%r9), %xmm5 # round0 key 183392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom shl \$4, %r11 184392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pand %xmm9, %xmm0 185392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm0, %xmm2 186392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa .Lk_dipt+16(%rip), %xmm0 # ipthi 187392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xor \$0x30, %r11 188392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea .Lk_dsbd(%rip),%r10 189392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm1, %xmm0 190392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom and \$0x30, %r11 191392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm5, %xmm2 192392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa .Lk_mc_forward+48(%rip), %xmm5 193392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm2, %xmm0 194392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom add \$16, %r9 195392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom add 
%r10, %r11 196392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Ldec_entry 197392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 198392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 199392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Ldec_loop: 200392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 201392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## Inverse mix columns 202392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 203392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa -0x20(%r10),%xmm4 # 4 : sb9u 204392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm2, %xmm4 # 4 = sb9u 205392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm0, %xmm4 206392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa -0x10(%r10),%xmm0 # 0 : sb9t 207392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm3, %xmm0 # 0 = sb9t 208392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm4, %xmm0 # 0 = ch 209392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom add \$16, %r9 # next round key 210392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 211392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm5, %xmm0 # MC ch 212392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa 0x00(%r10),%xmm4 # 4 : sbdu 213392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm2, %xmm4 # 4 = sbdu 214392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm0, %xmm4 # 4 = ch 215392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa 0x10(%r10),%xmm0 # 0 : sbdt 216392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm3, %xmm0 # 0 = sbdt 217392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm4, %xmm0 # 0 = ch 218392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom sub \$1,%rax # nr-- 219392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 220392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb 
%xmm5, %xmm0 # MC ch 221392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa 0x20(%r10),%xmm4 # 4 : sbbu 222392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm2, %xmm4 # 4 = sbbu 223392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm0, %xmm4 # 4 = ch 224392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa 0x30(%r10),%xmm0 # 0 : sbbt 225392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm3, %xmm0 # 0 = sbbt 226392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm4, %xmm0 # 0 = ch 227392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 228392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm5, %xmm0 # MC ch 229392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa 0x40(%r10),%xmm4 # 4 : sbeu 230392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm2, %xmm4 # 4 = sbeu 231392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm0, %xmm4 # 4 = ch 232392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa 0x50(%r10),%xmm0 # 0 : sbet 233392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm3, %xmm0 # 0 = sbet 234392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm4, %xmm0 # 0 = ch 235392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 236392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom palignr \$12, %xmm5, %xmm5 237392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 238392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Ldec_entry: 239392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # top of round 240392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm9, %xmm1 # 1 : i 241392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pandn %xmm0, %xmm1 # 1 = i<<4 242392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom psrld \$4, %xmm1 # 1 = i 243392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pand %xmm9, %xmm0 # 0 = k 244392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 
movdqa %xmm11, %xmm2 # 2 : a/k 245392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm0, %xmm2 # 2 = a/k 246392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm1, %xmm0 # 0 = j 247392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm10, %xmm3 # 3 : 1/i 248392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm1, %xmm3 # 3 = 1/i 249392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm2, %xmm3 # 3 = iak = 1/i + a/k 250392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm10, %xmm4 # 4 : 1/j 251392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm0, %xmm4 # 4 = 1/j 252392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm2, %xmm4 # 4 = jak = 1/j + a/k 253392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm10, %xmm2 # 2 : 1/iak 254392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm3, %xmm2 # 2 = 1/iak 255392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm0, %xmm2 # 2 = io 256392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm10, %xmm3 # 3 : 1/jak 257392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm4, %xmm3 # 3 = 1/jak 258392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm1, %xmm3 # 3 = jo 259392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu (%r9), %xmm0 260392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jnz .Ldec_loop 261392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 262392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # middle of last round 263392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa 0x60(%r10), %xmm4 # 3 : sbou 264392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm2, %xmm4 # 4 = sbou 265392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm0, %xmm4 # 4 = sb1u + k 266392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa 0x70(%r10), %xmm0 # 0 : sbot 
267a1a5710c055e139ea00e785f9eb55b3af3e4dab1Brian Carlstrom movdqa -0x160(%r11), %xmm2 # .Lk_sr-.Lk_dsbd=-0x160 268392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm3, %xmm0 # 0 = sb1t 269392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm4, %xmm0 # 0 = A 270392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm2, %xmm0 271392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom ret 272392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size _vpaes_decrypt_core,.-_vpaes_decrypt_core 273392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 274392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom######################################################## 275392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## ## 276392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## AES key schedule ## 277392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## ## 278392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom######################################################## 279392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type _vpaes_schedule_core,\@abi-omnipotent 280392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 281392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom_vpaes_schedule_core: 282392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # rdi = key 283392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # rsi = size in bits 284392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # rdx = buffer 285392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # rcx = direction. 
0=encrypt, 1=decrypt 286392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 287392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _vpaes_preheat # load the tables 288392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa .Lk_rcon(%rip), %xmm8 # load rcon 289392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu (%rdi), %xmm0 # load key (unaligned) 290392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 291392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # input transform 292392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm0, %xmm3 293392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea .Lk_ipt(%rip), %r11 294392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _vpaes_schedule_transform 295392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm0, %xmm7 296392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 297392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea .Lk_sr(%rip),%r10 298392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom test %rcx, %rcx 299392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jnz .Lschedule_am_decrypting 300392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 301392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # encrypting, output zeroth round key after transform 302392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu %xmm0, (%rdx) 303392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Lschedule_go 304392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 305392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lschedule_am_decrypting: 306392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # decrypting, output zeroth round key after shiftrows 307392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa (%r8,%r10),%xmm1 308392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm1, %xmm3 309392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu %xmm3, (%rdx) 
310392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom xor \$0x30, %r8 311392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 312392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lschedule_go: 313392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom cmp \$192, %esi 314392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom ja .Lschedule_256 315392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom je .Lschedule_192 316392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # 128: fall though 317392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 318392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 319392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## .schedule_128 320392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 321392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 128-bit specific part of key schedule. 322392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 323392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## This schedule is really simple, because all its parts 324392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## are accomplished by the subroutines. 
325392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 326392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lschedule_128: 327392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov \$10, %esi 328392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 329392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Loop_schedule_128: 330392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _vpaes_schedule_round 331392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom dec %rsi 332392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jz .Lschedule_mangle_last 333392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _vpaes_schedule_mangle # write output 334392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Loop_schedule_128 335392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 336392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 337392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## .aes_schedule_192 338392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 339392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 192-bit specific part of key schedule. 340392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 341392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## The main body of this schedule is the same as the 128-bit 342392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## schedule, but with more smearing. The long, high side is 343392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## stored in %xmm7 as before, and the short, low side is in 344392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## the high bits of %xmm6. 345392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 346392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## This schedule is somewhat nastier, however, because each 347392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## round produces 192 bits of key material, or 1.5 round keys. 
348392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## Therefore, on each cycle we do 2 rounds and produce 3 round 349392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## keys. 350392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 351392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 352392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lschedule_192: 353392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned) 354392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _vpaes_schedule_transform # input transform 355392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm0, %xmm6 # save short part 356392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm4, %xmm4 # clear 4 357392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movhlps %xmm4, %xmm6 # clobber low side with zeros 358392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov \$4, %esi 359392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 360392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Loop_schedule_192: 361392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _vpaes_schedule_round 362392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom palignr \$8,%xmm6,%xmm0 363392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _vpaes_schedule_mangle # save key n 364392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _vpaes_schedule_192_smear 365392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _vpaes_schedule_mangle # save key n+1 366392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _vpaes_schedule_round 367392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom dec %rsi 368392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jz .Lschedule_mangle_last 369392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _vpaes_schedule_mangle # save key n+2 370392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call 
_vpaes_schedule_192_smear 371392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Loop_schedule_192 372392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 373392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 374392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## .aes_schedule_256 375392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 376392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 256-bit specific part of key schedule. 377392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 378392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## The structure here is very similar to the 128-bit 379392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## schedule, but with an additional "low side" in 380392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## %xmm6. The low side's rounds are the same as the 381392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## high side's, except no rcon and no rotation. 382392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 383392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 384392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lschedule_256: 385392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu 16(%rdi),%xmm0 # load key part 2 (unaligned) 386392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _vpaes_schedule_transform # input transform 387392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom mov \$7, %esi 388392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 389392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Loop_schedule_256: 390392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _vpaes_schedule_mangle # output low result 391392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm0, %xmm6 # save cur_lo in xmm6 392392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 393392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # high round 394392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call 
_vpaes_schedule_round 395392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom dec %rsi 396392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jz .Lschedule_mangle_last 397392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _vpaes_schedule_mangle 398392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 399392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # low round. swap xmm7 and xmm6 400392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufd \$0xFF, %xmm0, %xmm0 401392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm7, %xmm5 402392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm6, %xmm7 403392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _vpaes_schedule_low_round 404392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa %xmm5, %xmm7 405392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 406392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jmp .Loop_schedule_256 407392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 408392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 409392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 410392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## .aes_schedule_mangle_last 411392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 412392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## Mangler for last round of key schedule 413392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## Mangles %xmm0 414392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## when encrypting, outputs out(%xmm0) ^ 63 415392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## when decrypting, outputs unskew(%xmm0) 416392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 417392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## Always called right before return... 
jumps to cleanup and exits 418392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom## 419392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align 16 420392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lschedule_mangle_last: 421392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # schedule last round key from xmm0 422392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea .Lk_deskew(%rip),%r11 # prepare to deskew 423392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom test %rcx, %rcx 424392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom jnz .Lschedule_mangle_last_dec 425392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 426392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # encrypting 427392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqa (%r8,%r10),%xmm1 428392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pshufb %xmm1, %xmm0 # output permute 429392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea .Lk_opt(%rip), %r11 # prepare to output transform 430392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom add \$32, %rdx 431392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 432392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lschedule_mangle_last_dec: 433392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom add \$-16, %rdx 434392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor .Lk_s63(%rip), %xmm0 435392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _vpaes_schedule_transform # output transform 436392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu %xmm0, (%rdx) # save last key 437392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom 438392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom # cleanup 439392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm0, %xmm0 440392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm1, %xmm1 441392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom pxor %xmm2, %xmm2 
	pxor	%xmm3, %xmm3
	pxor	%xmm4, %xmm4
	pxor	%xmm5, %xmm5
	pxor	%xmm6, %xmm6
	pxor	%xmm7, %xmm7
	ret
.size	_vpaes_schedule_core,.-_vpaes_schedule_core

##
##  .aes_schedule_192_smear
##
##  Smear the short, low side in the 192-bit key schedule.
##
##  Dwords are written from highest to lowest; "+" means xor.
##
##  Inputs:
##    %xmm7: high side, b  a  x  y
##    %xmm6:  low side, d  c  0  0
##
##  Outputs:
##    %xmm6: b+c+d  b+c  0  0
##    %xmm0: b+c+d  b+c  b  a
##
##  No other register is read.  The zero used to clear the low half
##  of %xmm6 is produced locally in %xmm1, which is therefore clobbered.
##
.type	_vpaes_schedule_192_smear,\@abi-omnipotent
.align	16
_vpaes_schedule_192_smear:
	pshufd	\$0x80, %xmm6, %xmm0	# d c 0 0 -> c 0 0 0
	pxor	%xmm0, %xmm6		# -> c+d c 0 0
	pshufd	\$0xFE, %xmm7, %xmm0	# b a _ _ -> b b b a
	pxor	%xmm0, %xmm6		# -> b+c+d b+c b a
	movdqa	%xmm6, %xmm0		# full result is returned in xmm0
	pxor	%xmm1, %xmm1		# xmm1 = 0
	movhlps	%xmm1, %xmm6		# clobber low side with zeros
	ret
.size	_vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear

##
##  .aes_schedule_round
##
##  Runs one main round of the key schedule on %xmm0, %xmm7
##
##  Specifically, runs subbytes on the high dword of %xmm0
##  then rotates it by one byte and xors into the low dword of
##  %xmm7.
##
##  Adds the rcon byte taken from %xmm8 (the palignr by 15 moves
##  byte 15 of %xmm8 into byte 0 of a zeroed register), then rotates
##  %xmm8 by one byte for the next rcon.
##
##  Smears the dwords of %xmm7 by xoring the low into the
##  second low, result into third, result into highest.
##
##  Returns results in %xmm7 = %xmm0.
##  Clobbers %xmm1-%xmm4, %r11.
##  (NOTE(review): no write to %r11 appears in this routine itself.)
##
##  Register state expected from the caller, as implied by the inline
##  annotations below (not set up here):
##    %xmm9         nibble mask 0x0F0F...
##    %xmm10/%xmm11 inversion tables (1/x and a/k lookups)
##    %xmm12/%xmm13 S-box output tables
##
##  _vpaes_schedule_low_round is a second entry point into the same
##  body that skips the rcon and rotation steps.
##
.type	_vpaes_schedule_round,\@abi-omnipotent
.align	16
_vpaes_schedule_round:
	# extract rcon from xmm8
	pxor	%xmm1, %xmm1		# xmm1 = 0
	palignr	\$15, %xmm8, %xmm1	# xmm1 byte 0 = xmm8 byte 15, rest zero
	palignr	\$15, %xmm8, %xmm8	# rotate xmm8 by one byte for next rcon
	pxor	%xmm1, %xmm7		# fold rcon into byte 0 of xmm7

	# rotate
	pshufd	\$0xFF, %xmm0, %xmm0	# broadcast the high dword to all four
	palignr	\$1, %xmm0, %xmm0	# rotate by one byte (all dwords are equal)

	# fall through...

	# low round: same as high round, but no rotation and no rcon.
_vpaes_schedule_low_round:
	# smear xmm7
	movdqa	%xmm7, %xmm1
	pslldq	\$4, %xmm7
	pxor	%xmm1, %xmm7		# xmm7 ^= xmm7 shifted up one dword
	movdqa	%xmm7, %xmm1
	pslldq	\$8, %xmm7
	pxor	%xmm1, %xmm7		# xmm7 ^= xmm7 shifted up two dwords
	pxor	.Lk_s63(%rip), %xmm7	# xor in the .Lk_s63 constant

	# subbytes
	movdqa	%xmm9, %xmm1
	pandn	%xmm0, %xmm1
	psrld	\$4, %xmm1		# 1 = i
	pand	%xmm9, %xmm0		# 0 = k
	movdqa	%xmm11, %xmm2		# 2 : a/k
	pshufb	%xmm0, %xmm2		# 2 = a/k
	pxor	%xmm1, %xmm0		# 0 = j
	movdqa	%xmm10, %xmm3		# 3 : 1/i
	pshufb	%xmm1, %xmm3		# 3 = 1/i
	pxor	%xmm2, %xmm3		# 3 = iak = 1/i + a/k
	movdqa	%xmm10, %xmm4		# 4 : 1/j
	pshufb	%xmm0, %xmm4		# 4 = 1/j
	pxor	%xmm2, %xmm4		# 4 = jak = 1/j + a/k
	movdqa	%xmm10, %xmm2		# 2 : 1/iak
	pshufb	%xmm3, %xmm2		# 2 = 1/iak
	pxor	%xmm0, %xmm2		# 2 = io
	movdqa	%xmm10, %xmm3		# 3 : 1/jak
	pshufb	%xmm4, %xmm3		# 3 = 1/jak
	pxor	%xmm1, %xmm3		# 3 = jo
	movdqa	%xmm13, %xmm4		# 4 : sbou
	pshufb	%xmm2, %xmm4		# 4 = sbou
	movdqa	%xmm12, %xmm0		# 0 : sbot
	pshufb	%xmm3, %xmm0		# 0 = sb1t
	pxor	%xmm4, %xmm0		# 0 = sbox output

	# add in smeared stuff
	pxor	%xmm7, %xmm0
	movdqa	%xmm0, %xmm7		# result is returned in both xmm0 and xmm7
	ret
.size	_vpaes_schedule_round,.-_vpaes_schedule_round

##
##  .aes_schedule_transform
##
##  Linear-transform %xmm0 according to tables at (%r11)
##
##  Each byte is split into its low and high nibble; the low nibble
##  indexes the 16-byte table at 0(%r11), the high nibble indexes the
##  16-byte table at 16(%r11), and the two lookups are xored.
##
##  Requires that %xmm9 = 0x0F0F... as in preheat
##  Output in %xmm0
##  Clobbers %xmm1, %xmm2
##
.type	_vpaes_schedule_transform,\@abi-omnipotent
.align	16
_vpaes_schedule_transform:
	movdqa	%xmm9, %xmm1
	pandn	%xmm0, %xmm1		# keep the high nibbles of the input
	psrld	\$4, %xmm1		# 1 = high nibbles, moved down
	pand	%xmm9, %xmm0		# 0 = low nibbles
	movdqa	(%r11), %xmm2		# lo
	pshufb	%xmm0, %xmm2		# look up low nibbles
	movdqa	16(%r11), %xmm0		# hi
	pshufb	%xmm1, %xmm0		# look up high nibbles
	pxor	%xmm2, %xmm0		# combine the two lookups
	ret
.size	_vpaes_schedule_transform,.-_vpaes_schedule_transform

##
##  .aes_schedule_mangle
##
##  Mangle xmm0 from (basis-transformed) standard version
##  to our version.
##
##  On encrypt,
##    xor with 0x63
##    multiply by circulant 0,1,1,1
##    apply shiftrows transform
##
##  On decrypt,
##    xor with 0x63
##    multiply by "inverse mixcolumns" circulant E,B,D,9
##    deskew
##    apply shiftrows transform
##
##
##  The direction is selected by %rcx: zero takes the encrypt path,
##  non-zero takes the decrypt path.
##
##  Writes out to (%rdx), and increments or decrements it
##  Keeps track of round number mod 4 in %r8
##  Preserves xmm0
##  Clobbers xmm1-xmm5
##  The decrypt path additionally overwrites %r11 with the address
##  of .Lk_dksd.
##
.type	_vpaes_schedule_mangle,\@abi-omnipotent
.align	16
_vpaes_schedule_mangle:
	movdqa	%xmm0, %xmm4		# save xmm0 for later
	movdqa	.Lk_mc_forward(%rip),%xmm5
	test	%rcx, %rcx
	jnz	.Lschedule_mangle_dec

	# encrypting
	add	\$16, %rdx		# advance the output pointer before the store
	pxor	.Lk_s63(%rip),%xmm4
	pshufb	%xmm5, %xmm4		# first application of the mc_forward permutation
	movdqa	%xmm4, %xmm3
	pshufb	%xmm5, %xmm4		# second application
	pxor	%xmm4, %xmm3
	pshufb	%xmm5, %xmm4		# third application
	pxor	%xmm4, %xmm3		# 3 = xor of the three permuted copies

	jmp	.Lschedule_mangle_both
.align	16
.Lschedule_mangle_dec:
	# inverse mix columns
	# Four table pairs at .Lk_dksd (offsets 0x00 through 0x70) are applied
	# in turn; between pairs the accumulator is permuted by xmm5.
	lea	.Lk_dksd(%rip),%r11
	movdqa	%xmm9, %xmm1
	pandn	%xmm4, %xmm1
	psrld	\$4, %xmm1		# 1 = hi
	pand	%xmm9, %xmm4		# 4 = lo

	movdqa	0x00(%r11), %xmm2
	pshufb	%xmm4, %xmm2
	movdqa	0x10(%r11), %xmm3
	pshufb	%xmm1, %xmm3
	pxor	%xmm2, %xmm3
	pshufb	%xmm5, %xmm3

	movdqa	0x20(%r11), %xmm2
	pshufb	%xmm4, %xmm2
	pxor	%xmm3, %xmm2
	movdqa	0x30(%r11), %xmm3
	pshufb	%xmm1, %xmm3
	pxor	%xmm2, %xmm3
	pshufb	%xmm5, %xmm3

	movdqa	0x40(%r11), %xmm2
	pshufb	%xmm4, %xmm2
	pxor	%xmm3, %xmm2
	movdqa	0x50(%r11), %xmm3
	pshufb	%xmm1, %xmm3
	pxor	%xmm2, %xmm3
	pshufb	%xmm5, %xmm3

	movdqa	0x60(%r11), %xmm2
	pshufb	%xmm4, %xmm2
	pxor	%xmm3, %xmm2
	movdqa	0x70(%r11), %xmm3
	pshufb	%xmm1, %xmm3
	pxor	%xmm2, %xmm3		# last pair: no trailing permutation

	add	\$-16, %rdx		# decrypt path: step the output pointer down

.Lschedule_mangle_both:
	movdqa	(%r8,%r10),%xmm1	# permutation selected by the offset in r8
	pshufb	%xmm1,%xmm3
	add	\$-16, %r8
	and	\$0x30, %r8		# r8 cycles through 0x30, 0x20, 0x10, 0x00
	movdqu	%xmm3, (%rdx)		# unaligned store of the mangled round key
	ret
.size	_vpaes_schedule_mangle,.-_vpaes_schedule_mangle

#
# Interface to OpenSSL
#
# set_encrypt_key takes three arguments.  As used below: rdi is handed
# on unchanged to _vpaes_schedule_core (which loads the user key from
# it), esi is the key length in bits, and rdx is the key structure that
# receives the schedule.  The round count nbits/32+5 is stored at
# offset 240 of that structure.  ecx = 0 selects the encrypt direction
# and r8d = 0x30 is the starting permutation offset.  Always returns 0
# in eax.
#
.globl	${PREFIX}_set_encrypt_key
.type	${PREFIX}_set_encrypt_key,\@function,3
.align	16
${PREFIX}_set_encrypt_key:
___
# Win64 only: reserve 0xb8 bytes and save xmm6-xmm15 at 0x10..0xa0(%rsp)
# before the schedule code overwrites them.
$code.=<<___ if ($win64);
	lea	-0xb8(%rsp),%rsp
	movaps	%xmm6,0x10(%rsp)
	movaps	%xmm7,0x20(%rsp)
	movaps	%xmm8,0x30(%rsp)
	movaps	%xmm9,0x40(%rsp)
	movaps	%xmm10,0x50(%rsp)
	movaps	%xmm11,0x60(%rsp)
	movaps	%xmm12,0x70(%rsp)
	movaps	%xmm13,0x80(%rsp)
	movaps	%xmm14,0x90(%rsp)
	movaps	%xmm15,0xa0(%rsp)
.Lenc_key_body:
___
$code.=<<___;
	mov	%esi,%eax
	shr	\$5,%eax
	add	\$5,%eax
	mov	%eax,240(%rdx)	# AES_KEY->rounds = nbits/32+5;

	mov	\$0,%ecx		# direction flag: 0 = encrypt
	mov	\$0x30,%r8d		# initial permutation offset
	call	_vpaes_schedule_core
___
# Win64 only: restore xmm6-xmm15 and release the frame.
$code.=<<___ if ($win64);
	movaps	0x10(%rsp),%xmm6
	movaps	0x20(%rsp),%xmm7
	movaps	0x30(%rsp),%xmm8
	movaps	0x40(%rsp),%xmm9
	movaps	0x50(%rsp),%xmm10
	movaps	0x60(%rsp),%xmm11
	movaps	0x70(%rsp),%xmm12
	movaps	0x80(%rsp),%xmm13
	movaps	0x90(%rsp),%xmm14
	movaps	0xa0(%rsp),%xmm15
	lea	0xb8(%rsp),%rsp
.Lenc_key_epilogue:
___
$code.=<<___;
	xor	%eax,%eax		# return 0
	ret
.size	${PREFIX}_set_encrypt_key,.-${PREFIX}_set_encrypt_key

#
# set_decrypt_key takes the same three arguments as set_encrypt_key.
# It stores the same round count, then points rdx at
# 16 + 16*rounds bytes past the start of the key structure, because the
# decrypt paths of the schedule code step rdx downward as they store.
# ecx = 1 selects the decrypt direction.  Always returns 0 in eax.
#
.globl	${PREFIX}_set_decrypt_key
.type	${PREFIX}_set_decrypt_key,\@function,3
.align	16
${PREFIX}_set_decrypt_key:
___
# Win64 only: reserve 0xb8 bytes and save xmm6-xmm15 at 0x10..0xa0(%rsp).
$code.=<<___ if ($win64);
	lea	-0xb8(%rsp),%rsp
	movaps	%xmm6,0x10(%rsp)
	movaps	%xmm7,0x20(%rsp)
	movaps	%xmm8,0x30(%rsp)
	movaps	%xmm9,0x40(%rsp)
	movaps	%xmm10,0x50(%rsp)
	movaps	%xmm11,0x60(%rsp)
	movaps	%xmm12,0x70(%rsp)
	movaps	%xmm13,0x80(%rsp)
	movaps	%xmm14,0x90(%rsp)
	movaps	%xmm15,0xa0(%rsp)
.Ldec_key_body:
___
$code.=<<___;
	mov	%esi,%eax
	shr	\$5,%eax
	add	\$5,%eax
	mov	%eax,240(%rdx)	# AES_KEY->rounds = nbits/32+5;
	shl	\$4,%eax		# rounds * 16
	lea	16(%rdx,%rax),%rdx	# start from the far end of the schedule

	mov	\$1,%ecx		# direction flag: 1 = decrypt
	mov	%esi,%r8d
	shr	\$1,%r8d
	and	\$32,%r8d
	xor	\$32,%r8d	# nbits==192?0:32
	call	_vpaes_schedule_core
___
# Win64 only: restore xmm6-xmm15 and release the frame.
$code.=<<___ if ($win64);
	movaps	0x10(%rsp),%xmm6
	movaps	0x20(%rsp),%xmm7
	movaps	0x30(%rsp),%xmm8
	movaps	0x40(%rsp),%xmm9
	movaps	0x50(%rsp),%xmm10
	movaps	0x60(%rsp),%xmm11
	movaps	0x70(%rsp),%xmm12
	movaps	0x80(%rsp),%xmm13
	movaps	0x90(%rsp),%xmm14
	movaps	0xa0(%rsp),%xmm15
	lea	0xb8(%rsp),%rsp
.Ldec_key_epilogue:
___
$code.=<<___;
	xor	%eax,%eax		# return 0
	ret
.size	${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key

#
# encrypt takes three arguments.  One 16-byte block is loaded
# (unaligned) from rdi, run through _vpaes_preheat and
# _vpaes_encrypt_core, and the 16-byte result is stored (unaligned)
# to rsi.  rdx is not touched here; presumably it carries the key
# schedule for the core routine -- confirm against _vpaes_encrypt_core.
#
.globl	${PREFIX}_encrypt
.type	${PREFIX}_encrypt,\@function,3
.align	16
${PREFIX}_encrypt:
___
# Win64 only: reserve 0xb8 bytes and save xmm6-xmm15 at 0x10..0xa0(%rsp).
$code.=<<___ if ($win64);
	lea	-0xb8(%rsp),%rsp
	movaps	%xmm6,0x10(%rsp)
	movaps	%xmm7,0x20(%rsp)
	movaps	%xmm8,0x30(%rsp)
	movaps	%xmm9,0x40(%rsp)
	movaps	%xmm10,0x50(%rsp)
	movaps	%xmm11,0x60(%rsp)
	movaps	%xmm12,0x70(%rsp)
	movaps	%xmm13,0x80(%rsp)
	movaps	%xmm14,0x90(%rsp)
	movaps	%xmm15,0xa0(%rsp)
.Lenc_body:
___
$code.=<<___;
	movdqu	(%rdi),%xmm0		# load the input block
	call	_vpaes_preheat
	call	_vpaes_encrypt_core
	movdqu	%xmm0,(%rsi)		# store the output block
___
# Win64 only: restore xmm6-xmm15 and release the frame.
$code.=<<___ if ($win64);
	movaps	0x10(%rsp),%xmm6
	movaps	0x20(%rsp),%xmm7
	movaps	0x30(%rsp),%xmm8
	movaps	0x40(%rsp),%xmm9
	movaps	0x50(%rsp),%xmm10
	movaps	0x60(%rsp),%xmm11
	movaps	0x70(%rsp),%xmm12
	movaps	0x80(%rsp),%xmm13
	movaps	0x90(%rsp),%xmm14
	movaps	0xa0(%rsp),%xmm15
	lea	0xb8(%rsp),%rsp
.Lenc_epilogue:
___
$code.=<<___;
	ret
.size	${PREFIX}_encrypt,.-${PREFIX}_encrypt

#
# decrypt mirrors encrypt: one 16-byte block is loaded from rdi,
# run through _vpaes_preheat and _vpaes_decrypt_core, and stored to rsi.
#
.globl	${PREFIX}_decrypt
.type	${PREFIX}_decrypt,\@function,3
.align	16
${PREFIX}_decrypt:
___
# Win64 only: reserve 0xb8 bytes and save xmm6-xmm15 at 0x10..0xa0(%rsp).
$code.=<<___ if ($win64);
	lea	-0xb8(%rsp),%rsp
	movaps	%xmm6,0x10(%rsp)
	movaps	%xmm7,0x20(%rsp)
	movaps	%xmm8,0x30(%rsp)
	movaps	%xmm9,0x40(%rsp)
	movaps	%xmm10,0x50(%rsp)
	movaps	%xmm11,0x60(%rsp)
	movaps	%xmm12,0x70(%rsp)
	movaps	%xmm13,0x80(%rsp)
	movaps	%xmm14,0x90(%rsp)
	movaps	%xmm15,0xa0(%rsp)
.Ldec_body:
834392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 835392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 836392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu (%rdi),%xmm0 837392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _vpaes_preheat 838392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom call _vpaes_decrypt_core 839392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movdqu %xmm0,(%rsi) 840392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 841392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($win64); 842392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x10(%rsp),%xmm6 843392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x20(%rsp),%xmm7 844392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x30(%rsp),%xmm8 845392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x40(%rsp),%xmm9 846392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x50(%rsp),%xmm10 847392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x60(%rsp),%xmm11 848392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x70(%rsp),%xmm12 849392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x80(%rsp),%xmm13 850392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0x90(%rsp),%xmm14 851392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom movaps 0xa0(%rsp),%xmm15 852392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom lea 0xb8(%rsp),%rsp 853392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Ldec_epilogue: 854392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 855392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___; 856392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom ret 857392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size ${PREFIX}_decrypt,.-${PREFIX}_decrypt 858392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___ 859392aa7cc7d2b122614c5393c3e357da07fd07af3Brian 
{
# Registers holding the six arguments of the CBC entry point, in order.
my ($inp,$out,$len,$key,$ivp,$enc)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9");
# void AES_cbc_encrypt (const unsigned char *inp, unsigned char *out,
#                       size_t length, const AES_KEY *key,
#                       unsigned char *ivp,const int enc);
#
# Emitted behaviour, as visible below:
#   * fewer than 16 bytes requested -> jump to .Lcbc_abort and return
#     without touching the IV;
#   * otherwise whole 16-byte blocks are processed until the running length
#     goes negative, so a trailing partial block is ignored;
#   * enc == 0 selects the decrypt loop, anything else the encrypt loop;
#   * the final chaining value is written back through ivp.
$code.=<<___;
.globl	${PREFIX}_cbc_encrypt
.type	${PREFIX}_cbc_encrypt,\@function,6
.align	16
${PREFIX}_cbc_encrypt:
	xchg	$key,$len
___
# The emitted xchg swaps %rdx and %rcx at run time; mirror that in the Perl
# names so that from here on $len is "%rcx" and $key is "%rdx".
# NOTE(review): presumably done because the core routines take the key
# schedule in %rdx -- confirm against _vpaes_encrypt_core/_decrypt_core.
($len,$key)=($key,$len);
$code.=<<___;
	sub	\$16,$len
	jc	.Lcbc_abort
___
# Win64 only: 0xb8-byte frame holding %xmm6-%xmm15 (callee-saved under the
# Microsoft x64 ABI).  .Lcbc_body is HandlerData[0] for this function.
$code.=<<___ if ($win64);
	lea	-0xb8(%rsp),%rsp
	movaps	%xmm6,0x10(%rsp)
	movaps	%xmm7,0x20(%rsp)
	movaps	%xmm8,0x30(%rsp)
	movaps	%xmm9,0x40(%rsp)
	movaps	%xmm10,0x50(%rsp)
	movaps	%xmm11,0x60(%rsp)
	movaps	%xmm12,0x70(%rsp)
	movaps	%xmm13,0x80(%rsp)
	movaps	%xmm14,0x90(%rsp)
	movaps	%xmm15,0xa0(%rsp)
.Lcbc_body:
___
# Main loops.  $out is turned into the offset (out - inp) so one advancing
# pointer ($inp) addresses both buffers via ($out,$inp).  %xmm6 carries the
# chaining value; the decrypt loop keeps the current ciphertext in %xmm7
# until the plaintext has been XORed with the previous one.
$code.=<<___;
	movdqu	($ivp),%xmm6		# load IV
	sub	$inp,$out
	call	_vpaes_preheat
	cmp	\$0,${enc}d
	je	.Lcbc_dec_loop
	jmp	.Lcbc_enc_loop
.align	16
.Lcbc_enc_loop:
	movdqu	($inp),%xmm0
	pxor	%xmm6,%xmm0
	call	_vpaes_encrypt_core
	movdqa	%xmm0,%xmm6
	movdqu	%xmm0,($out,$inp)
	lea	16($inp),$inp
	sub	\$16,$len
	jnc	.Lcbc_enc_loop
	jmp	.Lcbc_done
.align	16
.Lcbc_dec_loop:
	movdqu	($inp),%xmm0
	movdqa	%xmm0,%xmm7
	call	_vpaes_decrypt_core
	pxor	%xmm6,%xmm0
	movdqa	%xmm7,%xmm6
	movdqu	%xmm0,($out,$inp)
	lea	16($inp),$inp
	sub	\$16,$len
	jnc	.Lcbc_dec_loop
.Lcbc_done:
	movdqu	%xmm6,($ivp)		# save IV
___
# Win64 only: restore %xmm6-%xmm15 and release the frame.  .Lcbc_epilogue is
# HandlerData[1] for this function.
$code.=<<___ if ($win64);
	movaps	0x10(%rsp),%xmm6
	movaps	0x20(%rsp),%xmm7
	movaps	0x30(%rsp),%xmm8
	movaps	0x40(%rsp),%xmm9
	movaps	0x50(%rsp),%xmm10
	movaps	0x60(%rsp),%xmm11
	movaps	0x70(%rsp),%xmm12
	movaps	0x80(%rsp),%xmm13
	movaps	0x90(%rsp),%xmm14
	movaps	0xa0(%rsp),%xmm15
	lea	0xb8(%rsp),%rsp
.Lcbc_epilogue:
___
# .Lcbc_abort sits after the Win64 epilogue, so the short-input path (taken
# before any frame was set up) returns without popping one.
$code.=<<___;
.Lcbc_abort:
	ret
.size	${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt
___
}
# _vpaes_preheat and the constant pool.
#
# _vpaes_preheat points %r10 at .Lk_s0F and loads %xmm9-%xmm15 from fixed
# offsets relative to it, so the order and size of the tables from .Lk_inv
# through .Lk_sb2 must not change without updating those offsets.
# NOTE(review): the header emitted below still says "_aes_preheat" and
# ".aes_consts" although the labels defined are _vpaes_preheat and
# _vpaes_consts, and the .asciz banner spells "Permutaion".  Both are left
# untouched here because they are part of the emitted text.
$code.=<<___;
##
##  _aes_preheat
##
##  Fills register %r10 -> .aes_consts (so you can -fPIC)
##  and %xmm9-%xmm15 as specified below.
##
.type	_vpaes_preheat,\@abi-omnipotent
.align	16
_vpaes_preheat:
	lea	.Lk_s0F(%rip), %r10
	movdqa	-0x20(%r10), %xmm10	# .Lk_inv
	movdqa	-0x10(%r10), %xmm11	# .Lk_inv+16
	movdqa	0x00(%r10), %xmm9	# .Lk_s0F
	movdqa	0x30(%r10), %xmm13	# .Lk_sb1
	movdqa	0x40(%r10), %xmm12	# .Lk_sb1+16
	movdqa	0x50(%r10), %xmm15	# .Lk_sb2
	movdqa	0x60(%r10), %xmm14	# .Lk_sb2+16
	ret
.size	_vpaes_preheat,.-_vpaes_preheat
########################################################
##                                                    ##
##                     Constants                      ##
##                                                    ##
########################################################
.type	_vpaes_consts,\@object
.align	64
_vpaes_consts:
.Lk_inv:	# inv, inva
	.quad	0x0E05060F0D080180, 0x040703090A0B0C02
	.quad	0x01040A060F0B0780, 0x030D0E0C02050809

.Lk_s0F:	# s0F
	.quad	0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F

.Lk_ipt:	# input transform (lo, hi)
	.quad	0xC2B2E8985A2A7000, 0xCABAE09052227808
	.quad	0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81

.Lk_sb1:	# sb1u, sb1t
	.quad	0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
	.quad	0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
.Lk_sb2:	# sb2u, sb2t
	.quad	0xE27A93C60B712400, 0x5EB7E955BC982FCD
	.quad	0x69EB88400AE12900, 0xC2A163C8AB82234A
.Lk_sbo:	# sbou, sbot
	.quad	0xD0D26D176FBDC700, 0x15AABF7AC502A878
	.quad	0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA

.Lk_mc_forward:	# mc_forward
	.quad	0x0407060500030201, 0x0C0F0E0D080B0A09
	.quad	0x080B0A0904070605, 0x000302010C0F0E0D
	.quad	0x0C0F0E0D080B0A09, 0x0407060500030201
	.quad	0x000302010C0F0E0D, 0x080B0A0904070605

.Lk_mc_backward:# mc_backward
	.quad	0x0605040702010003, 0x0E0D0C0F0A09080B
	.quad	0x020100030E0D0C0F, 0x0A09080B06050407
	.quad	0x0E0D0C0F0A09080B, 0x0605040702010003
	.quad	0x0A09080B06050407, 0x020100030E0D0C0F

.Lk_sr:		# sr
	.quad	0x0706050403020100, 0x0F0E0D0C0B0A0908
	.quad	0x030E09040F0A0500, 0x0B06010C07020D08
	.quad	0x0F060D040B020900, 0x070E050C030A0108
	.quad	0x0B0E0104070A0D00, 0x0306090C0F020508

.Lk_rcon:	# rcon
	.quad	0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81

.Lk_s63:	# s63: all equal to 0x63 transformed
	.quad	0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B

.Lk_opt:	# output transform
	.quad	0xFF9F4929D6B66000, 0xF7974121DEBE6808
	.quad	0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0

.Lk_deskew:	# deskew tables: inverts the sbox's "skew"
	.quad	0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
	.quad	0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77

##
##  Decryption stuff
##  Key schedule constants
##
.Lk_dksd:	# decryption key schedule: invskew x*D
	.quad	0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
	.quad	0x41C277F4B5368300, 0x5FDC69EAAB289D1E
.Lk_dksb:	# decryption key schedule: invskew x*B
	.quad	0x9A4FCA1F8550D500, 0x03D653861CC94C99
	.quad	0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
.Lk_dkse:	# decryption key schedule: invskew x*E + 0x63
	.quad	0xD5031CCA1FC9D600, 0x53859A4C994F5086
	.quad	0xA23196054FDC7BE8, 0xCD5EF96A20B31487
.Lk_dks9:	# decryption key schedule: invskew x*9
	.quad	0xB6116FC87ED9A700, 0x4AED933482255BFC
	.quad	0x4576516227143300, 0x8BB89FACE9DAFDCE

##
##  Decryption stuff
##  Round function constants
##
.Lk_dipt:	# decryption input transform
	.quad	0x0F505B040B545F00, 0x154A411E114E451A
	.quad	0x86E383E660056500, 0x12771772F491F194

.Lk_dsb9:	# decryption sbox output *9*u, *9*t
	.quad	0x851C03539A86D600, 0xCAD51F504F994CC9
	.quad	0xC03B1789ECD74900, 0x725E2C9EB2FBA565
.Lk_dsbd:	# decryption sbox output *D*u, *D*t
	.quad	0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
	.quad	0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
.Lk_dsbb:	# decryption sbox output *B*u, *B*t
	.quad	0xD022649296B44200, 0x602646F6B0F2D404
	.quad	0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
.Lk_dsbe:	# decryption sbox output *E*u, *E*t
	.quad	0x46F2929626D4D000, 0x2242600464B4F6B0
	.quad	0x0C55A6CDFFAAC100, 0x9467F36B98593E32
.Lk_dsbo:	# decryption sbox final output
	.quad	0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
	.quad	0x12D7560F93441D00, 0xCA4B8159D8C58E9C
.asciz	"Vector Permutaion AES for x86_64/SSSE3, Mike Hamburg (Stanford University)"
.align	64
.size	_vpaes_consts,.-_vpaes_consts
___

# Win64 structured-exception support: one shared language-specific handler
# plus the .pdata/.xdata records that attach it to each public function.
#
# The handler compares context->Rip with the two HandlerData RVAs (body and
# epilogue labels).  Only when Rip lies inside [body, epilogue) does it copy
# the ten saved XMM registers from 16(%rsp) back into the CONTEXT record and
# step the recorded stack pointer over the 0xb8-byte frame.  In every case
# it then rewrites Rsp/Rsi/Rdi in the context, copies the context to
# disp->ContextRecord, calls RtlVirtualUnwind and returns 1
# (ExceptionContinueSearch).
# NOTE(review): the loads from 8(%rax)/16(%rax) at .Lin_prologue presumably
# fetch the %rdi/%rsi copies stored by the translator-generated Win64
# prologue -- confirm against x86_64-xlate.pl.
if ($win64) {
# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
#		CONTEXT *context,DISPATCHER_CONTEXT *disp)
$rec="%rcx";
$frame="%rdx";
$context="%r8";
$disp="%r9";

$code.=<<___;
.extern	__imp_RtlVirtualUnwind
.type	se_handler,\@abi-omnipotent
.align	16
se_handler:
	push	%rsi
	push	%rdi
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	pushfq
	sub	\$64,%rsp

	mov	120($context),%rax	# pull context->Rax
	mov	248($context),%rbx	# pull context->Rip

	mov	8($disp),%rsi		# disp->ImageBase
	mov	56($disp),%r11		# disp->HandlerData

	mov	0(%r11),%r10d		# HandlerData[0]
	lea	(%rsi,%r10),%r10	# prologue label
	cmp	%r10,%rbx		# context->Rip<prologue label
	jb	.Lin_prologue

	mov	152($context),%rax	# pull context->Rsp

	mov	4(%r11),%r10d		# HandlerData[1]
	lea	(%rsi,%r10),%r10	# epilogue label
	cmp	%r10,%rbx		# context->Rip>=epilogue label
	jae	.Lin_prologue

	lea	16(%rax),%rsi		# %xmm save area
	lea	512($context),%rdi	# &context.Xmm6
	mov	\$20,%ecx		# 10*sizeof(%xmm0)/sizeof(%rax)
	.long	0xa548f3fc		# cld; rep movsq
	lea	0xb8(%rax),%rax		# adjust stack pointer

.Lin_prologue:
	mov	8(%rax),%rdi
	mov	16(%rax),%rsi
	mov	%rax,152($context)	# restore context->Rsp
	mov	%rsi,168($context)	# restore context->Rsi
	mov	%rdi,176($context)	# restore context->Rdi

	mov	40($disp),%rdi		# disp->ContextRecord
	mov	$context,%rsi		# context
	mov	\$`1232/8`,%ecx		# sizeof(CONTEXT)
	.long	0xa548f3fc		# cld; rep movsq

	mov	$disp,%rsi
	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
	mov	0(%rsi),%r8		# arg3, disp->ControlPc
	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
	mov	40(%rsi),%r10		# disp->ContextRecord
	lea	56(%rsi),%r11		# &disp->HandlerData
	lea	24(%rsi),%r12		# &disp->EstablisherFrame
	mov	%r10,32(%rsp)		# arg5
	mov	%r11,40(%rsp)		# arg6
	mov	%r12,48(%rsp)		# arg7
	mov	%rcx,56(%rsp)		# arg8, (NULL)
	call	*__imp_RtlVirtualUnwind(%rip)

	mov	\$1,%eax		# ExceptionContinueSearch
	add	\$64,%rsp
	popfq
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	pop	%rdi
	pop	%rsi
	ret
.size	se_handler,.-se_handler

.section	.pdata
.align	4
	.rva	.LSEH_begin_${PREFIX}_set_encrypt_key
	.rva	.LSEH_end_${PREFIX}_set_encrypt_key
	.rva	.LSEH_info_${PREFIX}_set_encrypt_key

	.rva	.LSEH_begin_${PREFIX}_set_decrypt_key
	.rva	.LSEH_end_${PREFIX}_set_decrypt_key
	.rva	.LSEH_info_${PREFIX}_set_decrypt_key

	.rva	.LSEH_begin_${PREFIX}_encrypt
	.rva	.LSEH_end_${PREFIX}_encrypt
	.rva	.LSEH_info_${PREFIX}_encrypt

	.rva	.LSEH_begin_${PREFIX}_decrypt
	.rva	.LSEH_end_${PREFIX}_decrypt
	.rva	.LSEH_info_${PREFIX}_decrypt

	.rva	.LSEH_begin_${PREFIX}_cbc_encrypt
	.rva	.LSEH_end_${PREFIX}_cbc_encrypt
	.rva	.LSEH_info_${PREFIX}_cbc_encrypt

.section	.xdata
.align	8
.LSEH_info_${PREFIX}_set_encrypt_key:
	.byte	9,0,0,0
	.rva	se_handler
	.rva	.Lenc_key_body,.Lenc_key_epilogue	# HandlerData[]
.LSEH_info_${PREFIX}_set_decrypt_key:
	.byte	9,0,0,0
	.rva	se_handler
	.rva	.Ldec_key_body,.Ldec_key_epilogue	# HandlerData[]
.LSEH_info_${PREFIX}_encrypt:
	.byte	9,0,0,0
	.rva	se_handler
	.rva	.Lenc_body,.Lenc_epilogue		# HandlerData[]
.LSEH_info_${PREFIX}_decrypt:
	.byte	9,0,0,0
	.rva	se_handler
	.rva	.Ldec_body,.Ldec_epilogue		# HandlerData[]
.LSEH_info_${PREFIX}_cbc_encrypt:
	.byte	9,0,0,0
	.rva	se_handler
	.rva	.Lcbc_body,.Lcbc_epilogue		# HandlerData[]
___
}

# Evaluate every `...` span in the generated text as a Perl expression
# (used above for `1232/8`).
$code =~ s/\`([^\`]*)\`/eval($1)/gem;

print $code;

# Buffered write errors (full disk, closed pipe) only surface when the
# handle is closed; fail loudly rather than leave truncated assembly behind.
close STDOUT or die "error closing STDOUT: $!";