#!/usr/bin/env perl

######################################################################
## Constant-time SSSE3 AES core implementation.
## version 0.1
##
## By Mike Hamburg (Stanford University), 2009
## Public domain.
##
## For details see http://shiftleft.org/papers/vector_aes/ and
## http://crypto.stanford.edu/vpaes/.

######################################################################
# September 2011.
#
# Interface to OpenSSL as "almost" drop-in replacement for
# aes-x86_64.pl. "Almost" refers to the fact that AES_cbc_encrypt
# doesn't handle partial vectors (doesn't have to if called from
# EVP only). "Drop-in" implies that this module doesn't share key
# schedule structure with the original nor does it make any
# assumption about its alignment...
#
# Performance summary. aes-x86_64.pl column lists large-block CBC
# encrypt/decrypt/with-hyper-threading-off(*) results in cycles per
# byte processed with 128-bit key, and vpaes-x86_64.pl column -
# [also large-block CBC] encrypt/decrypt.
#
#		aes-x86_64.pl		vpaes-x86_64.pl
#
# Core 2(**)	30.5/43.7/14.3		21.8/25.7(***)
# Nehalem	30.5/42.2/14.6		 9.8/11.8
# Atom		63.9/79.0/32.1		64.0/84.8(***)
#
# (*)	"Hyper-threading" in this context refers to cache shared among
#	multiple cores rather than specifically to Intel HTT. As the vast
#	majority of contemporary cores share cache, the slower code path
#	is commonplace. In other words, "with-hyper-threading-off"
#	results are presented mostly for reference purposes.
#
# (**)	"Core 2" refers to initial 65nm design, a.k.a. Conroe.
#
# (***)	Less impressive improvement on Core 2 and Atom is due to slow
#	pshufb, yet it's still a respectable +40%/78% improvement on
#	Core 2 (as implied, over the "hyper-threading-safe" code path).
#
#						<appro@openssl.org>

# Command line: [flavour] output.  When the first argument contains a dot it
# is taken to be the output file name and the flavour is left undefined.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# $win64 is set when the flavour names nasm/masm/mingw64 or the output file
# name ends in .asm.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Directory part of this script's path (with trailing separator), or the
# empty string when the script was invoked without any directory component.
# Taking $1 only on a successful match avoids depending on a stale capture.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# Look for the translator next to this script first, then in ../../perlasm.
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator.
# The translator path is quoted so that a directory containing spaces still
# works, and a failure to start the pipe is fatal rather than silently
# producing no output.
open OUT,"| \"$^X\" \"$xlate\" $flavour $output"
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# NOTE(review): presumably the prefix for the exported symbol names; it is
# not used in the part of the file visible here -- confirm against the rest.
$PREFIX="vpaes";
63392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
64392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
65392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.text
66392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
##
##  _vpaes_encrypt_core
##
##  AES-encrypt %xmm0.
##
##  Inputs:
##     %xmm0 = input
##     %xmm9-%xmm15 as in _vpaes_preheat
##    (%rdx) = scheduled keys, with the round counter read from offset 240
##
##  Output in %xmm0
##  Clobbers  %xmm1-%xmm5, %r9, %r10, %r11, %rax
##  Preserves %xmm6 - %xmm8 so you get some local vectors
##
##  Register roles inside the routine:
##     %r9  = pointer to the next round key
##     %r10 = address of .Lk_mc_backward; the other tables are addressed
##            relative to it
##     %r11 = byte offset selecting the current 16-byte table row; it is
##            advanced by 16 per round and masked with 0x30
##     %rax = remaining round count
##
##  The jnz at the bottom of .Lenc_entry tests the flags left by the
##  round-counter sub at the end of .Lenc_loop; only SSE moves, shuffles
##  and logic ops sit between the two, and those do not write the flags.
##  On the initial jmp to .Lenc_entry the flags come from the add to %r9,
##  whose result is a non-zero address, so the loop body is entered.
##
##  NOTE(review): %xmm9 is used as a mask that splits each byte into two
##  4-bit halves, so it is presumably 0x0F in every byte; it is set up by
##  _vpaes_preheat, which is outside this view -- confirm there.
##
.type	_vpaes_encrypt_core,\@abi-omnipotent
.align 16
_vpaes_encrypt_core:
	mov	%rdx,	%r9		# r9 = key pointer
	mov	\$16,	%r11		# initial table row offset
	mov	240(%rdx),%eax		# eax = round counter
	movdqa	%xmm9,	%xmm1
	movdqa	.Lk_ipt(%rip), %xmm2	# iptlo
	pandn	%xmm0,	%xmm1		# 1 = input bits outside the xmm9 mask
	movdqu	(%r9),	%xmm5		# round0 key
	psrld	\$4,	%xmm1		# 1 = those bits moved down by 4
	pand	%xmm9,	%xmm0		# 0 = input bits inside the xmm9 mask
	pshufb	%xmm0,	%xmm2		# 2 = iptlo lookup
	movdqa	.Lk_ipt+16(%rip), %xmm0	# ipthi
	pshufb	%xmm1,	%xmm0		# 0 = ipthi lookup
	pxor	%xmm5,	%xmm2		# 2 = iptlo lookup + round0 key
	pxor	%xmm2,	%xmm0		# 0 = transformed input + round0 key
	add	\$16,	%r9		# next key; result is non-zero, so ZF is clear
	lea	.Lk_mc_backward(%rip),%r10
	jmp	.Lenc_entry

.align 16
.Lenc_loop:
	# middle of middle round
	movdqa  %xmm13,	%xmm4	# 4 : sb1u
	pshufb  %xmm2,	%xmm4	# 4 = sb1u
	pxor	%xmm5,	%xmm4	# 4 = sb1u + k
	movdqa  %xmm12,	%xmm0	# 0 : sb1t
	pshufb  %xmm3,	%xmm0	# 0 = sb1t
	pxor	%xmm4,	%xmm0	# 0 = A
	movdqa  %xmm15,	%xmm5	# 5 : sb2u
	pshufb	%xmm2,	%xmm5	# 5 = sb2u
	movdqa	-0x40(%r11,%r10), %xmm1		# .Lk_mc_forward[]
	movdqa	%xmm14, %xmm2	# 2 : sb2t
	pshufb	%xmm3,  %xmm2	# 2 = sb2t
	pxor	%xmm5,	%xmm2	# 2 = 2A
	movdqa	(%r11,%r10), %xmm4		# .Lk_mc_backward[]
	movdqa	%xmm0,  %xmm3	# 3 = A
	pshufb  %xmm1,  %xmm0	# 0 = B
	add	\$16,	%r9	# next key
	pxor	%xmm2,  %xmm0	# 0 = 2A+B
	pshufb	%xmm4,	%xmm3	# 3 = D
	add	\$16,	%r11	# next mc
	pxor	%xmm0,	%xmm3	# 3 = 2A+B+D
	pshufb  %xmm1,	%xmm0	# 0 = 2B+C
	and	\$0x30,	%r11	# ... mod 4
	pxor	%xmm3,	%xmm0	# 0 = 2A+3B+C+D
	sub	\$1,%rax	# nr--; sets the flags tested by jnz below

.Lenc_entry:
	# top of round
	movdqa  %xmm9, 	%xmm1	# 1 : i
	pandn	%xmm0, 	%xmm1	# 1 = i<<4
	psrld	\$4,   	%xmm1   # 1 = i
	pand	%xmm9, 	%xmm0   # 0 = k
	movdqa	%xmm11, %xmm5	# 5 : a/k
	pshufb  %xmm0,  %xmm5	# 5 = a/k
	pxor	%xmm1,	%xmm0	# 0 = j
	movdqa	%xmm10,	%xmm3  	# 3 : 1/i
	pshufb  %xmm1, 	%xmm3  	# 3 = 1/i
	pxor	%xmm5, 	%xmm3  	# 3 = iak = 1/i + a/k
	movdqa	%xmm10,	%xmm4  	# 4 : 1/j
	pshufb	%xmm0, 	%xmm4  	# 4 = 1/j
	pxor	%xmm5, 	%xmm4  	# 4 = jak = 1/j + a/k
	movdqa	%xmm10,	%xmm2  	# 2 : 1/iak
	pshufb  %xmm3,	%xmm2  	# 2 = 1/iak
	pxor	%xmm0, 	%xmm2  	# 2 = io
	movdqa	%xmm10, %xmm3   # 3 : 1/jak
	movdqu	(%r9),	%xmm5	# 5 = round key k for this round
	pshufb  %xmm4,  %xmm3   # 3 = 1/jak
	pxor	%xmm1,  %xmm3   # 3 = jo
	jnz	.Lenc_loop	# flags from nr-- (or from the add on first entry)

	# middle of last round
	movdqa	-0x60(%r10), %xmm4	# 4 : sbou	.Lk_sbo
	movdqa	-0x50(%r10), %xmm0	# 0 : sbot	.Lk_sbo+16
	pshufb  %xmm2,  %xmm4	# 4 = sbou
	pxor	%xmm5,  %xmm4	# 4 = sbou + k
	pshufb  %xmm3,	%xmm0	# 0 = sbot
	movdqa	0x40(%r11,%r10), %xmm1		# .Lk_sr[]
	pxor	%xmm4,	%xmm0	# 0 = A
	pshufb	%xmm1,	%xmm0	# 0 = A permuted by the selected .Lk_sr row
	ret
.size	_vpaes_encrypt_core,.-_vpaes_encrypt_core
166392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
##
##  Decryption core
##
##  Same API as encryption core:
##
##  Inputs:
##     %xmm0 = input
##     %xmm9-%xmm11 as used by the encryption core (the remaining preheated
##                  registers are not read here)
##    (%rdx) = scheduled keys, with the round counter read from offset 240
##
##  Output in %xmm0
##  Clobbers  %xmm1-%xmm5, %r9, %r10, %r11, %rax
##
##  Register roles inside the routine:
##     %r9  = pointer to the next round key
##     %r10 = address of .Lk_dsbd; the sb9/sbd/sbb/sbe/sbo tables are
##            addressed relative to it
##     %r11 = %r10 + (((nr << 4) ^ 0x30) & 0x30), used once at the end to
##            pick the .Lk_sr row for the final permutation
##     %xmm5 = mix-columns shuffle, starting from .Lk_mc_forward+48 and
##             rotated by palignr at the end of every loop iteration
##     %rax = remaining round count
##
##  The jnz at the bottom of .Ldec_entry tests the flags left by the
##  round-counter sub inside .Ldec_loop; only SSE instructions follow that
##  sub, and they do not write the flags.  On the initial jmp to
##  .Ldec_entry the flags come from the add that forms %r11, whose result
##  is a non-zero address, so the loop body is entered.
##
.type	_vpaes_decrypt_core,\@abi-omnipotent
.align	16
_vpaes_decrypt_core:
	mov	%rdx,	%r9		# load key
	mov	240(%rdx),%eax		# eax = round counter
	movdqa	%xmm9,	%xmm1
	movdqa	.Lk_dipt(%rip), %xmm2	# dipt lo
	pandn	%xmm0,	%xmm1		# 1 = input bits outside the xmm9 mask
	mov	%rax,	%r11
	psrld	\$4,	%xmm1		# 1 = those bits moved down by 4
	movdqu	(%r9),	%xmm5		# round0 key
	shl	\$4,	%r11		# r11 = nr * 16
	pand	%xmm9,	%xmm0		# 0 = input bits inside the xmm9 mask
	pshufb	%xmm0,	%xmm2		# 2 = dipt lo lookup
	movdqa	.Lk_dipt+16(%rip), %xmm0 # dipt hi
	xor	\$0x30,	%r11
	lea	.Lk_dsbd(%rip),%r10
	pshufb	%xmm1,	%xmm0		# 0 = dipt hi lookup
	and	\$0x30,	%r11		# keep a row offset in 0x00..0x30
	pxor	%xmm5,	%xmm2		# 2 = dipt lo lookup + round0 key
	movdqa	.Lk_mc_forward+48(%rip), %xmm5	# 5 = initial MC shuffle
	pxor	%xmm2,	%xmm0		# 0 = transformed input + round0 key
	add	\$16,	%r9		# next key
	add	%r10,	%r11		# r11 = table base + row offset; ZF clear
	jmp	.Ldec_entry

.align 16
.Ldec_loop:
##
##  Inverse mix columns
##
	movdqa  -0x20(%r10),%xmm4	# 4 : sb9u
	pshufb	%xmm2,	%xmm4		# 4 = sb9u
	pxor	%xmm0,	%xmm4		# 4 = sb9u + k (xmm0 = key loaded in .Ldec_entry)
	movdqa  -0x10(%r10),%xmm0	# 0 : sb9t
	pshufb	%xmm3,	%xmm0		# 0 = sb9t
	pxor	%xmm4,	%xmm0		# 0 = ch
	add	\$16, %r9		# next round key

	pshufb	%xmm5,	%xmm0		# MC ch
	movdqa  0x00(%r10),%xmm4	# 4 : sbdu
	pshufb	%xmm2,	%xmm4		# 4 = sbdu
	pxor	%xmm0,	%xmm4		# 4 = ch
	movdqa  0x10(%r10),%xmm0	# 0 : sbdt
	pshufb	%xmm3,	%xmm0		# 0 = sbdt
	pxor	%xmm4,	%xmm0		# 0 = ch
	sub	\$1,%rax		# nr--; sets the flags tested by jnz below

	pshufb	%xmm5,	%xmm0		# MC ch
	movdqa  0x20(%r10),%xmm4	# 4 : sbbu
	pshufb	%xmm2,	%xmm4		# 4 = sbbu
	pxor	%xmm0,	%xmm4		# 4 = ch
	movdqa  0x30(%r10),%xmm0	# 0 : sbbt
	pshufb	%xmm3,	%xmm0		# 0 = sbbt
	pxor	%xmm4,	%xmm0		# 0 = ch

	pshufb	%xmm5,	%xmm0		# MC ch
	movdqa  0x40(%r10),%xmm4	# 4 : sbeu
	pshufb	%xmm2,	%xmm4		# 4 = sbeu
	pxor	%xmm0,	%xmm4		# 4 = ch
	movdqa  0x50(%r10),%xmm0	# 0 : sbet
	pshufb	%xmm3,	%xmm0		# 0 = sbet
	pxor	%xmm4,	%xmm0		# 0 = ch

	palignr	\$12,	%xmm5,	%xmm5	# rotate the MC shuffle for the next round

.Ldec_entry:
	# top of round
	movdqa  %xmm9, 	%xmm1	# 1 : i
	pandn	%xmm0, 	%xmm1	# 1 = i<<4
	psrld	\$4,    %xmm1	# 1 = i
	pand	%xmm9, 	%xmm0	# 0 = k
	movdqa	%xmm11, %xmm2	# 2 : a/k
	pshufb  %xmm0,  %xmm2	# 2 = a/k
	pxor	%xmm1,	%xmm0	# 0 = j
	movdqa	%xmm10,	%xmm3	# 3 : 1/i
	pshufb  %xmm1, 	%xmm3	# 3 = 1/i
	pxor	%xmm2, 	%xmm3	# 3 = iak = 1/i + a/k
	movdqa	%xmm10,	%xmm4	# 4 : 1/j
	pshufb	%xmm0, 	%xmm4	# 4 = 1/j
	pxor	%xmm2, 	%xmm4	# 4 = jak = 1/j + a/k
	movdqa	%xmm10,	%xmm2	# 2 : 1/iak
	pshufb  %xmm3,	%xmm2	# 2 = 1/iak
	pxor	%xmm0, 	%xmm2	# 2 = io
	movdqa	%xmm10, %xmm3	# 3 : 1/jak
	pshufb  %xmm4,  %xmm3	# 3 = 1/jak
	pxor	%xmm1,  %xmm3	# 3 = jo
	movdqu	(%r9),	%xmm0	# 0 = round key k for this round
	jnz	.Ldec_loop	# flags from nr-- (or from the add on first entry)

	# middle of last round
	movdqa	0x60(%r10), %xmm4	# 4 : sbou
	pshufb  %xmm2,  %xmm4	# 4 = sbou
	pxor	%xmm0,  %xmm4	# 4 = sbou + k
	movdqa	0x70(%r10), %xmm0	# 0 : sbot
	movdqa	-0x160(%r11), %xmm2	# .Lk_sr-.Lk_dsbd=-0x160
	pshufb  %xmm3,	%xmm0	# 0 = sbot
	pxor	%xmm4,	%xmm0	# 0 = A
	pshufb	%xmm2,	%xmm0	# 0 = A permuted by the selected .Lk_sr row
	ret
.size	_vpaes_decrypt_core,.-_vpaes_decrypt_core
273392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
274392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom########################################################
275392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##                                                    ##
276392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##                  AES key schedule                  ##
277392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##                                                    ##
278392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom########################################################
279392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type	_vpaes_schedule_core,\@abi-omnipotent
280392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align	16
281392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom_vpaes_schedule_core:
282392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	# rdi = key
283392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	# rsi = size in bits
284392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	# rdx = buffer
285392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	# rcx = direction.  0=encrypt, 1=decrypt
286392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
287392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_preheat		# load the tables
288392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	.Lk_rcon(%rip), %xmm8	# load rcon
289392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqu	(%rdi),	%xmm0		# load key (unaligned)
290392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
291392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	# input transform
292392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm0,	%xmm3
293392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	.Lk_ipt(%rip), %r11
294392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_schedule_transform
295392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm0,	%xmm7
296392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
297392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	.Lk_sr(%rip),%r10
298392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	test	%rcx,	%rcx
299392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	jnz	.Lschedule_am_decrypting
300392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
301392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	# encrypting, output zeroth round key after transform
302392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqu	%xmm0,	(%rdx)
303392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	jmp	.Lschedule_go
304392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
305392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lschedule_am_decrypting:
306392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	# decrypting, output zeroth round key after shiftrows
307392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	(%r8,%r10),%xmm1
308392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb  %xmm1,	%xmm3
309392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqu	%xmm3,	(%rdx)
310392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	\$0x30, %r8
311392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
312392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lschedule_go:
313392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	cmp	\$192,	%esi
314392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ja	.Lschedule_256
315392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	je	.Lschedule_192
	# 128: fall through
317392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
318392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
319392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  .schedule_128
320392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
321392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  128-bit specific part of key schedule.
322392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
323392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  This schedule is really simple, because all its parts
324392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  are accomplished by the subroutines.
325392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
326392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lschedule_128:
327392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	\$10, %esi
328392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
329392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Loop_schedule_128:
330392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call 	_vpaes_schedule_round
331392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	dec	%rsi
332392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	jz 	.Lschedule_mangle_last
333392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_schedule_mangle	# write output
334392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	jmp 	.Loop_schedule_128
335392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
336392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
337392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  .aes_schedule_192
338392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
339392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  192-bit specific part of key schedule.
340392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
341392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  The main body of this schedule is the same as the 128-bit
342392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  schedule, but with more smearing.  The long, high side is
343392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  stored in %xmm7 as before, and the short, low side is in
344392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  the high bits of %xmm6.
345392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
346392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  This schedule is somewhat nastier, however, because each
347392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  round produces 192 bits of key material, or 1.5 round keys.
348392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Therefore, on each cycle we do 2 rounds and produce 3 round
349392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  keys.
350392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
351392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align	16
352392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lschedule_192:
353392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqu	8(%rdi),%xmm0		# load key part 2 (very unaligned)
354392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_schedule_transform	# input transform
355392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm0,	%xmm6		# save short part
356392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm4,	%xmm4		# clear 4
357392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movhlps	%xmm4,	%xmm6		# clobber low side with zeros
358392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	\$4,	%esi
359392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
360392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Loop_schedule_192:
361392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_schedule_round
362392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	palignr	\$8,%xmm6,%xmm0
363392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_schedule_mangle	# save key n
364392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_schedule_192_smear
365392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_schedule_mangle	# save key n+1
366392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_schedule_round
367392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	dec	%rsi
368392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	jz 	.Lschedule_mangle_last
369392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_schedule_mangle	# save key n+2
370392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_schedule_192_smear
371392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	jmp	.Loop_schedule_192
372392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
373392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
374392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  .aes_schedule_256
375392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
376392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  256-bit specific part of key schedule.
377392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
378392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  The structure here is very similar to the 128-bit
379392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  schedule, but with an additional "low side" in
380392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  %xmm6.  The low side's rounds are the same as the
381392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  high side's, except no rcon and no rotation.
382392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
383392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align	16
384392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lschedule_256:
385392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqu	16(%rdi),%xmm0		# load key part 2 (unaligned)
386392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_schedule_transform	# input transform
387392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	\$7, %esi
388392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
389392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Loop_schedule_256:
390392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_schedule_mangle	# output low result
391392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm0,	%xmm6		# save cur_lo in xmm6
392392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
393392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	# high round
394392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_schedule_round
395392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	dec	%rsi
396392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	jz 	.Lschedule_mangle_last
397392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_schedule_mangle
398392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
399392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	# low round. swap xmm7 and xmm6
400392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufd	\$0xFF,	%xmm0,	%xmm0
401392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm7,	%xmm5
402392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm6,	%xmm7
403392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_schedule_low_round
404392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm5,	%xmm7
405392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
406392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	jmp	.Loop_schedule_256
407392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
408392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
409392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
410392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  .aes_schedule_mangle_last
411392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
412392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Mangler for last round of key schedule
413392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Mangles %xmm0
414392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##    when encrypting, outputs out(%xmm0) ^ 63
415392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##    when decrypting, outputs unskew(%xmm0)
416392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
417392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Always called right before return... jumps to cleanup and exits
418392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
419392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align	16
420392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lschedule_mangle_last:
421392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	# schedule last round key from xmm0
422392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	.Lk_deskew(%rip),%r11	# prepare to deskew
423392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	test	%rcx, 	%rcx
424392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	jnz	.Lschedule_mangle_last_dec
425392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
426392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	# encrypting
427392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	(%r8,%r10),%xmm1
428392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb	%xmm1,	%xmm0		# output permute
429392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	.Lk_opt(%rip),	%r11	# prepare to output transform
430392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	add	\$32,	%rdx
431392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
432392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lschedule_mangle_last_dec:
433392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	add	\$-16,	%rdx
434392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	.Lk_s63(%rip),	%xmm0
435392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_schedule_transform # output transform
436392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqu	%xmm0,	(%rdx)		# save last key
437392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
438392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	# cleanup
439392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm0,  %xmm0
440392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm1,  %xmm1
441392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm2,  %xmm2
442392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm3,  %xmm3
443392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm4,  %xmm4
444392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm5,  %xmm5
445392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm6,  %xmm6
446392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm7,  %xmm7
447392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ret
448392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size	_vpaes_schedule_core,.-_vpaes_schedule_core
449392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
450392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
451392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  .aes_schedule_192_smear
452392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
453392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Smear the short, low side in the 192-bit key schedule.
454392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
455392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Inputs:
456392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##    %xmm7: high side, b  a  x  y
457392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##    %xmm6:  low side, d  c  0  0
458392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##    (nothing else is read; %xmm13 is not used by this routine)
459392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
460392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Outputs:
461392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##    %xmm6: b+c+d  b+c  0  0
462392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##    %xmm0: b+c+d  b+c  b  a
463392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
##  Registers are written as four dwords, most significant first,
##  and "+" means xor.  Clobbers %xmm1, which is left zeroed.
##
464392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type	_vpaes_schedule_192_smear,\@abi-omnipotent
465392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align	16
466392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom_vpaes_schedule_192_smear:
467392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufd	\$0x80,	%xmm6,	%xmm0	# d c 0 0 -> c 0 0 0
468392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm0,	%xmm6		# -> c+d c 0 0
469392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufd	\$0xFE,	%xmm7,	%xmm0	# b a _ _ -> b b b a
470392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm0,	%xmm6		# -> b+c+d b+c b a
	# hand the full smeared value back in xmm0 before trimming xmm6
471392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm6,	%xmm0
	# make a zero locally; movhlps copies its high half over the low half of xmm6
472392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm1,	%xmm1
473392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movhlps	%xmm1,	%xmm6		# clobber low side with zeros
474392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ret
475392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size	_vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
476392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
477392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
478392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  .aes_schedule_round
479392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
480392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Runs one main round of the key schedule on %xmm0, %xmm7
481392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
482392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Specifically, runs subbytes on the high dword of %xmm0
483392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  then rotates it by one byte and xors into the low dword of
484392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  %xmm7.
485392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
486392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Adds rcon, taken from the top byte (byte 15) of %xmm8, into the
487392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  low byte of %xmm7, then rotates %xmm8 by one byte for next rcon.
488392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
489392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Smears the dwords of %xmm7 by xoring the low into the
490392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  second low, result into third, result into highest.
491392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
492392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Returns results in %xmm7 = %xmm0.
493392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Clobbers %xmm1-%xmm4 (%r11 is not written by the body below).
494392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
##  Reads %xmm9-%xmm13 as constants without loading them; %xmm9 is
##  the 0x0F nibble mask, the others are the lookup tables named in
##  the inline comments (presumably set up by preheat; confirm).
##
##  Second entry point _vpaes_schedule_low_round skips the rcon and
##  rotate steps; the caller supplies the value to substitute in
##  %xmm0 and the value to smear in %xmm7.
##
495392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type	_vpaes_schedule_round,\@abi-omnipotent
496392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align	16
497392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom_vpaes_schedule_round:
498392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	# extract rcon from xmm8: palignr by 15 against a zeroed xmm1 leaves
	# byte 15 of xmm8 in byte 0 of xmm1 and zeros everywhere else
499392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm1,	%xmm1
500392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	palignr	\$15,	%xmm8,	%xmm1
	# same shift with xmm8 as both operands is a one-byte rotate of xmm8
501392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	palignr	\$15,	%xmm8,	%xmm8
	# fold the rcon byte into xmm7 before it is smeared
502392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm1,	%xmm7
503392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
504392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	# rotate: broadcast the high dword of xmm0 to all four dwords, then
	# rotate the register by one byte (every dword ends up byte-rotated)
505392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufd	\$0xFF,	%xmm0,	%xmm0
506392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	palignr	\$1,	%xmm0,	%xmm0
507392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
508392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	# fall through...
509392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
510392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	# low round: same as high round, but no rotation and no rcon.
511392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom_vpaes_schedule_low_round:
512392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	# smear xmm7: two shift-and-xor steps (by 4 bytes, then by 8) turn
	# dwords x3 x2 x1 x0 into x3+x2+x1+x0  x2+x1+x0  x1+x0  x0
513392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm7,	%xmm1
514392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pslldq	\$4,	%xmm7
515392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm1,	%xmm7
516392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm7,	%xmm1
517392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pslldq	\$8,	%xmm7
518392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm1,	%xmm7
	# xor the .Lk_s63 constant into the smeared value
519392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	.Lk_s63(%rip), %xmm7
520392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
521392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	# subbytes on xmm0, done purely with pshufb table lookups.
	# First split every byte into nibbles: pandn keeps the high nibbles,
	# and because the low nibbles are already cleared the dword-wide
	# psrld by 4 cannot leak bits across byte boundaries.
522392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa  %xmm9, 	%xmm1
523392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pandn	%xmm0, 	%xmm1
524392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	psrld	\$4,    %xmm1		# 1 = i (high nibbles)
525392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pand	%xmm9, 	%xmm0		# 0 = k (low nibbles)
526392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm11, %xmm2		# 2 : a/k
527392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb  %xmm0,  %xmm2		# 2 = a/k
528392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm1,	%xmm0		# 0 = j
529392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm10,	%xmm3		# 3 : 1/i
530392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb  %xmm1, 	%xmm3		# 3 = 1/i
531392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm2, 	%xmm3		# 3 = iak = 1/i + a/k
532392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm10,	%xmm4		# 4 : 1/j
533392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb	%xmm0, 	%xmm4		# 4 = 1/j
534392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm2, 	%xmm4		# 4 = jak = 1/j + a/k
535392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm10,	%xmm2		# 2 : 1/iak
536392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb  %xmm3,	%xmm2		# 2 = 1/iak
537392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm0, 	%xmm2		# 2 = io
538392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm10, %xmm3		# 3 : 1/jak
539392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb  %xmm4,  %xmm3		# 3 = 1/jak
540392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm1,  %xmm3		# 3 = jo
541392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm13, %xmm4		# 4 : sbou
542392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb  %xmm2,  %xmm4		# 4 = sbou
543392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm12, %xmm0		# 0 : sbot
544392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb  %xmm3,	%xmm0		# 0 = sbot
545392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm4, 	%xmm0		# 0 = sbox output
546392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
547392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	# add in smeared stuff; the result is returned in both xmm0 and xmm7
548392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm7,	%xmm0
549392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm0,	%xmm7
550392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ret
551392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size	_vpaes_schedule_round,.-_vpaes_schedule_round
552392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
553392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
554392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  .aes_schedule_transform
555392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
556392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Linear-transform %xmm0 according to tables at (%r11)
557392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
558392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Requires that %xmm9 = 0x0F0F... as in preheat
559392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Output in %xmm0
560392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Clobbers %xmm1, %xmm2
561392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
##  Two 16-byte tables are read: the one at (%r11) is indexed by each
##  byte's low nibble, the one at 16(%r11) by its high nibble, and the
##  two lookups are xored together.  Both tables are loaded with
##  movdqa, so %r11 has to be 16-byte aligned.  %r11 itself is only
##  read.
##
562392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type	_vpaes_schedule_transform,\@abi-omnipotent
563392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align	16
564392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom_vpaes_schedule_transform:
	# split xmm0 into nibbles: xmm1 = high nibbles moved down, xmm0 = low
	# nibbles (the low bits are masked off before the dword-wide shift,
	# so nothing crosses a byte boundary)
565392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm9,	%xmm1
566392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pandn	%xmm0,	%xmm1
567392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	psrld	\$4,	%xmm1
568392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pand	%xmm9,	%xmm0
569392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	(%r11), %xmm2 	# lo table, looked up by the low nibbles
570392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb	%xmm0,	%xmm2
571392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	16(%r11), %xmm0 # hi table, looked up by the high nibbles
572392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb	%xmm1,	%xmm0
	# combine the two half-lookups into the transformed bytes
573392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm2,	%xmm0
574392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ret
575392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size	_vpaes_schedule_transform,.-_vpaes_schedule_transform
576392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
577392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
578392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  .aes_schedule_mangle
579392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
580392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Mangle xmm0 from (basis-transformed) standard version
581392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  to our version.
582392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
583392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  On encrypt,
584392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##    xor with 0x63
585392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##    multiply by circulant 0,1,1,1
586392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##    apply shiftrows transform
587392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
588392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  On decrypt,
589392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##    xor with 0x63
590392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##    multiply by "inverse mixcolumns" circulant E,B,D,9
591392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##    deskew
592392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##    apply shiftrows transform
593392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
594392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
595392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Writes out to (%rdx), and increments or decrements it
596392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Keeps track of round number mod 4 in %r8
597392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Preserves xmm0
598392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Clobbers xmm1-xmm5 (and %r11 on the decrypt path)
599392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
##  Direction is chosen by %rcx: zero takes the encrypt path, nonzero
##  the decrypt path.  %rdx is moved by 16 BEFORE the store (up on
##  encrypt, down on decrypt).  Requires %xmm9 = 0x0F nibble mask on
##  the decrypt path.  The output permutation is loaded from
##  (%r8,%r10); presumably %r10 is the base of the shiftrows table set
##  up by the caller -- confirm against _vpaes_schedule_core.
##
600392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type	_vpaes_schedule_mangle,\@abi-omnipotent
601392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align	16
602392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom_vpaes_schedule_mangle:
603392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm0,	%xmm4	# work on a copy; xmm0 itself is never written
	# xmm5 = byte permutation used as the "rotate" step by both paths
604392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	.Lk_mc_forward(%rip),%xmm5
	# nonzero rcx selects the decrypt path
605392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	test	%rcx, 	%rcx
606392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	jnz	.Lschedule_mangle_dec
607392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
608392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	# encrypting: advance the output pointer, xor in .Lk_s63, then
	# accumulate in xmm3 the xor of one, two and three applications of
	# the xmm5 permutation (the 0,1,1,1 circulant from the header)
609392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	add	\$16,	%rdx
610392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	.Lk_s63(%rip),%xmm4
611392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb	%xmm5,	%xmm4
612392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm4,	%xmm3
613392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb	%xmm5,	%xmm4
614392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm4,	%xmm3
615392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb	%xmm5,	%xmm4
616392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm4,	%xmm3
617392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
618392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	jmp	.Lschedule_mangle_both
619392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align	16
620392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lschedule_mangle_dec:
621392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	# inverse mix columns: four pairs of lo/hi nibble lookups into the
	# eight 16-byte tables at .Lk_dksd, with the running result in xmm3
	# permuted by xmm5 between consecutive pairs
622392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	.Lk_dksd(%rip),%r11
623392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm9,	%xmm1
624392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pandn	%xmm4,	%xmm1
625392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	psrld	\$4,	%xmm1	# 1 = hi
626392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pand	%xmm9,	%xmm4	# 4 = lo
627392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
	# pair 1: tables at 0x00 (lo) and 0x10 (hi)
628392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	0x00(%r11), %xmm2
629392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb	%xmm4,	%xmm2
630392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	0x10(%r11), %xmm3
631392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb	%xmm1,	%xmm3
632392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm2,	%xmm3
633392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb	%xmm5,	%xmm3
634392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
	# pair 2: tables at 0x20 (lo) and 0x30 (hi), xored onto the result
635392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	0x20(%r11), %xmm2
636392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb	%xmm4,	%xmm2
637392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm3,	%xmm2
638392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	0x30(%r11), %xmm3
639392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb	%xmm1,	%xmm3
640392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm2,	%xmm3
641392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb	%xmm5,	%xmm3
642392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
	# pair 3: tables at 0x40 (lo) and 0x50 (hi)
643392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	0x40(%r11), %xmm2
644392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb	%xmm4,	%xmm2
645392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm3,	%xmm2
646392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	0x50(%r11), %xmm3
647392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb	%xmm1,	%xmm3
648392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm2,	%xmm3
649392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb	%xmm5,	%xmm3
650392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
	# pair 4: tables at 0x60 (lo) and 0x70 (hi); no permutation after it
651392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	0x60(%r11), %xmm2
652392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb	%xmm4,	%xmm2
653392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm3,	%xmm2
654392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	0x70(%r11), %xmm3
655392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb	%xmm1,	%xmm3
656392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm2,	%xmm3
657392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
	# decrypt schedule is written back to front: step the pointer down
658392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	add	\$-16,	%rdx
659392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
660392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lschedule_mangle_both:
	# apply the output permutation currently selected by r8, then step
	# r8 back by 16 and wrap it into {0x00,0x10,0x20,0x30} for the next
	# call; the store is unaligned-safe (movdqu)
661392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	(%r8,%r10),%xmm1
662392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pshufb	%xmm1,%xmm3
663392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	add	\$-16,	%r8
664392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	and	\$0x30,	%r8
665392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqu	%xmm3,	(%rdx)
666392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ret
667392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size	_vpaes_schedule_mangle,.-_vpaes_schedule_mangle
668392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
669392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom#
670392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# Interface to OpenSSL
671392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom#
# set_encrypt_key: build the encryption key schedule.
#
# Declared as a 3-argument perlasm function; the body uses
#   %rdi  user key bytes (read by _vpaes_schedule_core)
#   %esi  key length in bits
#   %rdx  output key structure; the round count is stored at offset 240
#
# Always returns 0 in %eax.  The bit length is not validated here.
#
672392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.globl	${PREFIX}_set_encrypt_key
673392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type	${PREFIX}_set_encrypt_key,\@function,3
674392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align	16
675392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom${PREFIX}_set_encrypt_key:
676392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
677392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($win64);
	# Win64 only: xmm6-xmm15 are callee-saved there, so spill them to a
	# 0xb8-byte stack frame before the schedule code overwrites them
	# (0xb8 presumably chosen so the movaps slots are 16-byte aligned
	# given the return address already on the stack -- confirm)
678392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	-0xb8(%rsp),%rsp
679392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm6,0x10(%rsp)
680392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm7,0x20(%rsp)
681392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm8,0x30(%rsp)
682392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm9,0x40(%rsp)
683392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm10,0x50(%rsp)
684392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm11,0x60(%rsp)
685392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm12,0x70(%rsp)
686392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm13,0x80(%rsp)
687392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm14,0x90(%rsp)
688392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm15,0xa0(%rsp)
689392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lenc_key_body:
690392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
691392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
	# rounds = nbits/32 + 5, i.e. 9, 11 or 13 for 128-, 192- or 256-bit keys
692392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	%esi,%eax
693392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shr	\$5,%eax
694392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	add	\$5,%eax
695392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	%eax,240(%rdx)	# AES_KEY->rounds = nbits/32+5;
696392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
	# ecx = 0 selects the encrypt paths in the schedule code (they test
	# rcx); r8 = 0x30 is the starting value of the round-mod-4 offset
	# that _vpaes_schedule_mangle steps down by 16 per stored key
697392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	\$0,%ecx
698392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	\$0x30,%r8d
699392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_schedule_core
700392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
701392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($win64);
	# Win64 only: restore xmm6-xmm15 from the same slots and drop the frame
702392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x10(%rsp),%xmm6
703392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x20(%rsp),%xmm7
704392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x30(%rsp),%xmm8
705392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x40(%rsp),%xmm9
706392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x50(%rsp),%xmm10
707392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x60(%rsp),%xmm11
708392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x70(%rsp),%xmm12
709392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x80(%rsp),%xmm13
710392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x90(%rsp),%xmm14
711392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0xa0(%rsp),%xmm15
712392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	0xb8(%rsp),%rsp
713392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lenc_key_epilogue:
714392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
715392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
	# unconditional success: return 0
716392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	%eax,%eax
717392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ret
718392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size	${PREFIX}_set_encrypt_key,.-${PREFIX}_set_encrypt_key
719392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
#
# set_decrypt_key: build the decryption key schedule.
#
# Declared as a 3-argument perlasm function; the body uses
#   %rdi  user key bytes (read by _vpaes_schedule_core)
#   %esi  key length in bits
#   %rdx  output key structure; the round count is stored at offset 240
#
# The schedule is written from the far end of the structure towards
# its start.  Always returns 0 in %eax.  The bit length is not
# validated here.
#
720392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.globl	${PREFIX}_set_decrypt_key
721392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type	${PREFIX}_set_decrypt_key,\@function,3
722392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align	16
723392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom${PREFIX}_set_decrypt_key:
724392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
725392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($win64);
	# Win64 only: xmm6-xmm15 are callee-saved there, so spill them to a
	# 0xb8-byte stack frame before the schedule code overwrites them
726392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	-0xb8(%rsp),%rsp
727392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm6,0x10(%rsp)
728392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm7,0x20(%rsp)
729392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm8,0x30(%rsp)
730392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm9,0x40(%rsp)
731392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm10,0x50(%rsp)
732392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm11,0x60(%rsp)
733392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm12,0x70(%rsp)
734392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm13,0x80(%rsp)
735392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm14,0x90(%rsp)
736392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm15,0xa0(%rsp)
737392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Ldec_key_body:
738392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
739392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
	# rounds = nbits/32 + 5, i.e. 9, 11 or 13 for 128-, 192- or 256-bit keys
740392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	%esi,%eax
741392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shr	\$5,%eax
742392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	add	\$5,%eax
743392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	%eax,240(%rdx)	# AES_KEY->rounds = nbits/32+5;
	# rdx = key + 16*(rounds+1): start at the far end of the schedule,
	# since the decrypt paths step rdx down by 16 per stored key.  The
	# 32-bit shl leaves the upper half of rax zero, so rax is safe to
	# use as a 64-bit index in the lea.
744392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shl	\$4,%eax
745392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	16(%rdx,%rax),%rdx
746392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
	# ecx = 1 selects the decrypt paths in the schedule code (they test
	# rcx).  r8d is the starting round-mod-4 offset: bit 5 of nbits/2 is
	# set for 192 (96) but clear for 128 (64) and 256 (128), so the
	# and/xor pair below yields 0 for 192 and 32 otherwise.
747392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	\$1,%ecx
748392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	%esi,%r8d
749392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	shr	\$1,%r8d
750392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	and	\$32,%r8d
751392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	\$32,%r8d	# nbits==192?0:32
752392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_schedule_core
753392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
754392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($win64);
	# Win64 only: restore xmm6-xmm15 from the same slots and drop the frame
755392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x10(%rsp),%xmm6
756392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x20(%rsp),%xmm7
757392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x30(%rsp),%xmm8
758392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x40(%rsp),%xmm9
759392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x50(%rsp),%xmm10
760392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x60(%rsp),%xmm11
761392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x70(%rsp),%xmm12
762392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x80(%rsp),%xmm13
763392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x90(%rsp),%xmm14
764392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0xa0(%rsp),%xmm15
765392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	0xb8(%rsp),%rsp
766392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Ldec_key_epilogue:
767392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
768392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
	# unconditional success: return 0
769392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	%eax,%eax
770392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ret
771392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size	${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key
772392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
##
##  ${PREFIX}_encrypt
##
##  Single-block entry point: loads 16 bytes (unaligned) from (%rdi)
##  into %xmm0, calls _vpaes_preheat and _vpaes_encrypt_core, then
##  stores %xmm0 (unaligned) to (%rsi).  The routine is declared with
##  three arguments; the third is not referenced here and is presumably
##  the key schedule read by _vpaes_encrypt_core (defined outside this
##  section -- confirm there).
##
773392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.globl	${PREFIX}_encrypt
774392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type	${PREFIX}_encrypt,\@function,3
775392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align	16
776392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom${PREFIX}_encrypt:
777392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
# Win64 only: reserve a 0xb8-byte frame and spill %xmm6-%xmm15
# (10 registers x 16 bytes) at 0x10..0xa0(%rsp).  se_handler copies the
# registers back from exactly this layout (save area at +16, frame size
# 0xb8), so the offsets here and there must stay in sync.
778392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($win64);
779392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	-0xb8(%rsp),%rsp
780392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm6,0x10(%rsp)
781392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm7,0x20(%rsp)
782392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm8,0x30(%rsp)
783392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm9,0x40(%rsp)
784392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm10,0x50(%rsp)
785392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm11,0x60(%rsp)
786392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm12,0x70(%rsp)
787392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm13,0x80(%rsp)
788392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm14,0x90(%rsp)
789392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm15,0xa0(%rsp)
790392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lenc_body:
791392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
# Common body: one block in via %xmm0, one block out via %xmm0.
792392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
793392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqu	(%rdi),%xmm0
794392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_preheat
795392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_encrypt_core
796392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqu	%xmm0,(%rsi)
797392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
# Win64 only: reload %xmm6-%xmm15 and release the 0xb8-byte frame.
# .Lenc_body/.Lenc_epilogue bracket the region in which the frame is
# live (presumably the HandlerData pair consumed by se_handler --
# the table itself is outside this section).
798392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($win64);
799392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x10(%rsp),%xmm6
800392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x20(%rsp),%xmm7
801392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x30(%rsp),%xmm8
802392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x40(%rsp),%xmm9
803392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x50(%rsp),%xmm10
804392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x60(%rsp),%xmm11
805392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x70(%rsp),%xmm12
806392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x80(%rsp),%xmm13
807392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x90(%rsp),%xmm14
808392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0xa0(%rsp),%xmm15
809392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	0xb8(%rsp),%rsp
810392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lenc_epilogue:
811392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
812392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
813392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ret
814392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size	${PREFIX}_encrypt,.-${PREFIX}_encrypt
815392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
##
##  ${PREFIX}_decrypt
##
##  Single-block entry point: loads 16 bytes (unaligned) from (%rdi)
##  into %xmm0, calls _vpaes_preheat and _vpaes_decrypt_core, then
##  stores %xmm0 (unaligned) to (%rsi).  The routine is declared with
##  three arguments; the third is not referenced here and is presumably
##  the key schedule read by _vpaes_decrypt_core (defined outside this
##  section -- confirm there).
##
816392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.globl	${PREFIX}_decrypt
817392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type	${PREFIX}_decrypt,\@function,3
818392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align	16
819392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom${PREFIX}_decrypt:
820392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
# Win64 only: reserve a 0xb8-byte frame and spill %xmm6-%xmm15
# (10 registers x 16 bytes) at 0x10..0xa0(%rsp).  se_handler copies the
# registers back from exactly this layout (save area at +16, frame size
# 0xb8), so the offsets here and there must stay in sync.
821392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($win64);
822392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	-0xb8(%rsp),%rsp
823392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm6,0x10(%rsp)
824392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm7,0x20(%rsp)
825392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm8,0x30(%rsp)
826392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm9,0x40(%rsp)
827392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm10,0x50(%rsp)
828392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm11,0x60(%rsp)
829392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm12,0x70(%rsp)
830392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm13,0x80(%rsp)
831392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm14,0x90(%rsp)
832392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm15,0xa0(%rsp)
833392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Ldec_body:
834392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
# Common body: one block in via %xmm0, one block out via %xmm0.
835392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
836392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqu	(%rdi),%xmm0
837392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_preheat
838392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_decrypt_core
839392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqu	%xmm0,(%rsi)
840392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
# Win64 only: reload %xmm6-%xmm15 and release the 0xb8-byte frame.
# .Ldec_body/.Ldec_epilogue bracket the region in which the frame is
# live (presumably the HandlerData pair consumed by se_handler --
# the table itself is outside this section).
841392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($win64);
842392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x10(%rsp),%xmm6
843392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x20(%rsp),%xmm7
844392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x30(%rsp),%xmm8
845392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x40(%rsp),%xmm9
846392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x50(%rsp),%xmm10
847392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x60(%rsp),%xmm11
848392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x70(%rsp),%xmm12
849392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x80(%rsp),%xmm13
850392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x90(%rsp),%xmm14
851392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0xa0(%rsp),%xmm15
852392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	0xb8(%rsp),%rsp
853392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Ldec_epilogue:
854392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
855392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
856392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ret
857392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size	${PREFIX}_decrypt,.-${PREFIX}_decrypt
858392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
859392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom{
# CBC-mode entry point.  Processes the input in whole 16-byte blocks only:
# a call with fewer than 16 bytes returns without touching the output or the
# IV, and any trailing partial block is ignored.  The Perl variables below
# name the registers holding the six arguments, in declaration order.
860392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrommy ($inp,$out,$len,$key,$ivp,$enc)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9");
861392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# void AES_cbc_encrypt (const unsigned char *inp, unsigned char *out,
862392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom#                       size_t length, const AES_KEY *key,
863392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom#                       unsigned char *ivp,const int enc);
864392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
865392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.globl	${PREFIX}_cbc_encrypt
866392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type	${PREFIX}_cbc_encrypt,\@function,6
867392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align	16
868392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom${PREFIX}_cbc_encrypt:
869392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xchg	$key,$len
870392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
# The xchg above swapped the two registers at run time; swap the Perl names
# to match, so that from here on $len is %rcx and $key is %rdx (presumably
# because the core routines expect the key schedule in %rdx -- confirm
# against _vpaes_encrypt_core/_vpaes_decrypt_core).
871392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom($len,$key)=($key,$len);
872392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
	# Pre-decrement the length by one block.  A borrow means fewer than
	# 16 bytes were supplied: leave through .Lcbc_abort, which sits after
	# the Win64 epilogue because the prologue has not run yet.
873a1a5710c055e139ea00e785f9eb55b3af3e4dab1Brian Carlstrom	sub	\$16,$len
874a1a5710c055e139ea00e785f9eb55b3af3e4dab1Brian Carlstrom	jc	.Lcbc_abort
875392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
# Win64 only: reserve a 0xb8-byte frame and spill %xmm6-%xmm15 at
# 0x10..0xa0(%rsp); se_handler depends on this layout.
876392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($win64);
877392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	-0xb8(%rsp),%rsp
878392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm6,0x10(%rsp)
879392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm7,0x20(%rsp)
880392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm8,0x30(%rsp)
881392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm9,0x40(%rsp)
882392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm10,0x50(%rsp)
883392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm11,0x60(%rsp)
884392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm12,0x70(%rsp)
885392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm13,0x80(%rsp)
886392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm14,0x90(%rsp)
887392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	%xmm15,0xa0(%rsp)
888392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lcbc_body:
889392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
890392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
	# %xmm6 carries the chaining value (initially the IV).  The output
	# pointer is rebased to (out - inp) so that the (out,inp) addressing
	# used by the stores tracks the output as inp advances.
891392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqu	($ivp),%xmm6		# load IV
892392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	sub	$inp,$out
893392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_preheat
	# enc == 0 selects decryption, anything else encryption.
894392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	cmp	\$0,${enc}d
895392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	je	.Lcbc_dec_loop
896392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	jmp	.Lcbc_enc_loop
897392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align	16
	# CBC encrypt: C[i] = E(P[i] xor C[i-1]); each new ciphertext block
	# becomes the next chaining value.  The loop repeats while the
	# remaining length can still absorb another 16-byte decrement.
898392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lcbc_enc_loop:
899392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqu	($inp),%xmm0
900392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm6,%xmm0
901392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_encrypt_core
902392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm0,%xmm6
903392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqu	%xmm0,($out,$inp)
904392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	16($inp),$inp
905392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	sub	\$16,$len
906392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	jnc	.Lcbc_enc_loop
907392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	jmp	.Lcbc_done
908392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align	16
	# CBC decrypt: P[i] = D(C[i]) xor C[i-1].  %xmm7 keeps a copy of the
	# ciphertext block across the core call so it can become the next
	# chaining value (this presumes the core routine leaves %xmm6 and
	# %xmm7 untouched -- confirm against _vpaes_decrypt_core).
909392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lcbc_dec_loop:
910392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqu	($inp),%xmm0
911392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm0,%xmm7
912392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	_vpaes_decrypt_core
913392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pxor	%xmm6,%xmm0
914392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	%xmm7,%xmm6
915392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqu	%xmm0,($out,$inp)
916392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	16($inp),$inp
917392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	sub	\$16,$len
918392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	jnc	.Lcbc_dec_loop
	# Write the final chaining value back through ivp so the caller can
	# continue the stream with a later call.
919392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lcbc_done:
920392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqu	%xmm6,($ivp)		# save IV
921392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
# Win64 only: reload %xmm6-%xmm15 and release the 0xb8-byte frame.
922392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___ if ($win64);
923392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x10(%rsp),%xmm6
924392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x20(%rsp),%xmm7
925392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x30(%rsp),%xmm8
926392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x40(%rsp),%xmm9
927392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x50(%rsp),%xmm10
928392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x60(%rsp),%xmm11
929392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x70(%rsp),%xmm12
930392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x80(%rsp),%xmm13
931392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0x90(%rsp),%xmm14
932392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movaps	0xa0(%rsp),%xmm15
933392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	0xb8(%rsp),%rsp
934392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lcbc_epilogue:
935392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
936392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
937a1a5710c055e139ea00e785f9eb55b3af3e4dab1Brian Carlstrom.Lcbc_abort:
938392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ret
939392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size	${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt
940392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
941392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom}
942392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
943392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
944392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  _vpaes_preheat
945392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
946392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Fills register %r10 -> .Lk_s0F (so you can -fPIC)
947392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  and %xmm9-%xmm15 as specified below.
948392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
##  The constants are fetched at fixed byte offsets from .Lk_s0F, so
##  this routine depends on the order and sizes of the tables at the
##  start of _vpaes_consts (.Lk_inv, .Lk_s0F, .Lk_ipt, .Lk_sb1,
##  .Lk_sb2).  movdqa requires those tables to be 16-byte aligned.
##
949392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type	_vpaes_preheat,\@abi-omnipotent
950392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align	16
951392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom_vpaes_preheat:
952392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	.Lk_s0F(%rip), %r10
953392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	-0x20(%r10), %xmm10	# .Lk_inv
954392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	-0x10(%r10), %xmm11	# .Lk_inv+16
955392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	0x00(%r10), %xmm9	# .Lk_s0F
956392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	0x30(%r10), %xmm13	# .Lk_sb1
957392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	0x40(%r10), %xmm12	# .Lk_sb1+16
958392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	0x50(%r10), %xmm15	# .Lk_sb2
959392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	movdqa	0x60(%r10), %xmm14	# .Lk_sb2+16
960392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ret
961392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size	_vpaes_preheat,.-_vpaes_preheat
962392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom########################################################
963392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##                                                    ##
964392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##                     Constants                      ##
965392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##                                                    ##
966392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom########################################################
##
##  Lookup tables for the vector-permutation AES routines.  Every
##  .quad line holds two 64-bit values, i.e. one 16-byte vector, and
##  the whole object is aligned to 64 bytes.
##
##  Layout constraint: _vpaes_preheat loads .Lk_inv, .Lk_s0F, .Lk_sb1
##  and .Lk_sb2 at fixed byte offsets from .Lk_s0F (-0x20, 0x00, 0x30,
##  0x50), so the order and sizes of the tables from .Lk_inv through
##  .Lk_sb2 must not change.  The remaining tables are referenced by
##  label from code outside this section.
##
967392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type	_vpaes_consts,\@object
968392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align	64
969392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom_vpaes_consts:
970392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lk_inv:	# inv, inva
971392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x0E05060F0D080180, 0x040703090A0B0C02
972392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x01040A060F0B0780, 0x030D0E0C02050809
973392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
974392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lk_s0F:	# s0F
975392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
976392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
977392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lk_ipt:	# input transform (lo, hi)
978392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0xC2B2E8985A2A7000, 0xCABAE09052227808
979392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
980392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
981392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lk_sb1:	# sb1u, sb1t
982392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
983392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
984392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lk_sb2:	# sb2u, sb2t
985392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0xE27A93C60B712400, 0x5EB7E955BC982FCD
986392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x69EB88400AE12900, 0xC2A163C8AB82234A
987392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lk_sbo:	# sbou, sbot
988392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0xD0D26D176FBDC700, 0x15AABF7AC502A878
989392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
990392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
991392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lk_mc_forward:	# mc_forward
992392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x0407060500030201, 0x0C0F0E0D080B0A09
993392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x080B0A0904070605, 0x000302010C0F0E0D
994392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x0C0F0E0D080B0A09, 0x0407060500030201
995392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x000302010C0F0E0D, 0x080B0A0904070605
996392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
997392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lk_mc_backward:# mc_backward
998392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x0605040702010003, 0x0E0D0C0F0A09080B
999392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x020100030E0D0C0F, 0x0A09080B06050407
1000392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x0E0D0C0F0A09080B, 0x0605040702010003
1001392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x0A09080B06050407, 0x020100030E0D0C0F
1002392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
1003392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lk_sr:		# sr
1004392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x0706050403020100, 0x0F0E0D0C0B0A0908
1005392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x030E09040F0A0500, 0x0B06010C07020D08
1006392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x0F060D040B020900, 0x070E050C030A0108
1007392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x0B0E0104070A0D00, 0x0306090C0F020508
1008392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
1009392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lk_rcon:	# rcon
1010392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
1011392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
1012392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lk_s63:	# s63: all equal to 0x63 transformed
1013392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B
1014392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
1015392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lk_opt:	# output transform
1016392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0xFF9F4929D6B66000, 0xF7974121DEBE6808
1017392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
1018392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
1019392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lk_deskew:	# deskew tables: inverts the sbox's "skew"
1020392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
1021392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
1022392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
1023392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
1024392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Decryption stuff
1025392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Key schedule constants
1026392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
1027392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lk_dksd:	# decryption key schedule: invskew x*D
1028392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
1029392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x41C277F4B5368300, 0x5FDC69EAAB289D1E
1030392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lk_dksb:	# decryption key schedule: invskew x*B
1031392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x9A4FCA1F8550D500, 0x03D653861CC94C99
1032392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
1033392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lk_dkse:	# decryption key schedule: invskew x*E + 0x63
1034392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0xD5031CCA1FC9D600, 0x53859A4C994F5086
1035392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0xA23196054FDC7BE8, 0xCD5EF96A20B31487
1036392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lk_dks9:	# decryption key schedule: invskew x*9
1037392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0xB6116FC87ED9A700, 0x4AED933482255BFC
1038392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x4576516227143300, 0x8BB89FACE9DAFDCE
1039392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
1040392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
1041392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Decryption stuff
1042392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##  Round function constants
1043392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom##
1044392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lk_dipt:	# decryption input transform
1045392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x0F505B040B545F00, 0x154A411E114E451A
1046392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x86E383E660056500, 0x12771772F491F194
1047392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
1048392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lk_dsb9:	# decryption sbox output *9*u, *9*t
1049392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x851C03539A86D600, 0xCAD51F504F994CC9
1050392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0xC03B1789ECD74900, 0x725E2C9EB2FBA565
1051392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lk_dsbd:	# decryption sbox output *D*u, *D*t
1052392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
1053392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
1054392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lk_dsbb:	# decryption sbox output *B*u, *B*t
1055392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0xD022649296B44200, 0x602646F6B0F2D404
1056392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
1057392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lk_dsbe:	# decryption sbox output *E*u, *E*t
1058392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x46F2929626D4D000, 0x2242600464B4F6B0
1059392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x0C55A6CDFFAAC100, 0x9467F36B98593E32
1060392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lk_dsbo:	# decryption sbox final output
1061392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
1062392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.quad	0x12D7560F93441D00, 0xCA4B8159D8C58E9C
## NOTE(review): the identification string below spells "Permutation"
## as "Permutaion".  It is emitted into the object file, so it is left
## unchanged here; correct it deliberately if the embedded text matters.
1063392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.asciz	"Vector Permutaion AES for x86_64/SSSE3, Mike Hamburg (Stanford University)"
1064392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align	64
1065392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size	_vpaes_consts,.-_vpaes_consts
1066392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
1067392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
1068392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstromif ($win64) {
1069392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
1070392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom#		CONTEXT *context,DISPATCHER_CONTEXT *disp)
1071392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$rec="%rcx";
1072392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$frame="%rdx";
1073392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$context="%r8";
1074392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$disp="%r9";
1075392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
1076392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom$code.=<<___;
1077392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.extern	__imp_RtlVirtualUnwind
##
##  se_handler
##
##  Win64 exception handler for the entry points that build the
##  0xb8-byte %xmm save frame.  Arguments arrive as rec=%rcx,
##  frame=%rdx, context=%r8, disp=%r9.
##
##  HandlerData[0] and HandlerData[1] are image-relative offsets of a
##  function's body and epilogue labels.  When the faulting Rip lies
##  between them the frame is live, so the saved %xmm6-%xmm15 are
##  copied from the stack into the CONTEXT and the stack pointer is
##  stepped past the frame.  Either way the caller's %rdi/%rsi and
##  %rsp are written back into the CONTEXT, the CONTEXT is copied to
##  disp->ContextRecord, RtlVirtualUnwind is called, and the handler
##  returns ExceptionContinueSearch.
##
##  NOTE(review): before the body label the frame base is taken from
##  context->Rax rather than context->Rsp, and %rdi/%rsi are reloaded
##  from 8(%rax)/16(%rax).  Both presumably rely on the Win64 prologue
##  emitted by the perlasm translator (entry %rsp kept in %rax, %rdi
##  and %rsi parked in the caller-provided slots) -- confirm against
##  x86_64-xlate.pl.
##
1078392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.type	se_handler,\@abi-omnipotent
1079392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align	16
1080392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstromse_handler:
1081392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	push	%rsi
1082392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	push	%rdi
1083392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	push	%rbx
1084392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	push	%rbp
1085392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	push	%r12
1086392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	push	%r13
1087392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	push	%r14
1088392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	push	%r15
1089392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pushfq
	# 64 bytes of scratch: stack arguments 5-8 of RtlVirtualUnwind are
	# stored at 32..56(%rsp) further down.
1090392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	sub	\$64,%rsp
1091392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
1092392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	120($context),%rax	# pull context->Rax
1093392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	248($context),%rbx	# pull context->Rip
1094392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
1095392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	8($disp),%rsi		# disp->ImageBase
1096392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	56($disp),%r11		# disp->HandlerData
1097392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
1098392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	0(%r11),%r10d		# HandlerData[0]
1099392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	(%rsi,%r10),%r10	# prologue label
1100392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	cmp	%r10,%rbx		# context->Rip<prologue label
1101392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	jb	.Lin_prologue
1102392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
1103392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	152($context),%rax	# pull context->Rsp
1104392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
1105392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	4(%r11),%r10d		# HandlerData[1]
1106392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	(%rsi,%r10),%r10	# epilogue label
1107392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	cmp	%r10,%rbx		# context->Rip>=epilogue label
1108392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	jae	.Lin_prologue
1109392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
	# Frame is live: the offsets below mirror the Win64 prologues
	# (save area at +16, ten 16-byte registers, 0xb8-byte frame).
1110392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	16(%rax),%rsi		# %xmm save area
1111392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	512($context),%rdi	# &context.Xmm6
1112392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	\$20,%ecx		# 10*sizeof(%xmm0)/sizeof(%rax)
1113392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.long	0xa548f3fc		# cld; rep movsq
1114392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	0xb8(%rax),%rax		# adjust stack pointer
1115392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
1116392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.Lin_prologue:
1117392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	8(%rax),%rdi
1118392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	16(%rax),%rsi
1119392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	%rax,152($context)	# restore context->Rsp
1120392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	%rsi,168($context)	# restore context->Rsi
1121392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	%rdi,176($context)	# restore context->Rdi
1122392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
	# Publish the patched CONTEXT to the dispatcher's copy.
1123392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	40($disp),%rdi		# disp->ContextRecord
1124392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	$context,%rsi		# context
1125392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	\$`1232/8`,%ecx		# sizeof(CONTEXT)
1126392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.long	0xa548f3fc		# cld; rep movsq
1127392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
	# Marshal the eight RtlVirtualUnwind arguments: four in registers,
	# four in the scratch area reserved at entry.
1128392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	$disp,%rsi
1129392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
1130392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
1131392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	0(%rsi),%r8		# arg3, disp->ControlPc
1132392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
1133392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	40(%rsi),%r10		# disp->ContextRecord
1134392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	56(%rsi),%r11		# &disp->HandlerData
1135392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	lea	24(%rsi),%r12		# &disp->EstablisherFrame
1136392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	%r10,32(%rsp)		# arg5
1137392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	%r11,40(%rsp)		# arg6
1138392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	%r12,48(%rsp)		# arg7
1139392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	%rcx,56(%rsp)		# arg8, (NULL)
1140392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	call	*__imp_RtlVirtualUnwind(%rip)
1141392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
1142392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	mov	\$1,%eax		# ExceptionContinueSearch
1143392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	add	\$64,%rsp
1144392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	popfq
1145392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pop	%r15
1146392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pop	%r14
1147392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pop	%r13
1148392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pop	%r12
1149392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pop	%rbp
1150392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pop	%rbx
1151392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pop	%rdi
1152392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	pop	%rsi
1153392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	ret
1154392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.size	se_handler,.-se_handler
1155392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
1156392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.section	.pdata
1157392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align	4
1158392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	.LSEH_begin_${PREFIX}_set_encrypt_key
1159392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	.LSEH_end_${PREFIX}_set_encrypt_key
1160392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	.LSEH_info_${PREFIX}_set_encrypt_key
1161392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
1162392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	.LSEH_begin_${PREFIX}_set_decrypt_key
1163392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	.LSEH_end_${PREFIX}_set_decrypt_key
1164392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	.LSEH_info_${PREFIX}_set_decrypt_key
1165392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
1166392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	.LSEH_begin_${PREFIX}_encrypt
1167392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	.LSEH_end_${PREFIX}_encrypt
1168392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	.LSEH_info_${PREFIX}_encrypt
1169392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
1170392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	.LSEH_begin_${PREFIX}_decrypt
1171392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	.LSEH_end_${PREFIX}_decrypt
1172392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	.LSEH_info_${PREFIX}_decrypt
1173392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
1174392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	.LSEH_begin_${PREFIX}_cbc_encrypt
1175392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	.LSEH_end_${PREFIX}_cbc_encrypt
1176392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	.LSEH_info_${PREFIX}_cbc_encrypt
1177392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
1178392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.section	.xdata
1179392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.align	8
1180392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.LSEH_info_${PREFIX}_set_encrypt_key:
1181392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.byte	9,0,0,0
1182392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	se_handler
1183392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	.Lenc_key_body,.Lenc_key_epilogue	# HandlerData[]
1184392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.LSEH_info_${PREFIX}_set_decrypt_key:
1185392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.byte	9,0,0,0
1186392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	se_handler
1187392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	.Ldec_key_body,.Ldec_key_epilogue	# HandlerData[]
1188392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.LSEH_info_${PREFIX}_encrypt:
1189392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.byte	9,0,0,0
1190392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	se_handler
1191392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	.Lenc_body,.Lenc_epilogue		# HandlerData[]
1192392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.LSEH_info_${PREFIX}_decrypt:
1193392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.byte	9,0,0,0
1194392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	se_handler
1195392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	.Ldec_body,.Ldec_epilogue		# HandlerData[]
1196392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom.LSEH_info_${PREFIX}_cbc_encrypt:
1197392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.byte	9,0,0,0
1198392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	se_handler
1199392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom	.rva	.Lcbc_body,.Lcbc_epilogue		# HandlerData[]
1200392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom___
1201392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom}
1202392aa7cc7d2b122614c5393c3e357da07fd07af3Brian Carlstrom
# Expand inline compile-time expressions: every `...` span in the generated
# assembly text is replaced by the result of evaluating its contents as Perl.
# The evaluated text comes from this script's own template, not from
# external input.
$code =~ s/\`([^\`]*)\`/eval($1)/gem;

# Emit the finished assembly.  The write is checked so that an I/O failure
# aborts the script with a non-zero status instead of passing silently.
print $code or die "error writing to STDOUT: $!";

# Buffered write errors (e.g. a full disk) only surface when the handle is
# flushed at close time, so the close must be checked as well; otherwise a
# truncated output file would look like a successful run to the caller.
close STDOUT or die "error closing STDOUT: $!";
1208