#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
#
# Specific modes and adaptation for Linux kernel by Ard Biesheuvel
# <ard.biesheuvel@linaro.org>. Permission to use under GPL terms is
# granted.
# ====================================================================
13d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
# Bit-sliced AES for ARM NEON
#
# February 2012.
#
# This implementation is a direct adaptation of the bsaes-x86_64 module
# for ARM NEON. Except that this module is endian-neutral [in the sense
# that it can be compiled for either endianness] by courtesy of vld1.8's
# neutrality. The initial version doesn't implement an interface to
# OpenSSL, only low-level primitives and unsupported entry points, just
# enough to collect performance results, which for Cortex-A8 core are:
#
# encrypt	19.5 cycles per byte processed with 128-bit key
# decrypt	22.1 cycles per byte processed with 128-bit key
# key conv.	440  cycles per 128-bit key/0.18 of 8x block
#
# Snapdragon S4 encrypts a byte in 17.6 cycles and decrypts in 19.7,
# which is [much] worse than anticipated (for further details see
# http://www.openssl.org/~appro/Snapdragon-S4.html).
#
# Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code
# manages in 20.0 cycles].
#
# When comparing to x86_64 results keep in mind that the NEON unit is
# [mostly] single-issue and thus can't [fully] benefit from
# instruction-level parallelism. And when comparing to aes-armv4
# results keep in mind key schedule conversion overhead (see
# bsaes-x86_64.pl for further details)...
#
#						<appro@openssl.org>
43d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
# April-August 2013
#
# Add CBC, CTR and XTS subroutines, adapt for kernel use.
#
#					<ard.biesheuvel@linaro.org>
49d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
# Pick the output file from the command line: arguments are discarded
# until one looks like a bare file name with an extension (word characters
# and dashes, a single dot, a word-character suffix, no directory part) or
# until the argument list is exhausted.
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
# Redirect STDOUT only when such a name was found; otherwise STDOUT is left
# untouched.  A failed open is fatal rather than silently ignored.
if ($output) {
	open(STDOUT,">",$output) or die "can't open $output: $!";
}

# Names of ARM core registers r0-r3.
my ($inp,$out,$len,$key)=("r0","r1","r2","r3");
# Names of the sixteen NEON quad registers, q0..q15.
my @XMM=map("q$_",(0..15));
55d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
56d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root{
# Register names used by the emitters in this scope.  This $key (r4)
# shadows the file-level $key (r3) until the enclosing block ends.
my ($key,$rounds,$const)=("r4","r5","r6");
58d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
# Dlo("qN") -> "d<2N>": name of the D register that aliases the low half
# of NEON quad register qN.  Returns "" when the argument contains no
# q-register name.  Because of the empty prototype it has to be invoked
# with the & sigil, i.e. &Dlo(...).
sub Dlo()   { shift=~m|q([1]?[0-9])|?"d".($1*2):"";     }
# Dhi("qN") -> "d<2N+1>": name of the D register that aliases the high
# half of NEON quad register qN.  Returns "" when the argument contains no
# q-register name.  Because of the empty prototype it has to be invoked
# with the & sigil, i.e. &Dhi(...).
sub Dhi()   { shift=~m|q([1]?[0-9])|?"d".($1*2+1):"";   }
61d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
# Sbox(b0..b7, t0..t3, s0..s3)
#
# Append the code for the bit-sliced forward S-box to $code by chaining
# three emitters: InBasisChange, Inv_GF256 and OutBasisChange.  Nothing is
# emitted here directly.  The index lists handed to the second and third
# stage follow the output register order documented on the stage before.
#
#   @_[0..7]    registers holding bit planes b0..b7 (updated in place)
#   @_[8..11]   scratch registers, passed to Inv_GF256 as its t0..t3
#   @_[12..15]  scratch registers, passed to Inv_GF256 as its s0..s3
sub Sbox {
# input in  lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
# output in lsb > [b0, b1, b4, b6, b3, b7, b2, b5] < msb
my @b=@_[0..7];
my @t=@_[8..11];
my @s=@_[12..15];
	&InBasisChange	(@b);
	&Inv_GF256	(@b[6,5,0,3,7,1,4,2],@t,@s);
	&OutBasisChange	(@b[7,1,4,2,6,5,0,3]);
}
72d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
# InBasisChange(b0..b7)
#
# Append a fixed sequence of register-to-register veor instructions over
# the eight given registers to $code.  No scratch registers are used; the
# result is left in the same registers, in the permuted order stated
# below.  The text between the heredoc markers is emitted assembly, so it
# must not carry Perl comments.
sub InBasisChange {
# input in  lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb
my @b=@_[0..7];
$code.=<<___;
	veor	@b[2], @b[2], @b[1]
	veor	@b[5], @b[5], @b[6]
	veor	@b[3], @b[3], @b[0]
	veor	@b[6], @b[6], @b[2]
	veor	@b[5], @b[5], @b[0]

	veor	@b[6], @b[6], @b[3]
	veor	@b[3], @b[3], @b[7]
	veor	@b[7], @b[7], @b[5]
	veor	@b[3], @b[3], @b[4]
	veor	@b[4], @b[4], @b[5]

	veor	@b[2], @b[2], @b[7]
	veor	@b[3], @b[3], @b[1]
	veor	@b[1], @b[1], @b[5]
___
}
95d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
# OutBasisChange(b0..b7)
#
# Append a fixed sequence of register-to-register veor instructions over
# the eight given registers to $code.  No scratch registers are used; the
# result is left in the same registers, in the permuted order stated
# below.  The text between the heredoc markers is emitted assembly, so it
# must not carry Perl comments.
sub OutBasisChange {
# input in  lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
# output in lsb > [b6, b1, b2, b4, b7, b0, b3, b5] < msb
my @b=@_[0..7];
$code.=<<___;
	veor	@b[0], @b[0], @b[6]
	veor	@b[1], @b[1], @b[4]
	veor	@b[4], @b[4], @b[6]
	veor	@b[2], @b[2], @b[0]
	veor	@b[6], @b[6], @b[1]

	veor	@b[1], @b[1], @b[5]
	veor	@b[5], @b[5], @b[3]
	veor	@b[3], @b[3], @b[7]
	veor	@b[7], @b[7], @b[5]
	veor	@b[2], @b[2], @b[5]

	veor	@b[4], @b[4], @b[7]
___
}
116d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
# InvSbox(b0..b7, t0..t3, s0..s3)
#
# Append the code for the bit-sliced inverse S-box to $code by chaining
# InvInBasisChange, Inv_GF256 and InvOutBasisChange.  Nothing is emitted
# here directly.  Inv_GF256 receives the registers in the same index order
# that InvInBasisChange applies to its own arguments.
#
#   @_[0..7]    registers holding bit planes b0..b7 (updated in place)
#   @_[8..11]   scratch registers, passed to Inv_GF256 as its t0..t3
#   @_[12..15]  scratch registers, passed to Inv_GF256 as its s0..s3
sub InvSbox {
# input in lsb 	> [b0, b1, b2, b3, b4, b5, b6, b7] < msb
# output in lsb	> [b0, b1, b6, b4, b2, b7, b3, b5] < msb
my @b=@_[0..7];
my @t=@_[8..11];
my @s=@_[12..15];
	&InvInBasisChange	(@b);
	&Inv_GF256		(@b[5,1,2,6,3,7,0,4],@t,@s);
	&InvOutBasisChange	(@b[3,7,0,4,5,1,2,6]);
}
127d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
# InvInBasisChange(b0..b7)
#
# Append a fixed sequence of register-to-register veor instructions to
# $code.  The eight arguments are first re-indexed as
# @_[5,1,2,6,3,7,0,4], so @b[n] in the template below does NOT refer to
# the caller's n-th register.  No scratch registers are used.  The text
# between the heredoc markers is emitted assembly, so it must not carry
# Perl comments.
sub InvInBasisChange {		# OutBasisChange in reverse (with twist)
my @b=@_[5,1,2,6,3,7,0,4];
$code.=<<___;
	 veor	@b[1], @b[1], @b[7]
	veor	@b[4], @b[4], @b[7]

	veor	@b[7], @b[7], @b[5]
	 veor	@b[1], @b[1], @b[3]
	veor	@b[2], @b[2], @b[5]
	veor	@b[3], @b[3], @b[7]

	veor	@b[6], @b[6], @b[1]
	veor	@b[2], @b[2], @b[0]
	 veor	@b[5], @b[5], @b[3]
	veor	@b[4], @b[4], @b[6]
	veor	@b[0], @b[0], @b[6]
	veor	@b[1], @b[1], @b[4]
___
}
147d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
# InvOutBasisChange(b0..b7)
#
# Append a fixed sequence of register-to-register veor instructions to
# $code.  The eight arguments are first re-indexed as
# @_[2,5,7,3,6,1,0,4], so @b[n] in the template below does NOT refer to
# the caller's n-th register.  No scratch registers are used.  The text
# between the heredoc markers is emitted assembly, so it must not carry
# Perl comments.
sub InvOutBasisChange {		# InBasisChange in reverse
my @b=@_[2,5,7,3,6,1,0,4];
$code.=<<___;
	veor	@b[1], @b[1], @b[5]
	veor	@b[2], @b[2], @b[7]

	veor	@b[3], @b[3], @b[1]
	veor	@b[4], @b[4], @b[5]
	veor	@b[7], @b[7], @b[5]
	veor	@b[3], @b[3], @b[4]
	 veor 	@b[5], @b[5], @b[0]
	veor	@b[3], @b[3], @b[7]
	 veor	@b[6], @b[6], @b[2]
	 veor	@b[2], @b[2], @b[1]
	veor	@b[6], @b[6], @b[3]

	veor	@b[3], @b[3], @b[0]
	veor	@b[5], @b[5], @b[6]
___
}
168d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
# Mul_GF4(x0, x1, y0, y1, t0, t1)
#
# Append seven instructions to $code which, in terms of the values on
# entry, compute
#
#   x1 <- (x1 & y0) ^ (x0 & (y0 ^ y1))
#   x0 <- ((x0 ^ x1) & y1) ^ (x1 & y0)
#
# y0 and y1 are only read.  Both t0 and t1 are overwritten.
sub Mul_GF4 {
#;*************************************************************
#;* Mul_GF4: Input x0-x1,y0-y1 Output x0-x1 Temp t0-t1        *
#;*************************************************************
my ($x0,$x1,$y0,$y1,$t0,$t1)=@_;
$code.=<<___;
	veor 	$t0, $y0, $y1
	vand	$t0, $t0, $x0
	veor	$x0, $x0, $x1
	vand	$t1, $x1, $y0
	vand	$x0, $x0, $y1
	veor	$x1, $t1, $t0
	veor	$x0, $x0, $t1
___
}
184d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
# Mul_GF4_N(x0, x1, y0, y1, t0)
#
# Append seven instructions to $code which, in terms of the values on
# entry, compute
#
#   x1 <- (x1 & y0) ^ ((x0 ^ x1) & y1)
#   x0 <- ((x0 ^ x1) & y1) ^ (x0 & (y0 ^ y1))
#
# y0 and y1 are only read; t0 is overwritten.  Kept for reference only:
# callers use the interleaved Mul_GF4_N_GF4 instead.
sub Mul_GF4_N {				# not used, see next subroutine
# multiply and scale by N
my ($x0,$x1,$y0,$y1,$t0)=@_;
$code.=<<___;
	veor	$t0, $y0, $y1
	vand	$t0, $t0, $x0
	veor	$x0, $x0, $x1
	vand	$x1, $x1, $y0
	vand	$x0, $x0, $y1
	veor	$x1, $x1, $x0
	veor	$x0, $x0, $t0
___
}
198d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
# Mul_GF4_N_GF4(x0, x1, y0, y1, t0,  x2, x3, y2, y3, t1)
#
# Append two independent seven-instruction streams to $code, interleaved
# line by line (the second stream is the one indented by an extra space):
#
#   stream 1 on (x0,x1,y0,y1,t0): same instruction sequence as Mul_GF4_N
#   stream 2 on (x2,x3,y2,y3,t1): same result as Mul_GF4, but using a
#                                 single scratch register
#
# y0..y3 are only read; t0 and t1 are overwritten.
sub Mul_GF4_N_GF4 {
# interleaved Mul_GF4_N and Mul_GF4
my ($x0,$x1,$y0,$y1,$t0,
    $x2,$x3,$y2,$y3,$t1)=@_;
$code.=<<___;
	veor	$t0, $y0, $y1
	 veor 	$t1, $y2, $y3
	vand	$t0, $t0, $x0
	 vand	$t1, $t1, $x2
	veor	$x0, $x0, $x1
	 veor	$x2, $x2, $x3
	vand	$x1, $x1, $y0
	 vand	$x3, $x3, $y2
	vand	$x0, $x0, $y1
	 vand	$x2, $x2, $y3
	veor	$x1, $x1, $x0
	 veor	$x2, $x2, $x3
	veor	$x0, $x0, $t0
	 veor	$x3, $x3, $t1
___
}
# Mul_GF16_2(x0..x7, y0..y3, t0..t3)
#
# Append code to $code that combines each of the two register quads
# x0..x3 and x4..x7 with the same y0..y3, built from Mul_GF4 and
# Mul_GF4_N_GF4 plus the veor glue below.  Going by the name this is a
# pair of GF(2^4) multiplications by a common operand.
#
#   @_[0..7]    x0..x7, updated in place
#   @_[8..11]   y0..y3; y0 and y1 are temporarily XORed with y2 and y3 and
#               restored by a second, identical XOR before returning
#   @_[12..15]  scratch registers t0..t3, all overwritten
sub Mul_GF16_2 {
my @x=@_[0..7];
my @y=@_[8..11];
my @t=@_[12..15];
$code.=<<___;
	veor	@t[0], @x[0], @x[2]
	veor	@t[1], @x[1], @x[3]
___
	&Mul_GF4  	(@x[0], @x[1], @y[0], @y[1], @t[2..3]);
$code.=<<___;
	veor	@y[0], @y[0], @y[2]
	veor	@y[1], @y[1], @y[3]
___
	&Mul_GF4_N_GF4	(@t[0], @t[1], @y[0], @y[1], @t[3],
			 @x[2], @x[3], @y[2], @y[3], @t[2]);
$code.=<<___;
	veor	@x[0], @x[0], @t[0]
	veor	@x[2], @x[2], @t[0]
	veor	@x[1], @x[1], @t[1]
	veor	@x[3], @x[3], @t[1]

	veor	@t[0], @x[4], @x[6]
	veor	@t[1], @x[5], @x[7]
___
	&Mul_GF4_N_GF4	(@t[0], @t[1], @y[0], @y[1], @t[3],
			 @x[6], @x[7], @y[2], @y[3], @t[2]);
$code.=<<___;
	veor	@y[0], @y[0], @y[2]
	veor	@y[1], @y[1], @y[3]
___
	&Mul_GF4  	(@x[4], @x[5], @y[0], @y[1], @t[2..3]);
$code.=<<___;
	veor	@x[4], @x[4], @t[0]
	veor	@x[6], @x[6], @t[0]
	veor	@x[5], @x[5], @t[1]
	veor	@x[7], @x[7], @t[1]
___
}
# Inv_GF256(x0..x7, t0..t3, s0..s3)
#
# Append the code for the field inversion at the core of the S-box to
# $code.  It is produced in two steps:
#
#   1. One heredoc of straight-line NEON logic that only READS x0..x7 and
#      writes the eight scratch registers; its results end up in
#      s3, s2, s1 and t1.
#   2. A call to Mul_GF16_2 with those four registers as its y operand and
#      the remaining scratch registers (s0, t0, t2, t3) as its temps; this
#      is the step that updates x0..x7.
#
#   @_[0..7]    x0..x7, updated in place (see the output order note at the
#               end of the body)
#   @_[8..11]   scratch registers t0..t3
#   @_[12..15]  scratch registers s0..s3
#
# The text between the heredoc markers is emitted assembly: lines starting
# with '@' there are assembler comments, and Perl comments must not be
# added inside.  Note that the heredoc interpolates, so the "\t0"/"\s0"
# names in the emitted "@ Inv_GF16" comment pass through Perl's escape
# processing and do not appear verbatim in the output; this only affects
# an assembler comment.
sub Inv_GF256 {
#;********************************************************************
#;* Inv_GF256: Input x0-x7 Output x0-x7 Temp t0-t3,s0-s3 (144)       *
#;********************************************************************
my @x=@_[0..7];
my @t=@_[8..11];
my @s=@_[12..15];
# direct optimizations from hardware
$code.=<<___;
	veor	@t[3], @x[4], @x[6]
	veor	@t[2], @x[5], @x[7]
	veor	@t[1], @x[1], @x[3]
	veor	@s[1], @x[7], @x[6]
	 vmov	@t[0], @t[2]
	veor	@s[0], @x[0], @x[2]

	vorr	@t[2], @t[2], @t[1]
	veor	@s[3], @t[3], @t[0]
	vand	@s[2], @t[3], @s[0]
	vorr	@t[3], @t[3], @s[0]
	veor	@s[0], @s[0], @t[1]
	vand	@t[0], @t[0], @t[1]
	veor	@t[1], @x[3], @x[2]
	vand	@s[3], @s[3], @s[0]
	vand	@s[1], @s[1], @t[1]
	veor	@t[1], @x[4], @x[5]
	veor	@s[0], @x[1], @x[0]
	veor	@t[3], @t[3], @s[1]
	veor	@t[2], @t[2], @s[1]
	vand	@s[1], @t[1], @s[0]
	vorr	@t[1], @t[1], @s[0]
	veor	@t[3], @t[3], @s[3]
	veor	@t[0], @t[0], @s[1]
	veor	@t[2], @t[2], @s[2]
	veor	@t[1], @t[1], @s[3]
	veor	@t[0], @t[0], @s[2]
	vand	@s[0], @x[7], @x[3]
	veor	@t[1], @t[1], @s[2]
	vand	@s[1], @x[6], @x[2]
	vand	@s[2], @x[5], @x[1]
	vorr	@s[3], @x[4], @x[0]
	veor	@t[3], @t[3], @s[0]
	veor	@t[1], @t[1], @s[2]
	veor	@t[0], @t[0], @s[3]
	veor	@t[2], @t[2], @s[1]

	@ Inv_GF16 \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3

	@ new smaller inversion

	vand	@s[2], @t[3], @t[1]
	vmov	@s[0], @t[0]

	veor	@s[1], @t[2], @s[2]
	veor	@s[3], @t[0], @s[2]
	veor	@s[2], @t[0], @s[2]	@ @s[2]=@s[3]

	vbsl	@s[1], @t[1], @t[0]
	vbsl	@s[3], @t[3], @t[2]
	veor	@t[3], @t[3], @t[2]

	vbsl	@s[0], @s[1], @s[2]
	vbsl	@t[0], @s[2], @s[1]

	vand	@s[2], @s[0], @s[3]
	veor	@t[1], @t[1], @t[0]

	veor	@s[2], @s[2], @t[3]
___
# output in s3, s2, s1, t1

# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \t2, \t3, \t0, \t1, \s0, \s1, \s2, \s3

# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
	&Mul_GF16_2(@x,@s[3,2,1],@t[1],@s[0],@t[0,2,3]);

### output msb > [x3,x2,x1,x0,x7,x6,x5,x4] < lsb
}
336d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
# AES linear components
338d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
# ShiftRows(x0..x7, t0..t3, ..., mask)
#
# Append code to $code that, for each of the eight state registers:
#
#   1. loads one q register of key material through the pointer in $key
#      (vldmia with writeback, so the pointer advances past all eight
#      loaded registers: four at once, then four more one at a time),
#   2. XORs it with x[i] into a scratch register, and
#   3. writes x[i] back as the vtbl.8 byte permutation of that scratch
#      register under `mask`, low and high D halves separately.
#
# So despite its name the emitted code also mixes in the key material.
#
#   @_[0..7]   x0..x7, updated in place
#   @_[8..11]  scratch registers t0..t3; the "{t0-t3}" register-list
#              syntax requires them to be consecutive q registers
#   last arg   q register holding the vtbl index vector (taken with pop,
#              so it must be the final argument)
#
# $key is the register name from the enclosing scope.  The `&Dlo(...)` /
# `&Dhi(...)` backtick expressions and the {qN} table operands are emitted
# literally here; they are presumably rewritten by a later pass outside
# this block -- TODO confirm.  The text between the heredoc markers is
# emitted assembly, so it must not carry Perl comments.
sub ShiftRows {
my @x=@_[0..7];
my @t=@_[8..11];
my $mask=pop;
$code.=<<___;
	vldmia	$key!, {@t[0]-@t[3]}
	veor	@t[0], @t[0], @x[0]
	veor	@t[1], @t[1], @x[1]
	vtbl.8	`&Dlo(@x[0])`, {@t[0]}, `&Dlo($mask)`
	vtbl.8	`&Dhi(@x[0])`, {@t[0]}, `&Dhi($mask)`
	vldmia	$key!, {@t[0]}
	veor	@t[2], @t[2], @x[2]
	vtbl.8	`&Dlo(@x[1])`, {@t[1]}, `&Dlo($mask)`
	vtbl.8	`&Dhi(@x[1])`, {@t[1]}, `&Dhi($mask)`
	vldmia	$key!, {@t[1]}
	veor	@t[3], @t[3], @x[3]
	vtbl.8	`&Dlo(@x[2])`, {@t[2]}, `&Dlo($mask)`
	vtbl.8	`&Dhi(@x[2])`, {@t[2]}, `&Dhi($mask)`
	vldmia	$key!, {@t[2]}
	vtbl.8	`&Dlo(@x[3])`, {@t[3]}, `&Dlo($mask)`
	vtbl.8	`&Dhi(@x[3])`, {@t[3]}, `&Dhi($mask)`
	vldmia	$key!, {@t[3]}
	veor	@t[0], @t[0], @x[4]
	veor	@t[1], @t[1], @x[5]
	vtbl.8	`&Dlo(@x[4])`, {@t[0]}, `&Dlo($mask)`
	vtbl.8	`&Dhi(@x[4])`, {@t[0]}, `&Dhi($mask)`
	veor	@t[2], @t[2], @x[6]
	vtbl.8	`&Dlo(@x[5])`, {@t[1]}, `&Dlo($mask)`
	vtbl.8	`&Dhi(@x[5])`, {@t[1]}, `&Dhi($mask)`
	veor	@t[3], @t[3], @x[7]
	vtbl.8	`&Dlo(@x[6])`, {@t[2]}, `&Dlo($mask)`
	vtbl.8	`&Dhi(@x[6])`, {@t[2]}, `&Dhi($mask)`
	vtbl.8	`&Dlo(@x[7])`, {@t[3]}, `&Dlo($mask)`
	vtbl.8	`&Dhi(@x[7])`, {@t[3]}, `&Dhi($mask)`
___
}
375d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
376d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootsub MixColumns {
377d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root# modified to emit output in order suitable for feeding back to aesenc[last]
378d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootmy @x=@_[0..7];
379d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootmy @t=@_[8..15];
380d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootmy $inv=@_[16];	# optional
381d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root$code.=<<___;
382d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vext.8	@t[0], @x[0], @x[0], #12	@ x0 <<< 32
383d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vext.8	@t[1], @x[1], @x[1], #12
384d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 veor	@x[0], @x[0], @t[0]		@ x0 ^ (x0 <<< 32)
385d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vext.8	@t[2], @x[2], @x[2], #12
386d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 veor	@x[1], @x[1], @t[1]
387d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vext.8	@t[3], @x[3], @x[3], #12
388d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 veor	@x[2], @x[2], @t[2]
389d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vext.8	@t[4], @x[4], @x[4], #12
390d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 veor	@x[3], @x[3], @t[3]
391d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vext.8	@t[5], @x[5], @x[5], #12
392d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 veor	@x[4], @x[4], @t[4]
393d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vext.8	@t[6], @x[6], @x[6], #12
394d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 veor	@x[5], @x[5], @t[5]
395d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vext.8	@t[7], @x[7], @x[7], #12
396d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 veor	@x[6], @x[6], @t[6]
397d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
398d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@t[1], @t[1], @x[0]
399d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 veor	@x[7], @x[7], @t[7]
400d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 vext.8	@x[0], @x[0], @x[0], #8		@ (x0 ^ (x0 <<< 32)) <<< 64)
401d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@t[2], @t[2], @x[1]
402d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@t[0], @t[0], @x[7]
403d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@t[1], @t[1], @x[7]
404d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 vext.8	@x[1], @x[1], @x[1], #8
405d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@t[5], @t[5], @x[4]
406d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 veor	@x[0], @x[0], @t[0]
407d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@t[6], @t[6], @x[5]
408d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 veor	@x[1], @x[1], @t[1]
409d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 vext.8	@t[0], @x[4], @x[4], #8
410d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@t[4], @t[4], @x[3]
411d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 vext.8	@t[1], @x[5], @x[5], #8
412d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@t[7], @t[7], @x[6]
413d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 vext.8	@x[4], @x[3], @x[3], #8
414d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@t[3], @t[3], @x[2]
415d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 vext.8	@x[5], @x[7], @x[7], #8
416d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@t[4], @t[4], @x[7]
417d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 vext.8	@x[3], @x[6], @x[6], #8
418d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@t[3], @t[3], @x[7]
419d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 vext.8	@x[6], @x[2], @x[2], #8
420d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@x[7], @t[1], @t[5]
421d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root___
422d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root$code.=<<___ if (!$inv);
423d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@x[2], @t[0], @t[4]
424d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@x[4], @x[4], @t[3]
425d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@x[5], @x[5], @t[7]
426d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@x[3], @x[3], @t[6]
427d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 @ vmov	@x[2], @t[0]
428d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@x[6], @x[6], @t[2]
429d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 @ vmov	@x[7], @t[1]
430d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root___
431d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root$code.=<<___ if ($inv);
432d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@t[3], @t[3], @x[4]
433d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@x[5], @x[5], @t[7]
434d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@x[2], @x[3], @t[6]
435d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@x[3], @t[0], @t[4]
436d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@x[4], @x[6], @t[2]
437d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov	@x[6], @t[3]
438d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 @ vmov	@x[7], @t[1]
439d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root___
440d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root}
441d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
# InvMixColumns_orig(@x[0..7], @t[0..7])
#
# Appends a straight-line NEON sequence to $code.  The first eight arguments
# are the register names of the bit-sliced state (@x), the next eight are
# scratch registers (@t).  The emitted code is organised in four passes whose
# own assembler comments label them "multiplication by 0x0e", "0x0b", "0x0d"
# and "0x09"; between passes the saved copies in @t are rotated with
# "vext.8 ..., #12".
#
# After the first pass the state is addressed through @y, a re-ordering of @x
# (see below), because that pass leaves its results in permuted registers.
#
# NOTE(review): the last pass stores its results through @XMM[0..7] directly
# instead of through the @x/@y arguments, so the output registers do not
# follow the order in which the caller passed them.  No call to this sub is
# visible in this part of the file (InvMixColumns below is what the decrypt
# path calls) -- confirm before reusing it.
sub InvMixColumns_orig {
my @x=@_[0..7];
my @t=@_[8..15];

# Pass 1.  The bracketed numbers in the assembler comments ([0]..[7]) mark the
# instruction after which the register holds a finished output bit of this
# pass; the digits before them list which input bits have been xor-ed in.
$code.=<<___;
	@ multiplication by 0x0e
	vext.8	@t[7], @x[7], @x[7], #12
	vmov	@t[2], @x[2]
	veor	@x[2], @x[2], @x[5]		@ 2 5
	veor	@x[7], @x[7], @x[5]		@ 7 5
	vext.8	@t[0], @x[0], @x[0], #12
	vmov	@t[5], @x[5]
	veor	@x[5], @x[5], @x[0]		@ 5 0		[1]
	veor	@x[0], @x[0], @x[1]		@ 0 1
	vext.8	@t[1], @x[1], @x[1], #12
	veor	@x[1], @x[1], @x[2]		@ 1 25
	veor	@x[0], @x[0], @x[6]		@ 01 6		[2]
	vext.8	@t[3], @x[3], @x[3], #12
	veor	@x[1], @x[1], @x[3]		@ 125 3		[4]
	veor	@x[2], @x[2], @x[0]		@ 25 016	[3]
	veor	@x[3], @x[3], @x[7]		@ 3 75
	veor	@x[7], @x[7], @x[6]		@ 75 6		[0]
	vext.8	@t[6], @x[6], @x[6], #12
	vmov	@t[4], @x[4]
	veor	@x[6], @x[6], @x[4]		@ 6 4
	veor	@x[4], @x[4], @x[3]		@ 4 375		[6]
	veor	@x[3], @x[3], @x[7]		@ 375 756=36
	veor	@x[6], @x[6], @t[5]		@ 64 5		[7]
	veor	@x[3], @x[3], @t[2]		@ 36 2
	vext.8	@t[5], @t[5], @t[5], #12
	veor	@x[3], @x[3], @t[4]		@ 362 4		[5]
___
					# @y[i] names the register tagged [i] above.
					my @y = @x[7,5,0,2,1,3,4,6];
# Passes 2-4.  @t[6]/@t[7] are temporarily overwritten with xor combinations
# ("clobber") and later xor-ed back ("restore"), as the assembler comments say.
$code.=<<___;
	@ multiplication by 0x0b
	veor	@y[1], @y[1], @y[0]
	veor	@y[0], @y[0], @t[0]
	vext.8	@t[2], @t[2], @t[2], #12
	veor	@y[1], @y[1], @t[1]
	veor	@y[0], @y[0], @t[5]
	vext.8	@t[4], @t[4], @t[4], #12
	veor	@y[1], @y[1], @t[6]
	veor	@y[0], @y[0], @t[7]
	veor	@t[7], @t[7], @t[6]		@ clobber t[7]

	veor	@y[3], @y[3], @t[0]
	 veor	@y[1], @y[1], @y[0]
	vext.8	@t[0], @t[0], @t[0], #12
	veor	@y[2], @y[2], @t[1]
	veor	@y[4], @y[4], @t[1]
	vext.8	@t[1], @t[1], @t[1], #12
	veor	@y[2], @y[2], @t[2]
	veor	@y[3], @y[3], @t[2]
	veor	@y[5], @y[5], @t[2]
	veor	@y[2], @y[2], @t[7]
	vext.8	@t[2], @t[2], @t[2], #12
	veor	@y[3], @y[3], @t[3]
	veor	@y[6], @y[6], @t[3]
	veor	@y[4], @y[4], @t[3]
	veor	@y[7], @y[7], @t[4]
	vext.8	@t[3], @t[3], @t[3], #12
	veor	@y[5], @y[5], @t[4]
	veor	@y[7], @y[7], @t[7]
	veor	@t[7], @t[7], @t[5]		@ clobber t[7] even more
	veor	@y[3], @y[3], @t[5]
	veor	@y[4], @y[4], @t[4]

	veor	@y[5], @y[5], @t[7]
	vext.8	@t[4], @t[4], @t[4], #12
	veor	@y[6], @y[6], @t[7]
	veor	@y[4], @y[4], @t[7]

	veor	@t[7], @t[7], @t[5]
	vext.8	@t[5], @t[5], @t[5], #12

	@ multiplication by 0x0d
	veor	@y[4], @y[4], @y[7]
	 veor	@t[7], @t[7], @t[6]		@ restore t[7]
	veor	@y[7], @y[7], @t[4]
	vext.8	@t[6], @t[6], @t[6], #12
	veor	@y[2], @y[2], @t[0]
	veor	@y[7], @y[7], @t[5]
	vext.8	@t[7], @t[7], @t[7], #12
	veor	@y[2], @y[2], @t[2]

	veor	@y[3], @y[3], @y[1]
	veor	@y[1], @y[1], @t[1]
	veor	@y[0], @y[0], @t[0]
	veor	@y[3], @y[3], @t[0]
	veor	@y[1], @y[1], @t[5]
	veor	@y[0], @y[0], @t[5]
	vext.8	@t[0], @t[0], @t[0], #12
	veor	@y[1], @y[1], @t[7]
	veor	@y[0], @y[0], @t[6]
	veor	@y[3], @y[3], @y[1]
	veor	@y[4], @y[4], @t[1]
	vext.8	@t[1], @t[1], @t[1], #12

	veor	@y[7], @y[7], @t[7]
	veor	@y[4], @y[4], @t[2]
	veor	@y[5], @y[5], @t[2]
	veor	@y[2], @y[2], @t[6]
	veor	@t[6], @t[6], @t[3]		@ clobber t[6]
	vext.8	@t[2], @t[2], @t[2], #12
	veor	@y[4], @y[4], @y[7]
	veor	@y[3], @y[3], @t[6]

	veor	@y[6], @y[6], @t[6]
	veor	@y[5], @y[5], @t[5]
	vext.8	@t[5], @t[5], @t[5], #12
	veor	@y[6], @y[6], @t[4]
	vext.8	@t[4], @t[4], @t[4], #12
	veor	@y[5], @y[5], @t[6]
	veor	@y[6], @y[6], @t[7]
	vext.8	@t[7], @t[7], @t[7], #12
	veor	@t[6], @t[6], @t[3]		@ restore t[6]
	vext.8	@t[3], @t[3], @t[3], #12

	@ multiplication by 0x09
	veor	@y[4], @y[4], @y[1]
	veor	@t[1], @t[1], @y[1]		@ t[1]=y[1]
	veor	@t[0], @t[0], @t[5]		@ clobber t[0]
	vext.8	@t[6], @t[6], @t[6], #12
	veor	@t[1], @t[1], @t[5]
	veor	@y[3], @y[3], @t[0]
	veor	@t[0], @t[0], @y[0]		@ t[0]=y[0]
	veor	@t[1], @t[1], @t[6]
	veor	@t[6], @t[6], @t[7]		@ clobber t[6]
	veor	@y[4], @y[4], @t[1]
	veor	@y[7], @y[7], @t[4]
	veor	@y[6], @y[6], @t[3]
	veor	@y[5], @y[5], @t[2]
	veor	@t[4], @t[4], @y[4]		@ t[4]=y[4]
	veor	@t[3], @t[3], @y[3]		@ t[3]=y[3]
	veor	@t[5], @t[5], @y[5]		@ t[5]=y[5]
	veor	@t[2], @t[2], @y[2]		@ t[2]=y[2]
	veor	@t[3], @t[3], @t[7]
	veor	@XMM[5], @t[5], @t[6]
	veor	@XMM[6], @t[6], @y[6]		@ t[6]=y[6]
	veor	@XMM[2], @t[2], @t[6]
	veor	@XMM[7], @t[7], @y[7]		@ t[7]=y[7]

	vmov	@XMM[0], @t[0]
	vmov	@XMM[1], @t[1]
	@ vmov	@XMM[2], @t[2]
	vmov	@XMM[3], @t[3]
	vmov	@XMM[4], @t[4]
	@ vmov	@XMM[5], @t[5]
	@ vmov	@XMM[6], @t[6]
	@ vmov	@XMM[7], @t[7]
___
}
594d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
# InvMixColumns(@x[0..7], @t[0..7])
#
# Appends the inverse MixColumns step to $code.  The first eight arguments
# name the registers holding the bit-sliced state, the next eight name scratch
# registers.  Instead of multiplying by 0e/0b/0d/09 directly, the state is
# first multiplied by the 05-00-04-00 circulant (the here-doc below) and the
# result is then handed to MixColumns, per the factorisation in the comment.
sub InvMixColumns {
my @x=@_[0..7];
my @t=@_[8..15];

# Thanks to Jussi Kivilinna for providing pointer to
#
# | 0e 0b 0d 09 |   | 02 03 01 01 |   | 05 00 04 00 |
# | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 |
# | 0d 09 0e 0b |   | 01 01 02 03 |   | 04 00 05 00 |
# | 0b 0d 09 0e |   | 03 01 01 02 |   | 00 04 00 05 |

# Every @t[i] is first set to @x[i] ^ (@x[i] rotated by 8 bytes); the second
# group of veor's then folds selected @t[] values back into @x[].
$code.=<<___;
	@ multiplication by 0x05-0x00-0x04-0x00
	vext.8	@t[0], @x[0], @x[0], #8
	vext.8	@t[6], @x[6], @x[6], #8
	vext.8	@t[7], @x[7], @x[7], #8
	veor	@t[0], @t[0], @x[0]
	vext.8	@t[1], @x[1], @x[1], #8
	veor	@t[6], @t[6], @x[6]
	vext.8	@t[2], @x[2], @x[2], #8
	veor	@t[7], @t[7], @x[7]
	vext.8	@t[3], @x[3], @x[3], #8
	veor	@t[1], @t[1], @x[1]
	vext.8	@t[4], @x[4], @x[4], #8
	veor	@t[2], @t[2], @x[2]
	vext.8	@t[5], @x[5], @x[5], #8
	veor	@t[3], @t[3], @x[3]
	veor	@t[4], @t[4], @x[4]
	veor	@t[5], @t[5], @x[5]

	 veor	@x[0], @x[0], @t[6]
	 veor	@x[1], @x[1], @t[6]
	 veor	@x[2], @x[2], @t[0]
	 veor	@x[4], @x[4], @t[2]
	 veor	@x[3], @x[3], @t[1]
	 veor	@x[1], @x[1], @t[7]
	 veor	@x[2], @x[2], @t[7]
	 veor	@x[4], @x[4], @t[6]
	 veor	@x[5], @x[5], @t[3]
	 veor	@x[3], @x[3], @t[6]
	 veor	@x[6], @x[6], @t[4]
	 veor	@x[4], @x[4], @t[7]
	 veor	@x[5], @x[5], @t[7]
	 veor	@x[7], @x[7], @t[5]
___
	# The trailing 1 is presumably MixColumns' $inv flag (its parameter list
	# is not visible from here -- confirm); the $inv tail of that sub leaves
	# the result in a different register order, hence "flipped".
	&MixColumns	(@x,@t,1);	# flipped 2<->3 and 4<->6
}
642d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
# swapmove($a, $b, $n, $mask, $t)
#
# Appends the six-instruction "swap-move" step to $code.  All arguments except
# $n are register names; $n is the shift distance in bits.  The emitted code
# computes
#
#	t   = (($b >> $n) ^ $a) & $mask
#	$a ^= t
#	$b ^= t << $n
#
# i.e. the bits of $a selected by $mask trade places with the bits of $b
# selected by ($mask << $n).  The shifts are vshr.u64/vshl.u64, so they operate
# on each 64-bit lane separately.  $mask is only read; $t is overwritten.
sub swapmove {
my ($a,$b,$n,$mask,$t)=@_;
$code.=<<___;
	vshr.u64	$t, $b, #$n
	veor		$t, $t, $a
	vand		$t, $t, $mask
	veor		$a, $a, $t
	vshl.u64	$t, $t, #$n
	veor		$b, $b, $t
___
}
# swapmove2x($a0, $b0, $a1, $b1, $n, $mask, $t0, $t1)
#
# Appends two swap-move steps to $code, one on the register pair ($a0,$b0)
# with scratch $t0 and one on ($a1,$b1) with scratch $t1, sharing the shift
# distance $n and the mask register $mask.  For each pair i the emitted code
# computes
#
#	ti   = (($bi >> $n) ^ $ai) & $mask
#	$ai ^= ti
#	$bi ^= ti << $n
#
# The two instruction streams are independent and are emitted interleaved;
# the lines belonging to the second pair carry an extra leading space.
sub swapmove2x {
my ($a0,$b0,$a1,$b1,$n,$mask,$t0,$t1)=@_;
$code.=<<___;
	vshr.u64	$t0, $b0, #$n
	 vshr.u64	$t1, $b1, #$n
	veor		$t0, $t0, $a0
	 veor		$t1, $t1, $a1
	vand		$t0, $t0, $mask
	 vand		$t1, $t1, $mask
	veor		$a0, $a0, $t0
	vshl.u64	$t0, $t0, #$n
	 veor		$a1, $a1, $t1
	 vshl.u64	$t1, $t1, #$n
	veor		$b0, $b0, $t0
	 veor		$b1, $b1, $t1
___
}
671d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
# bitslice(@regs[0..7], $t0, $t1, $t2, $t3)
#
# Appends to $code the transposition of eight registers: three rounds of
# swap-moves at distances 1, 2 and 4 with the byte masks 0x55, 0x33 and 0x0f.
# The eight data registers are taken in REVERSE order of the argument list.
# $t0/$t1 receive the masks, $t2/$t3 are the swapmove2x scratch registers.
sub bitslice {
my @x=reverse(@_[0..7]);
my ($t0,$t1,$t2,$t3)=@_[8..11];
$code.=<<___;
	vmov.i8	$t0,#0x55			@ compose .LBS0
	vmov.i8	$t1,#0x33			@ compose .LBS1
___
	# distance 1, mask 0x55 ($t0): neighbouring register pairs
	&swapmove2x(@x[0,1,2,3],1,$t0,$t2,$t3);
	&swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3);
# 0x55 is no longer needed, so $t0 is reloaded with the third mask here,
# ahead of the distance-2 round that uses $t1.
$code.=<<___;
	vmov.i8	$t0,#0x0f			@ compose .LBS2
___
	# distance 2, mask 0x33 ($t1): registers two apart
	&swapmove2x(@x[0,2,1,3],2,$t1,$t2,$t3);
	&swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3);

	# distance 4, mask 0x0f ($t0): registers four apart
	&swapmove2x(@x[0,4,1,5],4,$t0,$t2,$t3);
	&swapmove2x(@x[2,6,3,7],4,$t0,$t2,$t3);
}
690d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
# Assembler preamble followed by the entry of _bsaes_decrypt8.
#
# Preamble: outside __KERNEL__ the output includes arm_arch.h and defines
# VFP_ABI_PUSH/VFP_ABI_POP to store/reload d8-d15 (VFP_ABI_FRAME = 0x40); with
# __KERNEL__ those macros expand to nothing and BSAES_ASM_EXTENDED_KEY,
# XTS_CHAIN_TWEAK and __ARM_ARCH__ are defined instead.  Everything after the
# "#if __ARM_ARCH__>=7" line is emitted inside that conditional.
#
# _bsaes_decrypt8 entry: loads the round-0 key from [$key] (post-incrementing
# $key), points $const at .LM0ISR, then for each of @XMM[0..7] xors in the
# round-0 key and permutes the bytes with vtbl.8 using the .LM0ISR table held
# in @XMM[8].  @XMM[10..15] serve as temporaries for the xor results.
$code.=<<___;
#ifndef __KERNEL__
# include "arm_arch.h"

# define VFP_ABI_PUSH	vstmdb	sp!,{d8-d15}
# define VFP_ABI_POP	vldmia	sp!,{d8-d15}
# define VFP_ABI_FRAME	0x40
#else
# define VFP_ABI_PUSH
# define VFP_ABI_POP
# define VFP_ABI_FRAME	0
# define BSAES_ASM_EXTENDED_KEY
# define XTS_CHAIN_TWEAK
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
#endif

#ifdef __thumb__
# define adrl adr
#endif

#if __ARM_ARCH__>=7
.text
.syntax	unified 	@ ARMv7-capable assembler is expected to handle this
#ifdef __thumb2__
.thumb
#else
.code   32
#endif

.fpu	neon

.type	_bsaes_decrypt8,%function
.align	4
_bsaes_decrypt8:
	adr	$const,_bsaes_decrypt8
	vldmia	$key!, {@XMM[9]}		@ round 0 key
	add	$const,$const,#.LM0ISR-_bsaes_decrypt8

	vldmia	$const!, {@XMM[8]}		@ .LM0ISR
	veor	@XMM[10], @XMM[0], @XMM[9]	@ xor with round0 key
	veor	@XMM[11], @XMM[1], @XMM[9]
	 vtbl.8	`&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])`
	 vtbl.8	`&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])`
	veor	@XMM[12], @XMM[2], @XMM[9]
	 vtbl.8	`&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])`
	 vtbl.8	`&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])`
	veor	@XMM[13], @XMM[3], @XMM[9]
	 vtbl.8	`&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])`
	 vtbl.8	`&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])`
	veor	@XMM[14], @XMM[4], @XMM[9]
	 vtbl.8	`&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])`
	 vtbl.8	`&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])`
	veor	@XMM[15], @XMM[5], @XMM[9]
	 vtbl.8	`&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])`
	 vtbl.8	`&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])`
	veor	@XMM[10], @XMM[6], @XMM[9]
	 vtbl.8	`&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])`
	 vtbl.8	`&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])`
	veor	@XMM[11], @XMM[7], @XMM[9]
	 vtbl.8	`&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])`
	 vtbl.8	`&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])`
	 vtbl.8	`&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])`
	 vtbl.8	`&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])`
___
	# Transpose @XMM[0..7] into bit-sliced form; @XMM[8..11] are scratch.
	&bitslice	(@XMM[0..7, 8..11]);
# Round loop.  The first pass enters at .Ldec_sbox, i.e. it skips the
# ShiftRows call emitted at .Ldec_loop.
$code.=<<___;
	sub	$rounds,$rounds,#1
	b	.Ldec_sbox
.align	4
.Ldec_loop:
___
	# @XMM[12] is passed as the ninth scratch argument; it is the register
	# loaded from [$const] at the bottom of the loop (presumably the byte
	# shuffle table ShiftRows applies -- that sub is not visible here).
	&ShiftRows	(@XMM[0..7, 8..12]);
$code.=".Ldec_sbox:\n";
	&InvSbox	(@XMM[0..7, 8..15]);
# Leave the loop once the counter borrows (carry clear after subs).
$code.=<<___;
	subs	$rounds,$rounds,#1
	bcc	.Ldec_done
___
	# State registers are passed in the order 0,1,6,4,2,7,3,5.
	&InvMixColumns	(@XMM[0,1,6,4,2,7,3,5, 8..15]);
# Pick the table for the next pass: .LISR normally; when the subs above left
# $rounds at exactly zero (eq), $const is advanced by 0x10 and the register is
# reloaded with .LISRM0 before looping.  This relies on the flags from that
# subs still being intact after the InvMixColumns code.
$code.=<<___;
	vldmia	$const, {@XMM[12]}		@ .LISR
	ite	eq				@ Thumb2 thing, sanity check in ARM
	addeq	$const,$const,#0x10
	bne	.Ldec_loop
	vldmia	$const, {@XMM[12]}		@ .LISRM0
	b	.Ldec_loop
.align	4
.Ldec_done:
___
	# Second bitslice call on the state registers, in the same
	# 0,1,6,4,2,7,3,5 order used for InvMixColumns above.
	&bitslice	(@XMM[0,1,6,4,2,7,3,5, 8..11]);
781d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root$code.=<<___;
782d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	$key, {@XMM[8]}			@ last round key
783d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[6], @XMM[6], @XMM[8]
784d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[4], @XMM[4], @XMM[8]
785d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[2], @XMM[2], @XMM[8]
786d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[7], @XMM[7], @XMM[8]
787d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[3], @XMM[3], @XMM[8]
788d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[5], @XMM[5], @XMM[8]
789d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[0], @XMM[0], @XMM[8]
790d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[1], @XMM[1], @XMM[8]
791d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bx	lr
792d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.size	_bsaes_decrypt8,.-_bsaes_decrypt8
793d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
@ _bsaes_const: read-only byte-index tables, 16 bytes (two .quad) each.
@ They are used as vtbl.8 index vectors by the routines in this file.
@ The routines find them PC-relatively (adr on their own label, minus a
@ label difference), so the tables must stay in the same section as the code.
@ The order of the entries matters: _bsaes_encrypt8 loads .LM0SR with
@ write-back and afterwards steps from .LSR to .LSRM0 by adding 0x10 to the
@ same pointer.
794d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.type	_bsaes_const,%object
@ NOTE(review): under ARM gas rules .align 6 should mean 64-byte alignment - confirm
795d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	6
796d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root_bsaes_const:
797d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.LM0ISR:	@ InvShiftRows constants
798d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	.quad	0x0a0e0206070b0f03, 0x0004080c0d010509
799d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.LISR:
800d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	.quad	0x0504070602010003, 0x0f0e0d0c080b0a09
801d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.LISRM0:
802d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	.quad	0x01040b0e0205080f, 0x0306090c00070a0d
@ .LM0SR, .LSR and .LSRM0 must stay adjacent and in this order (see above)
803d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.LM0SR:		@ ShiftRows constants
804d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	.quad	0x0a0e02060f03070b, 0x0004080c05090d01
805d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.LSR:
806d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	.quad	0x0504070600030201, 0x0f0e0d0c0a09080b
807d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.LSRM0:
808d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	.quad	0x0304090e00050a0f, 0x01060b0c0207080d
@ .LM0 is the permutation _bsaes_key_convert applies to every inner round key
809d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.LM0:
810d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	.quad	0x02060a0e03070b0f, 0x0004080c0105090d
811d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.LREVM0SR:
812d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	.quad	0x090d01050c000408, 0x03070b0f060a0e02
813d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.asciz	"Bit-sliced AES for NEON, CRYPTOGAMS by <appro\@openssl.org>"
814d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	6
815d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.size	_bsaes_const,.-_bsaes_const
816d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
@ _bsaes_encrypt8: encrypt the eight 16-byte blocks held in XMM[0]-XMM[7].
@ In:   key register -> key schedule with the round 0 key first;
@       rounds register = number of rounds; XMM[0]-XMM[7] = input blocks.
@ Out:  the results, in output order, are left in XMM[0], XMM[1], XMM[4],
@       XMM[6], XMM[3], XMM[7], XMM[2], XMM[5].
@ Uses: XMM[8]-XMM[15], the const, key and rounds registers, and the flags.
@       Nothing is saved or restored here; that is left to the callers.
817d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.type	_bsaes_encrypt8,%function
818d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
819d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root_bsaes_encrypt8:
@ PC-relative address of .LM0SR: own address minus the distance to the table
820d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	adr	$const,_bsaes_encrypt8
821d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	$key!, {@XMM[9]}		@ round 0 key
822d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	sub	$const,$const,#_bsaes_encrypt8-.LM0SR
823d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
@ write-back leaves the const register pointing at .LSR for the round loop
824d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	$const!, {@XMM[8]}		@ .LM0SR
@ Alternate entry: skips the loads above, so XMM[9] (round 0 key), XMM[8]
@ (byte permutation) and the const register must already be set up.
825d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root_bsaes_encrypt8_alt:
@ Each block is xored with the round 0 key into a temporary and then
@ byte-permuted back into its own register through the table in XMM[8].
826d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[10], @XMM[0], @XMM[9]	@ xor with round0 key
827d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[11], @XMM[1], @XMM[9]
828d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 vtbl.8	`&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])`
829d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 vtbl.8	`&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])`
830d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[12], @XMM[2], @XMM[9]
831d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 vtbl.8	`&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])`
832d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 vtbl.8	`&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])`
833d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[13], @XMM[3], @XMM[9]
834d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 vtbl.8	`&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])`
835d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 vtbl.8	`&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])`
836d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[14], @XMM[4], @XMM[9]
837d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 vtbl.8	`&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])`
838d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 vtbl.8	`&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])`
839d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[15], @XMM[5], @XMM[9]
840d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 vtbl.8	`&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])`
841d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 vtbl.8	`&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])`
842d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[10], @XMM[6], @XMM[9]
843d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 vtbl.8	`&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])`
844d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 vtbl.8	`&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])`
845d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[11], @XMM[7], @XMM[9]
846d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 vtbl.8	`&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])`
847d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 vtbl.8	`&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])`
848d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 vtbl.8	`&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])`
849d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	 vtbl.8	`&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])`
@ Third entry point: execution from here starts at the bit-slicing step,
@ with no key xor or byte permutation applied.
850d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root_bsaes_encrypt8_bitslice:
851d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root___
	# bitslice helper is defined elsewhere in the file; XMM[8..11] are
	# presumably its temporaries - confirm against its definition
852d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	&bitslice	(@XMM[0..7, 8..11]);
853d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root$code.=<<___;
@ Enter the loop at .Lenc_sbox: the first pass skips the ShiftRows step
@ that sits at .Lenc_loop.
854d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	sub	$rounds,$rounds,#1
855d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	b	.Lenc_sbox
856d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
857d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lenc_loop:
858d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root___
	# XMM[12] holds the permutation loaded at the bottom of the loop;
	# presumably ShiftRows uses it as its mask - confirm
859d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	&ShiftRows	(@XMM[0..7, 8..12]);
860d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root$code.=".Lenc_sbox:\n";
861d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	&Sbox		(@XMM[0..7, 8..15]);
862d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root$code.=<<___;
@ Carry clear means the counter borrowed (it was already zero), i.e. this
@ was the final round: leave before MixColumns.
863d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	subs	$rounds,$rounds,#1
864d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bcc	.Lenc_done
865d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root___
	# register order here is the same permuted order the final bitslice
	# call below uses
866d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	&MixColumns	(@XMM[0,1,4,6,3,7,2,5, 8..15]);
867d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root$code.=<<___;
@ Pick the permutation for the next pass. The eq/ne tests use the flags of
@ the subs above (assumes the MixColumns expansion leaves flags untouched).
@ eq: the counter just reached zero, so the next round is the last one;
@     step the pointer by 16 bytes and reload from .LSRM0 instead.
@ ne: keep the .LSR value just loaded and loop.
868d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	$const, {@XMM[12]}		@ .LSR
869d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ite	eq				@ Thumb2 thing, sanity check in ARM
870d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	addeq	$const,$const,#0x10
871d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bne	.Lenc_loop
872d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	$const, {@XMM[12]}		@ .LSRM0
873d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	b	.Lenc_loop
874d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
875d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lenc_done:
876d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root___
877d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	# output in lsb > [t0, t1, t4, t6, t3, t7, t2, t5] < msb
878d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	&bitslice	(@XMM[0,1,4,6,3,7,2,5, 8..11]);
879d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root$code.=<<___;
@ final step: xor the last round key into all eight blocks
880d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	$key, {@XMM[8]}			@ last round key
881d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[4], @XMM[4], @XMM[8]
882d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[6], @XMM[6], @XMM[8]
883d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[3], @XMM[3], @XMM[8]
884d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[7], @XMM[7], @XMM[8]
885d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[2], @XMM[2], @XMM[8]
886d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[5], @XMM[5], @XMM[8]
887d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[0], @XMM[0], @XMM[8]
888d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[1], @XMM[1], @XMM[8]
889d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bx	lr
890d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.size	_bsaes_encrypt8,.-_bsaes_encrypt8
891d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root___
892d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root}
893d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root{
# Register assignment for _bsaes_key_convert in this scope: destination
# pointer in r12, source key pointer in r4, round count in r5, and r6 as the
# constant-table pointer. Its callers load r4, r5 and r12 before the call.
894d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootmy ($out,$inp,$rounds,$const)=("r12","r4","r5","r6");
895d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
# bitslice_key(x0..x7, bs0, bs1, bs2, t2, t3)
# Appends a reduced bit-slice sequence to $code. The first eight arguments
# are taken in reverse order; bs0..bs2, t2 and t3 are handed on unchanged to
# swapmove / swapmove2x (both defined elsewhere in the file). Wherever a
# commented-out call shows a second swapmove, plain vmov copies of the
# already computed registers are emitted instead.
# NOTE(review): presumably this is valid because all eight inputs hold the
# same round key, so the skipped swapmoves would give identical results -
# confirm against the caller.
896d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootsub bitslice_key {
897d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootmy @x=reverse(@_[0..7]);
898d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootmy ($bs0,$bs1,$bs2,$t2,$t3)=@_[8..12];
899d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
	# stage 1 (shift 1): one real swapmove, then copy its result pair
900d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	&swapmove	(@x[0,1],1,$bs0,$t2,$t3);
901d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root$code.=<<___;
902d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	@ &swapmove(@x[2,3],1,$t0,$t2,$t3);
903d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov	@x[2], @x[0]
904d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov	@x[3], @x[1]
905d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root___
906d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	#&swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3);
907d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
	# stage 2 (shift 2): one real swapmove2x, then copy x[0..3] into x[4..7]
908d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	&swapmove2x	(@x[0,2,1,3],2,$bs1,$t2,$t3);
909d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root$code.=<<___;
910d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	@ &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3);
911d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov	@x[4], @x[0]
912d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov	@x[6], @x[2]
913d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov	@x[5], @x[1]
914d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov	@x[7], @x[3]
915d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root___
	# stage 3 (shift 4): done in full on both register groups
916d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	&swapmove2x	(@x[0,4,1,5],4,$bs2,$t2,$t3);
917d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	&swapmove2x	(@x[2,6,3,7],4,$bs2,$t2,$t3);
918d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root}
919d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
920d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root$code.=<<___;
@ _bsaes_key_convert: rewrite a conventional AES key schedule into the
@ bit-sliced layout.
@ In:   r4  = source round keys, 16 bytes each, read with post-increment
@       r5  = number of rounds
@       r12 = destination buffer
@ Out:  the destination holds the round 0 key (16 bytes) followed by one
@       128-byte bit-sliced key per inner round; r12 points just past them.
@       XMM[15] = last round key, loaded but NOT stored.
@       XMM[7]  = 0x63 in every byte.
@       Callers use these two registers to finish the schedule themselves.
@ Uses: XMM[0]-XMM[15], r4, r5, r6, r12 and the flags; nothing is saved here.
@ The loop runs rounds-1 times and only stops when the counter reaches
@ exactly zero, so rounds must be at least 2.
921d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.type	_bsaes_key_convert,%function
922d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
923d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root_bsaes_key_convert:
@ PC-relative address of .LM0: own address minus the distance to the table
924d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	adr	$const,_bsaes_key_convert
925d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[7]},  [$inp]!		@ load round 0 key
926d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	sub	$const,$const,#_bsaes_key_convert-.LM0
927d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[15]}, [$inp]!		@ load round 1 key
928d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
@ masks for bits 0..5 stay resident in XMM[8]-XMM[13]; the masks for bits
@ 6 and 7 are rebuilt inside the loop because their registers get reused
929d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov.i8	@XMM[8],  #0x01			@ bit masks
930d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov.i8	@XMM[9],  #0x02
931d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov.i8	@XMM[10], #0x04
932d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov.i8	@XMM[11], #0x08
933d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov.i8	@XMM[12], #0x10
934d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov.i8	@XMM[13], #0x20
935d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	$const, {@XMM[14]}		@ .LM0
936d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
@ little-endian builds only: reverse the bytes inside every 32-bit word
937d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifdef __ARMEL__
938d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vrev32.8	@XMM[7],  @XMM[7]
939d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vrev32.8	@XMM[15], @XMM[15]
940d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
941d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	sub	$rounds,$rounds,#1
942d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmia	$out!, {@XMM[7]}		@ save round 0 key
@ branch over the alignment padding that precedes the loop label
943d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	b	.Lkey_loop
944d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
945d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
946d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lkey_loop:
@ permute the bytes of the current round key (XMM[15]) through .LM0
947d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vtbl.8	`&Dlo(@XMM[7])`,{@XMM[15]},`&Dlo(@XMM[14])`
948d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vtbl.8	`&Dhi(@XMM[7])`,{@XMM[15]},`&Dhi(@XMM[14])`
@ XMM[15] has been consumed and XMM[6] is about to be overwritten, so both
@ can carry the two remaining bit masks
949d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov.i8	@XMM[6],  #0x40
950d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov.i8	@XMM[15], #0x80
951d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
@ one vtst per bit position n: each byte of XMM[n] becomes 0xff when bit n
@ of the matching key byte is set and 0x00 otherwise
952d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vtst.8	@XMM[0], @XMM[7], @XMM[8]
953d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vtst.8	@XMM[1], @XMM[7], @XMM[9]
954d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vtst.8	@XMM[2], @XMM[7], @XMM[10]
955d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vtst.8	@XMM[3], @XMM[7], @XMM[11]
956d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vtst.8	@XMM[4], @XMM[7], @XMM[12]
957d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vtst.8	@XMM[5], @XMM[7], @XMM[13]
958d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vtst.8	@XMM[6], @XMM[7], @XMM[6]
959d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vtst.8	@XMM[7], @XMM[7], @XMM[15]
960d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[15]}, [$inp]!		@ load next round key
@ planes 0, 1, 5 and 6 are inverted; these are exactly the bit positions
@ set in 0x63, the constant composed after the loop
961d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmvn	@XMM[0], @XMM[0]		@ "pnot"
962d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmvn	@XMM[1], @XMM[1]
963d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmvn	@XMM[5], @XMM[5]
964d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmvn	@XMM[6], @XMM[6]
965d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifdef __ARMEL__
966d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vrev32.8	@XMM[15], @XMM[15]
967d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
@ the flags set here are consumed by the bne below; the store in between
@ does not change them
968d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	subs	$rounds,$rounds,#1
969d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmia	$out!,{@XMM[0]-@XMM[7]}		@ write bit-sliced round key
970d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bne	.Lkey_loop
971d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
972d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov.i8	@XMM[7],#0x63			@ compose .L63
973d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	@ don't save last round key
974d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bx	lr
975d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.size	_bsaes_key_convert,.-_bsaes_key_convert
976d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root___
977d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root}
978d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
979d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootif (0) {		# following four functions are unsupported interface
980d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root			# used for benchmarking...
981d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root$code.=<<___;
@ bsaes_enc_key_convert: build a bit-sliced key schedule for encryption.
@ Saves r4-r6, lr and d8-d15, reads the round count from offset 240 of the
@ input key, runs _bsaes_key_convert, then stores the last round key xored
@ with the 0x63 constant (returned in XMM[7]) right after the bit-sliced
@ round keys.
@ NOTE(review): the inp and out names interpolated here are not the r4/r12
@ pair declared for _bsaes_key_convert (that scope has closed); they come
@ from a declaration elsewhere in the file, presumably the first two
@ argument registers - confirm.
982d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.globl	bsaes_enc_key_convert
983d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.type	bsaes_enc_key_convert,%function
984d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
985d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootbsaes_enc_key_convert:
986d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	stmdb	sp!,{r4-r6,lr}
987d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmdb	sp!,{d8-d15}		@ ABI specification says so
988d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
989d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldr	r5,[$inp,#240]			@ pass rounds
990d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r4,$inp				@ pass key
991d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r12,$out			@ pass key schedule
992d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl	_bsaes_key_convert
@ on return XMM[15] = last round key, XMM[7] = 0x63 bytes, r12 = next free slot
993d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[7],@XMM[7],@XMM[15]	@ fix up last round key
994d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmia	r12, {@XMM[7]}			@ save last round key
995d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
996d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	sp!,{d8-d15}
997d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldmia	sp!,{r4-r6,pc}
998d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.size	bsaes_enc_key_convert,.-bsaes_enc_key_convert
999d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
@ bsaes_encrypt_128: benchmark driver that encrypts 128 bytes (eight blocks)
@ per pass with a fixed round count of 10.
@ Each pass loads eight blocks, calls _bsaes_encrypt8 and stores the results
@ in the register order that routine leaves them in. The loop repeats while
@ the length is still above zero after subtracting 0x80, so one full
@ 128-byte pass always runs, whatever the length.
@ NOTE(review): the key argument presumably has to be a schedule produced by
@ bsaes_enc_key_convert - confirm.
1000d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.globl	bsaes_encrypt_128
1001d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.type	bsaes_encrypt_128,%function
1002d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
1003d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootbsaes_encrypt_128:
1004d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	stmdb	sp!,{r4-r6,lr}
1005d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmdb	sp!,{d8-d15}		@ ABI specification says so
1006d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lenc128_loop:
@ r4 and r5 are set up again on every pass, interleaved with the loads
1007d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[0]-@XMM[1]}, [$inp]!	@ load input
1008d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[2]-@XMM[3]}, [$inp]!
1009d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r4,$key				@ pass the key
1010d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[4]-@XMM[5]}, [$inp]!
1011d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r5,#10				@ pass rounds
1012d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[6]-@XMM[7]}, [$inp]!
1013d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1014d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl	_bsaes_encrypt8
1015d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
@ output order of _bsaes_encrypt8: 0, 1, 4, 6, 3, 7, 2, 5
1016d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[0]-@XMM[1]}, [$out]!	@ write output
1017d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[4]}, [$out]!
1018d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[6]}, [$out]!
1019d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[3]}, [$out]!
1020d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[7]}, [$out]!
1021d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[2]}, [$out]!
@ the flags set here feed the bhi below; the store in between keeps them
1022d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	subs	$len,$len,#0x80
1023d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[5]}, [$out]!
1024d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bhi	.Lenc128_loop
1025d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1026d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	sp!,{d8-d15}
1027d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldmia	sp!,{r4-r6,pc}
1028d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.size	bsaes_encrypt_128,.-bsaes_encrypt_128
1029d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
@ bsaes_dec_key_convert: build a bit-sliced key schedule for decryption.
@ Saves r4-r6, lr and d8-d15, reads the round count from offset 240 of the
@ input key and runs _bsaes_key_convert. Afterwards the last round key is
@ stored unmodified at the end of the schedule, and the 0x63 constant
@ (returned in XMM[7]) is xored into the round 0 key at its start.
1030d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.globl	bsaes_dec_key_convert
1031d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.type	bsaes_dec_key_convert,%function
1032d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
1033d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootbsaes_dec_key_convert:
1034d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	stmdb	sp!,{r4-r6,lr}
1035d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmdb	sp!,{d8-d15}		@ ABI specification says so
1036d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1037d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldr	r5,[$inp,#240]			@ pass rounds
1038d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r4,$inp				@ pass key
1039d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r12,$out			@ pass key schedule
1040d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl	_bsaes_key_convert
@ on return XMM[15] = last round key, XMM[7] = 0x63 bytes, r12 = next free slot
@ reload the round 0 key that was stored at the start of the schedule
1041d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	$out, {@XMM[6]}
1042d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmia	r12,  {@XMM[15]}		@ save last round key
1043d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[7], @XMM[7], @XMM[6]	@ fix up round 0 key
1044d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmia	$out, {@XMM[7]}
1045d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1046d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	sp!,{d8-d15}
1047d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldmia	sp!,{r4-r6,pc}
1048d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.size	bsaes_dec_key_convert,.-bsaes_dec_key_convert
1049d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
@ bsaes_decrypt_128: benchmark driver that decrypts 128 bytes (eight blocks)
@ per pass with a fixed round count of 10.
@ Each pass loads eight blocks, calls _bsaes_decrypt8 and stores registers
@ 0, 1, 6, 4, 2, 7, 3, 5 in that order. The loop repeats while the length is
@ still above zero after subtracting 0x80, so one full 128-byte pass always
@ runs, whatever the length.
@ NOTE(review): the key argument presumably has to be a schedule produced by
@ bsaes_dec_key_convert - confirm.
1050d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.globl	bsaes_decrypt_128
1051d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.type	bsaes_decrypt_128,%function
1052d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
1053d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootbsaes_decrypt_128:
1054d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	stmdb	sp!,{r4-r6,lr}
1055d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmdb	sp!,{d8-d15}		@ ABI specification says so
1056d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Ldec128_loop:
@ r4 and r5 are set up again on every pass, interleaved with the loads
1057d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[0]-@XMM[1]}, [$inp]!	@ load input
1058d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[2]-@XMM[3]}, [$inp]!
1059d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r4,$key				@ pass the key
1060d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[4]-@XMM[5]}, [$inp]!
1061d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r5,#10				@ pass rounds
1062d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[6]-@XMM[7]}, [$inp]!
1063d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1064d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl	_bsaes_decrypt8
1065d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1066d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[0]-@XMM[1]}, [$out]!	@ write output
1067d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[6]}, [$out]!
1068d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[4]}, [$out]!
1069d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[2]}, [$out]!
1070d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[7]}, [$out]!
1071d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[3]}, [$out]!
@ the flags set here feed the bhi below; the store in between keeps them
1072d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	subs	$len,$len,#0x80
1073d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[5]}, [$out]!
1074d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bhi	.Ldec128_loop
1075d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1076d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	sp!,{d8-d15}
1077d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldmia	sp!,{r4-r6,pc}
1078d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.size	bsaes_decrypt_128,.-bsaes_decrypt_128
1079d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root___
1080d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root}
1081d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root{
1082d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootmy ($inp,$out,$len,$key, $ivp,$fp,$rounds)=map("r$_",(0..3,8..10));
1083d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootmy ($keysched)=("sp");
1084d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1085d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root$code.=<<___;
1086d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.extern AES_cbc_encrypt
1087d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.extern AES_decrypt
1088d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1089d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.global	bsaes_cbc_encrypt
1090d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.type	bsaes_cbc_encrypt,%function
1091d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	5
1092d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootbsaes_cbc_encrypt:
1093d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifndef	__KERNEL__
1094d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	cmp	$len, #128
1095d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifndef	__thumb__
1096d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	blo	AES_cbc_encrypt
1097d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#else
1098d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bhs	1f
1099d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	b	AES_cbc_encrypt
1100d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root1:
1101d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
1102d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
1103d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1104d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	@ it is up to the caller to make sure we are called with enc == 0
1105d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1106d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	ip, sp
1107d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	stmdb	sp!, {r4-r10, lr}
1108d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	VFP_ABI_PUSH
1109d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldr	$ivp, [ip]			@ IV is 1st arg on the stack
1110d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	$len, $len, lsr#4		@ len in 16 byte blocks
1111d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	sub	sp, #0x10			@ scratch space to carry over the IV
1112d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	$fp, sp				@ save sp
1113d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1114d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldr	$rounds, [$key, #240]		@ get # of rounds
1115d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifndef	BSAES_ASM_EXTENDED_KEY
1116d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	@ allocate the key schedule on the stack
1117d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	sub	r12, sp, $rounds, lsl#7		@ 128 bytes per inner round key
1118d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add	r12, #`128-32`			@ sifze of bit-slices key schedule
1119d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1120d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	@ populate the key schedule
1121d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r4, $key			@ pass key
1122d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r5, $rounds			@ pass # of rounds
1123d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	sp, r12				@ sp is $keysched
1124d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl	_bsaes_key_convert
1125d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	$keysched, {@XMM[6]}
1126d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmia	r12,  {@XMM[15]}		@ save last round key
1127d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[7], @XMM[7], @XMM[6]	@ fix up round 0 key
1128d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmia	$keysched, {@XMM[7]}
1129d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#else
1130d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldr	r12, [$key, #244]
1131d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	eors	r12, #1
1132d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	beq	0f
1133d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1134d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	@ populate the key schedule
1135d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	str	r12, [$key, #244]
1136d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r4, $key			@ pass key
1137d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r5, $rounds			@ pass # of rounds
1138d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add	r12, $key, #248			@ pass key schedule
1139d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl	_bsaes_key_convert
1140d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add	r4, $key, #248
1141d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	r4, {@XMM[6]}
1142d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmia	r12, {@XMM[15]}			@ save last round key
1143d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[7], @XMM[7], @XMM[6]	@ fix up round 0 key
1144d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmia	r4, {@XMM[7]}
1145d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1146d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	2
1147d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root0:
1148d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
1149d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1150d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[15]}, [$ivp]		@ load IV
1151d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	b	.Lcbc_dec_loop
1152d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1153d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
1154d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lcbc_dec_loop:
1155d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	subs	$len, $len, #0x8
1156d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bmi	.Lcbc_dec_loop_finish
1157d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1158d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[0]-@XMM[1]}, [$inp]!	@ load input
1159d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[2]-@XMM[3]}, [$inp]!
1160d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifndef	BSAES_ASM_EXTENDED_KEY
1161d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r4, $keysched			@ pass the key
1162d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#else
1163d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add	r4, $key, #248
1164d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
1165d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[4]-@XMM[5]}, [$inp]!
1166d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r5, $rounds
1167d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[6]-@XMM[7]}, [$inp]
1168d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	sub	$inp, $inp, #0x60
1169d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmia	$fp, {@XMM[15]}			@ put aside IV
1170d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1171d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl	_bsaes_decrypt8
1172d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1173d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	$fp, {@XMM[14]}			@ reload IV
1174d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[8]-@XMM[9]}, [$inp]!	@ reload input
1175d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[0], @XMM[0], @XMM[14]	@ ^= IV
1176d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[10]-@XMM[11]}, [$inp]!
1177d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[1], @XMM[1], @XMM[8]
1178d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[6], @XMM[6], @XMM[9]
1179d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[12]-@XMM[13]}, [$inp]!
1180d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[4], @XMM[4], @XMM[10]
1181d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[2], @XMM[2], @XMM[11]
1182d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[14]-@XMM[15]}, [$inp]!
1183d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[7], @XMM[7], @XMM[12]
1184d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[0]-@XMM[1]}, [$out]!	@ write output
1185d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[3], @XMM[3], @XMM[13]
1186d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[6]}, [$out]!
1187d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[5], @XMM[5], @XMM[14]
1188d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[4]}, [$out]!
1189d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[2]}, [$out]!
1190d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[7]}, [$out]!
1191d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[3]}, [$out]!
1192d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[5]}, [$out]!
1193d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1194d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	b	.Lcbc_dec_loop
1195d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1196d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lcbc_dec_loop_finish:
1197d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	adds	$len, $len, #8
1198d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	beq	.Lcbc_dec_done
1199d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1200d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[0]}, [$inp]!		@ load input
1201d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	cmp	$len, #2
1202d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	blo	.Lcbc_dec_one
1203d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[1]}, [$inp]!
1204d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifndef	BSAES_ASM_EXTENDED_KEY
1205d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r4, $keysched			@ pass the key
1206d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#else
1207d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add	r4, $key, #248
1208d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
1209d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r5, $rounds
1210d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmia	$fp, {@XMM[15]}			@ put aside IV
1211d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	beq	.Lcbc_dec_two
1212d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[2]}, [$inp]!
1213d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	cmp	$len, #4
1214d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	blo	.Lcbc_dec_three
1215d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[3]}, [$inp]!
1216d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	beq	.Lcbc_dec_four
1217d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[4]}, [$inp]!
1218d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	cmp	$len, #6
1219d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	blo	.Lcbc_dec_five
1220d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[5]}, [$inp]!
1221d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	beq	.Lcbc_dec_six
1222d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[6]}, [$inp]!
1223d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	sub	$inp, $inp, #0x70
1224d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1225d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl	_bsaes_decrypt8
1226d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1227d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	$fp, {@XMM[14]}			@ reload IV
1228d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[8]-@XMM[9]}, [$inp]!	@ reload input
1229d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[0], @XMM[0], @XMM[14]	@ ^= IV
1230d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[10]-@XMM[11]}, [$inp]!
1231d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[1], @XMM[1], @XMM[8]
1232d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[6], @XMM[6], @XMM[9]
1233d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[12]-@XMM[13]}, [$inp]!
1234d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[4], @XMM[4], @XMM[10]
1235d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[2], @XMM[2], @XMM[11]
1236d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[15]}, [$inp]!
1237d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[7], @XMM[7], @XMM[12]
1238d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[0]-@XMM[1]}, [$out]!	@ write output
1239d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[3], @XMM[3], @XMM[13]
1240d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[6]}, [$out]!
1241d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[4]}, [$out]!
1242d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[2]}, [$out]!
1243d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[7]}, [$out]!
1244d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[3]}, [$out]!
1245d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	b	.Lcbc_dec_done
1246d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
1247d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lcbc_dec_six:
1248d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	sub	$inp, $inp, #0x60
1249d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl	_bsaes_decrypt8
1250d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	$fp,{@XMM[14]}			@ reload IV
1251d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[8]-@XMM[9]}, [$inp]!	@ reload input
1252d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[0], @XMM[0], @XMM[14]	@ ^= IV
1253d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[10]-@XMM[11]}, [$inp]!
1254d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[1], @XMM[1], @XMM[8]
1255d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[6], @XMM[6], @XMM[9]
1256d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[12]}, [$inp]!
1257d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[4], @XMM[4], @XMM[10]
1258d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[2], @XMM[2], @XMM[11]
1259d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[15]}, [$inp]!
1260d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[7], @XMM[7], @XMM[12]
1261d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[0]-@XMM[1]}, [$out]!	@ write output
1262d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[6]}, [$out]!
1263d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[4]}, [$out]!
1264d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[2]}, [$out]!
1265d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[7]}, [$out]!
1266d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	b	.Lcbc_dec_done
1267d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
1268d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lcbc_dec_five:
1269d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	sub	$inp, $inp, #0x50
1270d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl	_bsaes_decrypt8
1271d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	$fp, {@XMM[14]}			@ reload IV
1272d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[8]-@XMM[9]}, [$inp]!	@ reload input
1273d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[0], @XMM[0], @XMM[14]	@ ^= IV
1274d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[10]-@XMM[11]}, [$inp]!
1275d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[1], @XMM[1], @XMM[8]
1276d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[6], @XMM[6], @XMM[9]
1277d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[15]}, [$inp]!
1278d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[4], @XMM[4], @XMM[10]
1279d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[0]-@XMM[1]}, [$out]!	@ write output
1280d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[2], @XMM[2], @XMM[11]
1281d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[6]}, [$out]!
1282d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[4]}, [$out]!
1283d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[2]}, [$out]!
1284d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	b	.Lcbc_dec_done
1285d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
1286d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lcbc_dec_four:
1287d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	sub	$inp, $inp, #0x40
1288d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl	_bsaes_decrypt8
1289d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	$fp, {@XMM[14]}			@ reload IV
1290d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[8]-@XMM[9]}, [$inp]!	@ reload input
1291d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[0], @XMM[0], @XMM[14]	@ ^= IV
1292d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[10]}, [$inp]!
1293d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[1], @XMM[1], @XMM[8]
1294d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[6], @XMM[6], @XMM[9]
1295d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[15]}, [$inp]!
1296d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[4], @XMM[4], @XMM[10]
1297d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[0]-@XMM[1]}, [$out]!	@ write output
1298d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[6]}, [$out]!
1299d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[4]}, [$out]!
1300d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	b	.Lcbc_dec_done
1301d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
1302d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lcbc_dec_three:
1303d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	sub	$inp, $inp, #0x30
1304d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl	_bsaes_decrypt8
1305d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	$fp, {@XMM[14]}			@ reload IV
1306d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[8]-@XMM[9]}, [$inp]!	@ reload input
1307d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[0], @XMM[0], @XMM[14]	@ ^= IV
1308d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[15]}, [$inp]!
1309d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[1], @XMM[1], @XMM[8]
1310d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[6], @XMM[6], @XMM[9]
1311d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[0]-@XMM[1]}, [$out]!	@ write output
1312d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[6]}, [$out]!
1313d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	b	.Lcbc_dec_done
1314d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
1315d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lcbc_dec_two:
1316d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	sub	$inp, $inp, #0x20
1317d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl	_bsaes_decrypt8
1318d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	$fp, {@XMM[14]}			@ reload IV
1319d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[8]}, [$inp]!		@ reload input
1320d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[0], @XMM[0], @XMM[14]	@ ^= IV
1321d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[15]}, [$inp]!		@ reload input
1322d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[1], @XMM[1], @XMM[8]
1323d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[0]-@XMM[1]}, [$out]!	@ write output
1324d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	b	.Lcbc_dec_done
1325d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
1326d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lcbc_dec_one:
1327d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	sub	$inp, $inp, #0x10
1328d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	$rounds, $out			@ save original out pointer
1329d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	$out, $fp			@ use the iv scratch space as out buffer
1330d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r2, $key
1331d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov	@XMM[4],@XMM[15]		@ just in case ensure that IV
1332d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov	@XMM[5],@XMM[0]			@ and input are preserved
1333d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl	AES_decrypt
1334d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[0]}, [$fp,:64]		@ load result
1335d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[0], @XMM[0], @XMM[4]	@ ^= IV
1336d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov	@XMM[15], @XMM[5]		@ @XMM[5] holds input
1337d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[0]}, [$rounds]		@ write output
1338d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1339d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lcbc_dec_done:
1340d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifndef	BSAES_ASM_EXTENDED_KEY
1341d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov.i32	q0, #0
1342d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov.i32	q1, #0
1343d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lcbc_dec_bzero:				@ wipe key schedule [if any]
1344d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmia		$keysched!, {q0-q1}
1345d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	cmp		$keysched, $fp
1346d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bne		.Lcbc_dec_bzero
1347d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
1348d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1349d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	sp, $fp
1350d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add	sp, #0x10			@ add sp,$fp,#0x10 is no good for thumb
1351d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[15]}, [$ivp]		@ return IV
1352d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	VFP_ABI_POP
1353d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldmia	sp!, {r4-r10, pc}
1354d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.size	bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
1355d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root___
1356d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root}
1357d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root{
######################################################################
# bsaes_ctr32_encrypt_blocks
#
# Arguments as consumed by the code below:
#	r0 ($inp)	input pointer
#	r1 ($out)	output pointer
#	r2 ($len)	number of 16-byte blocks (not bytes): $len drops by
#			one for every q-register that is loaded and stored
#	r3 ($key)	key structure; # of rounds is read from offset 240
#	[sp]		5th argument, pointer to the 16-byte counter block
#
# Counter handling: the counter is byte-swapped per 32-bit word
# (vrev32.8) and advanced with vadd.u32 using a constant that is
# non-zero only in the last 32-bit lane, so only the last word of the
# counter block is incremented and it wraps without carrying into the
# other words. No instruction in this block stores the advanced counter
# back through the caller's counter pointer.
#
# Two paths:
#  * $len < 8 (.Lctr_enc_short): one AES_encrypt call per block.
#    Stack layout there: [sp,#0x00] encrypted counter, [sp,#0x10] copy
#    of the counter whose last word ([sp,#0x1c]) is rewritten after
#    every block from r8. Both 16-byte slots are zeroed before return.
#    NOTE(review): the loop tests $len only after processing a block,
#    so $len == 0 is not handled here -- presumably callers never pass
#    zero blocks; confirm against callers.
#  * $len >= 8: eight counter blocks per _bsaes_encrypt8_alt call. The
#    key stream comes back in @XMM[0,1,4,6,3,7,2,5] (in that block
#    order), which is why the XOR/store sequences are not in register
#    order. The last, partial batch (.Lctr_enc_loop_done) still
#    encrypts eight counters but only consumes as many as $len needs.
#
# Stack frame on the bit-sliced path: $fp points at 0x10 bytes of
# scratch that carry the next counter value across the call. Without
# BSAES_ASM_EXTENDED_KEY the bit-sliced key schedule is built on the
# stack below $fp ($keysched is sp) by _bsaes_key_convert and wiped, 32
# bytes at a time, from sp up to $fp on exit. With
# BSAES_ASM_EXTENDED_KEY the schedule lives at $key+248, the word at
# $key+244 records whether it has already been converted, and only a
# 0x10-byte slot for the byte-swapped round0 key is placed on the stack.
#
# _bsaes_key_convert, _bsaes_encrypt8_alt, AES_encrypt, VFP_ABI_PUSH/POP
# and the .LM0/.LSR/.LREVM0SR tables are defined outside this block.
#
# Perl-side register aliases: r0-r3 are the incoming arguments; r8-r10
# hold the counter pointer (later the .LREVM0SR table address), the
# saved frame pointer and the round count.
1358d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootmy ($inp,$out,$len,$key, $ctr,$fp,$rounds)=(map("r$_",(0..3,8..10)));
1359d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootmy $const = "r6";	# shared with _bsaes_encrypt8_alt
# $keysched aliases sp: the key schedule (or, with
# BSAES_ASM_EXTENDED_KEY, just the adjusted round0 key) sits at the
# current stack pointer while the bit-sliced loop runs.
1360d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootmy $keysched = "sp";
1361d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1362d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root$code.=<<___;
1363d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.extern	AES_encrypt
1364d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.global	bsaes_ctr32_encrypt_blocks
1365d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.type	bsaes_ctr32_encrypt_blocks,%function
1366d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	5
1367d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootbsaes_ctr32_encrypt_blocks:
1368d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	cmp	$len, #8			@ use plain AES for
1369d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	blo	.Lctr_enc_short			@ small sizes
1370d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1371d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	ip, sp
1372d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	stmdb	sp!, {r4-r10, lr}
1373d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	VFP_ABI_PUSH
1374d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldr	$ctr, [ip]			@ ctr is 1st arg on the stack
1375d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	sub	sp, sp, #0x10			@ scratch space to carry over the ctr
1376d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	$fp, sp				@ save sp
1377d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1378d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldr	$rounds, [$key, #240]		@ get # of rounds
1379d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifndef	BSAES_ASM_EXTENDED_KEY
1380d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	@ allocate the key schedule on the stack
1381d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	sub	r12, sp, $rounds, lsl#7		@ 128 bytes per inner round key
1382d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add	r12, #`128-32`			@ size of bit-sliced key schedule
1383d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1384d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	@ populate the key schedule
1385d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r4, $key			@ pass key
1386d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r5, $rounds			@ pass # of rounds
1387d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	sp, r12				@ sp is $keysched
1388d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl	_bsaes_key_convert
1389d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[7],@XMM[7],@XMM[15]	@ fix up last round key
1390d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmia	r12, {@XMM[7]}			@ save last round key
1391d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1392d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[0]}, [$ctr]		@ load counter
1393d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add	$ctr, $const, #.LREVM0SR-.LM0	@ borrow $ctr
1394d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	$keysched, {@XMM[4]}		@ load round0 key
1395d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#else
1396d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldr	r12, [$key, #244]
1397d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	eors	r12, #1
1398d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	beq	0f
1399d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1400d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	@ populate the key schedule
1401d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	str	r12, [$key, #244]
1402d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r4, $key			@ pass key
1403d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r5, $rounds			@ pass # of rounds
1404d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add	r12, $key, #248			@ pass key schedule
1405d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl	_bsaes_key_convert
1406d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[7],@XMM[7],@XMM[15]	@ fix up last round key
1407d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmia	r12, {@XMM[7]}			@ save last round key
1408d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1409d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	2
1410d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root0:	add	r12, $key, #248
1411d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[0]}, [$ctr]		@ load counter
1412d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	adrl	$ctr, .LREVM0SR			@ borrow $ctr
1413d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	r12, {@XMM[4]}			@ load round0 key
1414d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	sub	sp, #0x10			@ place for adjusted round0 key
1415d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
1416d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1417d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov.i32	@XMM[8],#1		@ compose 1<<96
1418d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[9],@XMM[9],@XMM[9]
1419d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vrev32.8	@XMM[0],@XMM[0]
1420d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vext.8		@XMM[8],@XMM[9],@XMM[8],#4
1421d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vrev32.8	@XMM[4],@XMM[4]
1422d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vadd.u32	@XMM[9],@XMM[8],@XMM[8]	@ compose 2<<96
1423d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmia	$keysched, {@XMM[4]}		@ save adjusted round0 key
1424d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	b	.Lctr_enc_loop
1425d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1426d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
1427d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lctr_enc_loop:
1428d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vadd.u32	@XMM[10], @XMM[8], @XMM[9]	@ compose 3<<96
1429d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vadd.u32	@XMM[1], @XMM[0], @XMM[8]	@ +1
1430d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vadd.u32	@XMM[2], @XMM[0], @XMM[9]	@ +2
1431d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vadd.u32	@XMM[3], @XMM[0], @XMM[10]	@ +3
1432d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vadd.u32	@XMM[4], @XMM[1], @XMM[10]
1433d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vadd.u32	@XMM[5], @XMM[2], @XMM[10]
1434d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vadd.u32	@XMM[6], @XMM[3], @XMM[10]
1435d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vadd.u32	@XMM[7], @XMM[4], @XMM[10]
1436d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vadd.u32	@XMM[10], @XMM[5], @XMM[10]	@ next counter
1437d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1438d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	@ Borrow prologue from _bsaes_encrypt8 to use the opportunity
1439d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	@ to flip byte order in 32-bit counter
1440d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1441d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia		$keysched, {@XMM[9]}		@ load round0 key
1442d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifndef	BSAES_ASM_EXTENDED_KEY
1443d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add		r4, $keysched, #0x10		@ pass next round key
1444d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#else
1445d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add		r4, $key, #`248+16`
1446d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
1447d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia		$ctr, {@XMM[8]}			@ .LREVM0SR
1448d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r5, $rounds			@ pass rounds
1449d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmia		$fp, {@XMM[10]}			@ save next counter
1450d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	sub		$const, $ctr, #.LREVM0SR-.LSR	@ pass constants
1451d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1452d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl		_bsaes_encrypt8_alt
1453d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1454d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	subs		$len, $len, #8
1455d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	blo		.Lctr_enc_loop_done
1456d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1457d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8		{@XMM[8]-@XMM[9]}, [$inp]!	@ load input
1458d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8		{@XMM[10]-@XMM[11]}, [$inp]!
1459d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[0], @XMM[8]
1460d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[1], @XMM[9]
1461d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8		{@XMM[12]-@XMM[13]}, [$inp]!
1462d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[4], @XMM[10]
1463d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[6], @XMM[11]
1464d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8		{@XMM[14]-@XMM[15]}, [$inp]!
1465d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[3], @XMM[12]
1466d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[0]-@XMM[1]}, [$out]!	@ write output
1467d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[7], @XMM[13]
1468d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[2], @XMM[14]
1469d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[4]}, [$out]!
1470d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[5], @XMM[15]
1471d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[6]}, [$out]!
1472d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov.i32	@XMM[8], #1			@ compose 1<<96
1473d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[3]}, [$out]!
1474d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[9], @XMM[9], @XMM[9]
1475d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[7]}, [$out]!
1476d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vext.8		@XMM[8], @XMM[9], @XMM[8], #4
1477d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[2]}, [$out]!
1478d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vadd.u32	@XMM[9],@XMM[8],@XMM[8]		@ compose 2<<96
1479d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[5]}, [$out]!
1480d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia		$fp, {@XMM[0]}			@ load counter
1481d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1482d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bne		.Lctr_enc_loop
1483d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	b		.Lctr_enc_done
1484d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1485d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
1486d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lctr_enc_loop_done:
1487d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add		$len, $len, #8
1488d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8		{@XMM[8]}, [$inp]!	@ load input
1489d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[0], @XMM[8]
1490d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[0]}, [$out]!	@ write output
1491d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	cmp		$len, #2
1492d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	blo		.Lctr_enc_done
1493d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8		{@XMM[9]}, [$inp]!
1494d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[1], @XMM[9]
1495d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[1]}, [$out]!
1496d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	beq		.Lctr_enc_done
1497d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8		{@XMM[10]}, [$inp]!
1498d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[4], @XMM[10]
1499d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[4]}, [$out]!
1500d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	cmp		$len, #4
1501d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	blo		.Lctr_enc_done
1502d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8		{@XMM[11]}, [$inp]!
1503d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[6], @XMM[11]
1504d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[6]}, [$out]!
1505d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	beq		.Lctr_enc_done
1506d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8		{@XMM[12]}, [$inp]!
1507d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[3], @XMM[12]
1508d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[3]}, [$out]!
1509d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	cmp		$len, #6
1510d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	blo		.Lctr_enc_done
1511d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8		{@XMM[13]}, [$inp]!
1512d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[7], @XMM[13]
1513d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[7]}, [$out]!
1514d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	beq		.Lctr_enc_done
1515d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8		{@XMM[14]}, [$inp]
1516d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[2], @XMM[14]
1517d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[2]}, [$out]!
1518d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1519d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lctr_enc_done:
1520d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov.i32	q0, #0
1521d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov.i32	q1, #0
1522d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifndef	BSAES_ASM_EXTENDED_KEY
1523d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lctr_enc_bzero:			@ wipe key schedule [if any]
1524d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmia		$keysched!, {q0-q1}
1525d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	cmp		$keysched, $fp
1526d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bne		.Lctr_enc_bzero
1527d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#else
1528d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmia		$keysched, {q0-q1}
1529d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
1530d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1531d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	sp, $fp
1532d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add	sp, #0x10		@ add sp,$fp,#0x10 is no good for thumb
1533d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	VFP_ABI_POP
1534d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldmia	sp!, {r4-r10, pc}	@ return
1535d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1536d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
1537d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lctr_enc_short:
1538d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldr	ip, [sp]		@ ctr pointer is passed on stack
1539d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	stmdb	sp!, {r4-r8, lr}
1540d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1541d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r4, $inp		@ copy arguments
1542d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r5, $out
1543d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r6, $len
1544d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r7, $key
1545d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldr	r8, [ip, #12]		@ load counter LSW
1546d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[1]}, [ip]		@ load whole counter value
1547d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifdef __ARMEL__
1548d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	rev	r8, r8
1549d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
1550d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	sub	sp, sp, #0x10
1551d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[1]}, [sp,:64]	@ copy counter value
1552d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	sub	sp, sp, #0x10
1553d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1554d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lctr_enc_short_loop:
1555d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add	r0, sp, #0x10		@ input counter value
1556d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r1, sp			@ output on the stack
1557d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r2, r7			@ key
1558d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1559d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl	AES_encrypt
1560d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1561d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[0]}, [r4]!	@ load input
1562d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[1]}, [sp,:64]	@ load encrypted counter
1563d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add	r8, r8, #1
1564d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifdef __ARMEL__
1565d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	rev	r0, r8
1566d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	str	r0, [sp, #0x1c]		@ next counter value
1567d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#else
1568d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	str	r8, [sp, #0x1c]		@ next counter value
1569d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
1570d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[0],@XMM[0],@XMM[1]
1571d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8	{@XMM[0]}, [r5]!	@ store output
1572d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	subs	r6, r6, #1
1573d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bne	.Lctr_enc_short_loop
1574d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1575d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov.i32	q0, #0
1576d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov.i32	q1, #0
1577d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmia		sp!, {q0-q1}
1578d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
1579d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldmia	sp!, {r4-r8, pc}
1580d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.size	bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
1581d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root___
1582d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root}
1583d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root{
1584d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root######################################################################
1585d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root# void bsaes_xts_[en|de]crypt(const char *inp,char *out,size_t len,
1586d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#	const AES_KEY *key1, const AES_KEY *key2,
1587d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#	const unsigned char iv[16]);
1588d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#
1589d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootmy ($inp,$out,$len,$key,$rounds,$magic,$fp)=(map("r$_",(7..10,1..3)));
1590d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootmy $const="r6";		# returned by _bsaes_key_convert
1591d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootmy $twmask=@XMM[5];
1592d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootmy @T=@XMM[6..7];
1593d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
$code.=<<___;
@ bsaes_xts_encrypt(inp, out, len, key1, key2, iv)
@
@ inp, out, len and key1 arrive in r0-r3; key2 and iv are fetched from
@ the caller stack through ip. Two to eight blocks at a time go through
@ _bsaes_encrypt8; a lone block and the stolen tail block go through
@ AES_encrypt. When XTS_CHAIN_TWEAK is defined the first stack argument
@ is instead a pointer to the tweak, which is read on entry and written
@ back on exit, and the tail-stealing code is compiled out.
@
@ Stack frame while running (low to high): 0x90 bytes holding tweak[9],
@ then the bit-sliced key schedule unless BSAES_ASM_EXTENDED_KEY is set.
.globl	bsaes_xts_encrypt
.type	bsaes_xts_encrypt,%function
.align	4
bsaes_xts_encrypt:
	mov	ip, sp				@ keep entry sp to reach stack arguments
	stmdb	sp!, {r4-r10, lr}		@ 0x20
	VFP_ABI_PUSH
	mov	r6, sp				@ future $fp

	mov	$inp, r0			@ free r0-r3 for scratch use
	mov	$out, r1
	mov	$len, r2
	mov	$key, r3

	sub	r0, sp, #0x10			@ 0x10
	bic	r0, #0xf			@ align at 16 bytes
	mov	sp, r0

#ifdef	XTS_CHAIN_TWEAK
	ldr	r0, [ip]			@ pointer to input tweak
#else
	@ generate initial tweak
	@ (AES_encrypt is called with r0 = iv, r1 = 16-byte slot at sp, r2 = key2)
	ldr	r0, [ip, #4]			@ iv[]
	mov	r1, sp
	ldr	r2, [ip, #0]			@ key2
	bl	AES_encrypt
	mov	r0,sp				@ pointer to initial tweak
#endif

	ldr	$rounds, [$key, #240]		@ get # of rounds
	mov	$fp, r6
#ifndef	BSAES_ASM_EXTENDED_KEY
	@ allocate the key schedule on the stack
	sub	r12, sp, $rounds, lsl#7		@ 128 bytes per inner round key
	@ add	r12, #`128-32`			@ size of bit-sliced key schedule
	sub	r12, #`32+16`			@ place for tweak[9]

	@ populate the key schedule
	mov	r4, $key			@ pass key
	mov	r5, $rounds			@ pass # of rounds
	mov	sp, r12
	add	r12, #0x90			@ pass key schedule
	bl	_bsaes_key_convert
	veor	@XMM[7], @XMM[7], @XMM[15]	@ fix up last round key
	vstmia	r12, {@XMM[7]}			@ save last round key
#else
	@ The word at key+244 marks an already converted schedule: when it
	@ equals 1 the conversion is skipped, otherwise the word XOR 1 is
	@ written back and the schedule is generated in place at key+248.
	ldr	r12, [$key, #244]
	eors	r12, #1
	beq	0f

	str	r12, [$key, #244]
	mov	r4, $key			@ pass key
	mov	r5, $rounds			@ pass # of rounds
	add	r12, $key, #248			@ pass key schedule
	bl	_bsaes_key_convert
	veor	@XMM[7], @XMM[7], @XMM[15]	@ fix up last round key
	vstmia	r12, {@XMM[7]}

.align	2
0:	sub	sp, #0x90			@ place for tweak[9]
#endif

	vld1.8	{@XMM[8]}, [r0]			@ initial tweak
	adr	$magic, .Lxts_magic

	subs	$len, #0x80
	blo	.Lxts_enc_short			@ fewer than 8 blocks in total
	b	.Lxts_enc_loop

.align	4
.Lxts_enc_loop:
	@ Main loop: 8 blocks per pass. Tweaks are derived by repeated
	@ doubling in GF(2^128): each 64-bit lane is shifted left with
	@ vadd, and the lane sign masks, ANDed with {1, 0x87} and then
	@ swapped, are XORed in as carry and reduction.
	vldmia		$magic, {$twmask}	@ load XTS magic
	vshr.s64	@T[0], @XMM[8], #63
	mov		r0, sp			@ r0 walks the tweak area
	vand		@T[0], @T[0], $twmask
___
# Unrolled tweak/input pipeline. Iteration $i doubles tweak @XMM[$i-1]
# into @XMM[$i] and spills @XMM[$i-1] to the tweak area at r0; from
# $i==10 on it also loads an input block into @XMM[$i-10], and from
# $i==11 on it XORs the block in @XMM[$i-11] with its tweak @XMM[$i-3].
# The two @T temporaries trade places after every iteration.
for($i=9;$i<16;$i++) {
$code.=<<___;
	vadd.u64	@XMM[$i], @XMM[$i-1], @XMM[$i-1]
	vst1.64		{@XMM[$i-1]}, [r0,:128]!
	vswp		`&Dhi("@T[0]")`,`&Dlo("@T[0]")`
	vshr.s64	@T[1], @XMM[$i], #63
	veor		@XMM[$i], @XMM[$i], @T[0]
	vand		@T[1], @T[1], $twmask
___
	@T=reverse(@T);

$code.=<<___ if ($i>=10);
	vld1.8		{@XMM[$i-10]}, [$inp]!
___
$code.=<<___ if ($i>=11);
	veor		@XMM[$i-11], @XMM[$i-11], @XMM[$i-3]
___
}
$code.=<<___;
	@ one more doubling yields the tweak that starts the next pass
	vadd.u64	@XMM[8], @XMM[15], @XMM[15]
	vst1.64		{@XMM[15]}, [r0,:128]!
	vswp		`&Dhi("@T[0]")`,`&Dlo("@T[0]")`
	veor		@XMM[8], @XMM[8], @T[0]
	vst1.64		{@XMM[8]}, [r0,:128]		@ next round tweak

	@ load the last two input blocks and finish the tweak XOR
	vld1.8		{@XMM[6]-@XMM[7]}, [$inp]!
	veor		@XMM[5], @XMM[5], @XMM[13]
#ifndef	BSAES_ASM_EXTENDED_KEY
	add		r4, sp, #0x90			@ pass key schedule
#else
	add		r4, $key, #248			@ pass key schedule
#endif
	veor		@XMM[6], @XMM[6], @XMM[14]
	mov		r5, $rounds			@ pass rounds
	veor		@XMM[7], @XMM[7], @XMM[15]
	mov		r0, sp

	bl		_bsaes_encrypt8

	@ XOR the spilled tweaks back in and store 8 blocks. Results are
	@ taken from XMM slots 0,1,4,6,3,7,2,5 in that order.
	@ r0 is assumed to survive the call - TODO confirm against
	@ _bsaes_encrypt8.
	vld1.64		{@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
	vld1.64		{@XMM[10]-@XMM[11]}, [r0,:128]!
	veor		@XMM[0], @XMM[0], @XMM[ 8]
	vld1.64		{@XMM[12]-@XMM[13]}, [r0,:128]!
	veor		@XMM[1], @XMM[1], @XMM[ 9]
	veor		@XMM[8], @XMM[4], @XMM[10]
	vst1.8		{@XMM[0]-@XMM[1]}, [$out]!
	veor		@XMM[9], @XMM[6], @XMM[11]
	vld1.64		{@XMM[14]-@XMM[15]}, [r0,:128]!
	veor		@XMM[10], @XMM[3], @XMM[12]
	vst1.8		{@XMM[8]-@XMM[9]}, [$out]!
	veor		@XMM[11], @XMM[7], @XMM[13]
	veor		@XMM[12], @XMM[2], @XMM[14]
	vst1.8		{@XMM[10]-@XMM[11]}, [$out]!
	veor		@XMM[13], @XMM[5], @XMM[15]
	vst1.8		{@XMM[12]-@XMM[13]}, [$out]!

	vld1.64		{@XMM[8]}, [r0,:128]		@ next round tweak

	@ NOTE(review): this continuation test is signed (bpl) while the
	@ entry test above is unsigned (blo); the two agree only while
	@ the remaining length stays below 2 GiB.
	subs		$len, #0x80
	bpl		.Lxts_enc_loop

.Lxts_enc_short:
	@ Fewer than 8 blocks remain. After the adds the counter holds the
	@ remaining byte count minus 0x10, so a negative value means no
	@ full block is left.
	adds		$len, #0x70
	bmi		.Lxts_enc_done

	vldmia		$magic, {$twmask}	@ load XTS magic
	vshr.s64	@T[0], @XMM[8], #63
	mov		r0, sp
	vand		@T[0], @T[0], $twmask
___
# Same pipeline as in the main loop, except that after every input block
# is loaded the byte counter drops by 0x10 and, once it turns negative,
# control leaves for .Lxts_enc_N with N = $i-9 blocks loaded. The branch
# is taken before the XOR of @XMM[$i-11], so each .Lxts_enc_N entry still
# has to whiten its last two blocks.
for($i=9;$i<16;$i++) {
$code.=<<___;
	vadd.u64	@XMM[$i], @XMM[$i-1], @XMM[$i-1]
	vst1.64		{@XMM[$i-1]}, [r0,:128]!
	vswp		`&Dhi("@T[0]")`,`&Dlo("@T[0]")`
	vshr.s64	@T[1], @XMM[$i], #63
	veor		@XMM[$i], @XMM[$i], @T[0]
	vand		@T[1], @T[1], $twmask
___
	@T=reverse(@T);

$code.=<<___ if ($i>=10);
	vld1.8		{@XMM[$i-10]}, [$inp]!
	subs		$len, #0x10
	bmi		.Lxts_enc_`$i-9`
___
$code.=<<___ if ($i>=11);
	veor		@XMM[$i-11], @XMM[$i-11], @XMM[$i-3]
___
}
$code.=<<___;
	@ fall-through: exactly 7 full blocks remain
	sub		$len, #0x10
	vst1.64		{@XMM[15]}, [r0,:128]		@ next round tweak

	vld1.8		{@XMM[6]}, [$inp]!
	veor		@XMM[5], @XMM[5], @XMM[13]
#ifndef	BSAES_ASM_EXTENDED_KEY
	add		r4, sp, #0x90			@ pass key schedule
#else
	add		r4, $key, #248			@ pass key schedule
#endif
	veor		@XMM[6], @XMM[6], @XMM[14]
	mov		r5, $rounds			@ pass rounds
	mov		r0, sp

	bl		_bsaes_encrypt8

	@ XOR the spilled tweaks back in and store 7 blocks
	vld1.64		{@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
	vld1.64		{@XMM[10]-@XMM[11]}, [r0,:128]!
	veor		@XMM[0], @XMM[0], @XMM[ 8]
	vld1.64		{@XMM[12]-@XMM[13]}, [r0,:128]!
	veor		@XMM[1], @XMM[1], @XMM[ 9]
	veor		@XMM[8], @XMM[4], @XMM[10]
	vst1.8		{@XMM[0]-@XMM[1]}, [$out]!
	veor		@XMM[9], @XMM[6], @XMM[11]
	vld1.64		{@XMM[14]}, [r0,:128]!
	veor		@XMM[10], @XMM[3], @XMM[12]
	vst1.8		{@XMM[8]-@XMM[9]}, [$out]!
	veor		@XMM[11], @XMM[7], @XMM[13]
	veor		@XMM[12], @XMM[2], @XMM[14]
	vst1.8		{@XMM[10]-@XMM[11]}, [$out]!
	vst1.8		{@XMM[12]}, [$out]!

	vld1.64		{@XMM[8]}, [r0,:128]		@ next round tweak
	b		.Lxts_enc_done
.align	4
.Lxts_enc_6:
	@ 6 blocks loaded: whiten the last two, encrypt, store 6 blocks
	vst1.64		{@XMM[14]}, [r0,:128]		@ next round tweak

	veor		@XMM[4], @XMM[4], @XMM[12]
#ifndef	BSAES_ASM_EXTENDED_KEY
	add		r4, sp, #0x90			@ pass key schedule
#else
	add		r4, $key, #248			@ pass key schedule
#endif
	veor		@XMM[5], @XMM[5], @XMM[13]
	mov		r5, $rounds			@ pass rounds
	mov		r0, sp

	bl		_bsaes_encrypt8

	vld1.64		{@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
	vld1.64		{@XMM[10]-@XMM[11]}, [r0,:128]!
	veor		@XMM[0], @XMM[0], @XMM[ 8]
	vld1.64		{@XMM[12]-@XMM[13]}, [r0,:128]!
	veor		@XMM[1], @XMM[1], @XMM[ 9]
	veor		@XMM[8], @XMM[4], @XMM[10]
	vst1.8		{@XMM[0]-@XMM[1]}, [$out]!
	veor		@XMM[9], @XMM[6], @XMM[11]
	veor		@XMM[10], @XMM[3], @XMM[12]
	vst1.8		{@XMM[8]-@XMM[9]}, [$out]!
	veor		@XMM[11], @XMM[7], @XMM[13]
	vst1.8		{@XMM[10]-@XMM[11]}, [$out]!

	vld1.64		{@XMM[8]}, [r0,:128]		@ next round tweak
	b		.Lxts_enc_done

@ put this in range for both ARM and Thumb mode adr instructions
.align	5
.Lxts_magic:
	@ lane masks used while doubling a tweak: 1 is the carry out of
	@ the low half, 0x87 the reduction for a carry out of the high half
	.quad	1, 0x87

.align	5
.Lxts_enc_5:
	@ 5 blocks loaded: whiten the last two, encrypt, store 5 blocks
	vst1.64		{@XMM[13]}, [r0,:128]		@ next round tweak

	veor		@XMM[3], @XMM[3], @XMM[11]
#ifndef	BSAES_ASM_EXTENDED_KEY
	add		r4, sp, #0x90			@ pass key schedule
#else
	add		r4, $key, #248			@ pass key schedule
#endif
	veor		@XMM[4], @XMM[4], @XMM[12]
	mov		r5, $rounds			@ pass rounds
	mov		r0, sp

	bl		_bsaes_encrypt8

	vld1.64		{@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
	vld1.64		{@XMM[10]-@XMM[11]}, [r0,:128]!
	veor		@XMM[0], @XMM[0], @XMM[ 8]
	vld1.64		{@XMM[12]}, [r0,:128]!
	veor		@XMM[1], @XMM[1], @XMM[ 9]
	veor		@XMM[8], @XMM[4], @XMM[10]
	vst1.8		{@XMM[0]-@XMM[1]}, [$out]!
	veor		@XMM[9], @XMM[6], @XMM[11]
	veor		@XMM[10], @XMM[3], @XMM[12]
	vst1.8		{@XMM[8]-@XMM[9]}, [$out]!
	vst1.8		{@XMM[10]}, [$out]!

	vld1.64		{@XMM[8]}, [r0,:128]		@ next round tweak
	b		.Lxts_enc_done
.align	4
.Lxts_enc_4:
	@ 4 blocks loaded: whiten the last two, encrypt, store 4 blocks
	vst1.64		{@XMM[12]}, [r0,:128]		@ next round tweak

	veor		@XMM[2], @XMM[2], @XMM[10]
#ifndef	BSAES_ASM_EXTENDED_KEY
	add		r4, sp, #0x90			@ pass key schedule
#else
	add		r4, $key, #248			@ pass key schedule
#endif
	veor		@XMM[3], @XMM[3], @XMM[11]
	mov		r5, $rounds			@ pass rounds
	mov		r0, sp

	bl		_bsaes_encrypt8

	vld1.64		{@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
	vld1.64		{@XMM[10]-@XMM[11]}, [r0,:128]!
	veor		@XMM[0], @XMM[0], @XMM[ 8]
	veor		@XMM[1], @XMM[1], @XMM[ 9]
	veor		@XMM[8], @XMM[4], @XMM[10]
	vst1.8		{@XMM[0]-@XMM[1]}, [$out]!
	veor		@XMM[9], @XMM[6], @XMM[11]
	vst1.8		{@XMM[8]-@XMM[9]}, [$out]!

	vld1.64		{@XMM[8]}, [r0,:128]		@ next round tweak
	b		.Lxts_enc_done
.align	4
.Lxts_enc_3:
	@ 3 blocks loaded: whiten the last two, encrypt, store 3 blocks
	vst1.64		{@XMM[11]}, [r0,:128]		@ next round tweak

	veor		@XMM[1], @XMM[1], @XMM[9]
#ifndef	BSAES_ASM_EXTENDED_KEY
	add		r4, sp, #0x90			@ pass key schedule
#else
	add		r4, $key, #248			@ pass key schedule
#endif
	veor		@XMM[2], @XMM[2], @XMM[10]
	mov		r5, $rounds			@ pass rounds
	mov		r0, sp

	bl		_bsaes_encrypt8

	vld1.64		{@XMM[8]-@XMM[9]}, [r0,:128]!
	vld1.64		{@XMM[10]}, [r0,:128]!
	veor		@XMM[0], @XMM[0], @XMM[ 8]
	veor		@XMM[1], @XMM[1], @XMM[ 9]
	veor		@XMM[8], @XMM[4], @XMM[10]
	vst1.8		{@XMM[0]-@XMM[1]}, [$out]!
	vst1.8		{@XMM[8]}, [$out]!

	vld1.64		{@XMM[8]}, [r0,:128]		@ next round tweak
	b		.Lxts_enc_done
.align	4
.Lxts_enc_2:
	@ 2 blocks loaded: whiten both, encrypt, store 2 blocks
	vst1.64		{@XMM[10]}, [r0,:128]		@ next round tweak

	veor		@XMM[0], @XMM[0], @XMM[8]
#ifndef	BSAES_ASM_EXTENDED_KEY
	add		r4, sp, #0x90			@ pass key schedule
#else
	add		r4, $key, #248			@ pass key schedule
#endif
	veor		@XMM[1], @XMM[1], @XMM[9]
	mov		r5, $rounds			@ pass rounds
	mov		r0, sp

	bl		_bsaes_encrypt8

	vld1.64		{@XMM[8]-@XMM[9]}, [r0,:128]!
	veor		@XMM[0], @XMM[0], @XMM[ 8]
	veor		@XMM[1], @XMM[1], @XMM[ 9]
	vst1.8		{@XMM[0]-@XMM[1]}, [$out]!

	vld1.64		{@XMM[8]}, [r0,:128]		@ next round tweak
	b		.Lxts_enc_done
.align	4
.Lxts_enc_1:
	@ Single block: whiten it, run it through AES_encrypt using the
	@ 16-byte slot at sp as both input and output, then whiten again.
	@ The tweak registers are assumed to survive the call - TODO
	@ confirm against AES_encrypt.
	mov		r0, sp
	veor		@XMM[0], @XMM[0], @XMM[8]
	mov		r1, sp
	vst1.8		{@XMM[0]}, [sp,:128]
	mov		r2, $key
	mov		r4, $fp				@ preserve fp

	bl		AES_encrypt

	vld1.8		{@XMM[0]}, [sp,:128]
	veor		@XMM[0], @XMM[0], @XMM[8]
	vst1.8		{@XMM[0]}, [$out]!
	mov		$fp, r4

	vmov		@XMM[8], @XMM[9]		@ next round tweak

.Lxts_enc_done:
#ifndef	XTS_CHAIN_TWEAK
	adds		$len, #0x10		@ bytes left after the last full block
	beq		.Lxts_enc_ret
	sub		r6, $out, #0x10		@ last full block already written to out

.Lxts_enc_steal:
	@ For every leftover byte, move one byte of the last ciphertext
	@ block to the tail of the output and put the input byte in its
	@ place.
	ldrb		r0, [$inp], #1
	ldrb		r1, [$out, #-0x10]
	strb		r0, [$out, #-0x10]
	strb		r1, [$out], #1

	subs		$len, #1
	bhi		.Lxts_enc_steal

	@ encrypt the patched block at r6 in place with the next tweak
	vld1.8		{@XMM[0]}, [r6]
	mov		r0, sp
	veor		@XMM[0], @XMM[0], @XMM[8]
	mov		r1, sp
	vst1.8		{@XMM[0]}, [sp,:128]
	mov		r2, $key
	mov		r4, $fp			@ preserve fp

	bl		AES_encrypt

	vld1.8		{@XMM[0]}, [sp,:128]
	veor		@XMM[0], @XMM[0], @XMM[8]
	vst1.8		{@XMM[0]}, [r6]
	mov		$fp, r4
#endif

.Lxts_enc_ret:
	bic		r0, $fp, #0xf		@ upper bound of the area to wipe
	vmov.i32	q0, #0
	vmov.i32	q1, #0
#ifdef	XTS_CHAIN_TWEAK
	ldr		r1, [$fp, #0x20+VFP_ABI_FRAME]	@ chain tweak
#endif
.Lxts_enc_bzero:				@ wipe key schedule [if any]
	vstmia		sp!, {q0-q1}		@ zero 32 bytes and advance sp
	cmp		sp, r0
	bne		.Lxts_enc_bzero

	mov		sp, $fp
#ifdef	XTS_CHAIN_TWEAK
	vst1.8		{@XMM[8]}, [r1]
#endif
	VFP_ABI_POP
	ldmia		sp!, {r4-r10, pc}	@ return

.size	bsaes_xts_encrypt,.-bsaes_xts_encrypt
2008d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
@ bsaes_xts_decrypt: XTS-mode AES decryption built on the bit-sliced
@ 8-block primitive _bsaes_decrypt8, with the scalar AES_decrypt used
@ for a lone block and for the ciphertext-stealing tail.
@
@ Arguments as read by the prologue below:
@   r0        input pointer
@   r1        output pointer
@   r2        length in bytes
@   r3        AES key; the round count is read from byte offset 240
@   [sp]      key2, or the pointer to the tweak in chain-tweak builds
@   [sp, #4]  iv[], only read when the initial tweak is generated here
@
@ r4-r10 and lr are saved on entry and restored on return. Everything
@ between the working sp and the frame pointer rounded down to 16 bytes
@ (tweak slots and any on-stack key schedule) is zeroed before return.
2009d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.globl	bsaes_xts_decrypt
2010d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.type	bsaes_xts_decrypt,%function
2011d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
2012d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootbsaes_xts_decrypt:
@ ip keeps the entry sp so the stack-passed arguments stay reachable
2013d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	ip, sp
2014d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	stmdb	sp!, {r4-r10, lr}		@ 0x20
2015d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	VFP_ABI_PUSH
2016d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r6, sp				@ future $fp
2017d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
@ move the register arguments out of r0-r3, which the calls below reuse
2018d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	$inp, r0
2019d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	$out, r1
2020d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	$len, r2
2021d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	$key, r3
2022d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
@ carve a 16-byte, 16-byte-aligned slot that receives the initial tweak
2023d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	sub	r0, sp, #0x10			@ 0x10
2024d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bic	r0, #0xf			@ align at 16 bytes
2025d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	sp, r0
2026d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2027d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifdef	XTS_CHAIN_TWEAK
2028d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldr	r0, [ip]			@ pointer to input tweak
2029d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#else
2030d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	@ generate initial tweak
@ NOTE(review): AES_encrypt is presumably (in=r0, out=r1, key=r2) - confirm
2031d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldr	r0, [ip, #4]			@ iv[]
2032d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r1, sp
2033d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldr	r2, [ip, #0]			@ key2
2034d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl	AES_encrypt
2035d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r0, sp				@ pointer to initial tweak
2036d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
2037d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
@ r0 keeps pointing at the initial tweak until it is loaded further down,
@ even though sp itself moves in between
2038d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldr	$rounds, [$key, #240]		@ get # of rounds
2039d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	$fp, r6
2040d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifndef	BSAES_ASM_EXTENDED_KEY
2041d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	@ allocate the key schedule on the stack
2042d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	sub	r12, sp, $rounds, lsl#7		@ 128 bytes per inner round key
2043d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	@ add	r12, #`128-32`			@ size of bit-sliced key schedule
2044d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	sub	r12, #`32+16`			@ place for tweak[9]
2045d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2046d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	@ populate the key schedule
@ the schedule is written starting 0x90 bytes above the new sp, which
@ leaves the first 0x90 bytes at sp free for nine 16-byte tweak slots
2047d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r4, $key			@ pass key
2048d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r5, $rounds			@ pass # of rounds
2049d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	sp, r12
2050d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add	r12, #0x90			@ pass key schedule
2051d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl	_bsaes_key_convert
@ NOTE(review): the fix-up below assumes _bsaes_key_convert returns with
@ r12 at the last round key slot and the relevant values in the two
@ vector registers used here - confirm against that helper
2052d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add	r4, sp, #0x90
2053d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	r4, {@XMM[6]}
2054d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmia	r12,  {@XMM[15]}		@ save last round key
2055d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[7], @XMM[7], @XMM[6]	@ fix up round 0 key
2056d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmia	r4, {@XMM[7]}
2057d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#else
@ the word at key offset 244 is tested against 1: when it already is 1
@ the conversion is skipped, otherwise the toggled value is written back
@ and the schedule is converted into the area at key offset 248
@ NOTE(review): presumably an already-converted flag - confirm
2058d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldr	r12, [$key, #244]
2059d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	eors	r12, #1
2060d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	beq	0f
2061d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2062d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	str	r12, [$key, #244]
2063d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r4, $key			@ pass key
2064d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov	r5, $rounds			@ pass # of rounds
2065d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add	r12, $key, #248			@ pass key schedule
2066d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl	_bsaes_key_convert
2067d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add	r4, $key, #248
2068d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia	r4, {@XMM[6]}
2069d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmia	r12,  {@XMM[15]}		@ save last round key
2070d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor	@XMM[7], @XMM[7], @XMM[6]	@ fix up round 0 key
2071d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmia	r4, {@XMM[7]}
2072d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2073d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	2
2074d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root0:	sub	sp, #0x90			@ place for tweak[9]
2075d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
2076d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8	{@XMM[8]}, [r0]			@ initial tweak
2077d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	adr	$magic, .Lxts_magic
2078d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
@ Length accounting: if the length is not a multiple of 16, one whole
@ block is held back for the ciphertext-stealing code at .Lxts_dec_done.
@ The count is then biased by -0x80 so that a borrow means fewer than
@ eight whole blocks remain. The low four bits are never disturbed, so
@ the number of trailing bytes survives until .Lxts_dec_done.
2079d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	tst	$len, #0xf			@ if not multiple of 16
2080d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	it	ne				@ Thumb2 thing, sanity check in ARM
2081d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	subne	$len, #0x10			@ subtract another 16 bytes
2082d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	subs	$len, #0x80
2083d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2084d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	blo	.Lxts_dec_short
2085d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	b	.Lxts_dec_loop
2086d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
@ Main loop: eight blocks per pass. The magic mask is reloaded on every
@ pass because its register is later overwritten by an input block.
2087d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
2088d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lxts_dec_loop:
2089d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia		$magic, {$twmask}	@ load XTS magic
2090d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vshr.s64	@T[0], @XMM[8], #63
2091d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r0, sp
2092d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vand		@T[0], @T[0], $twmask
2093d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root___
# Unrolled tweak schedule. For i = 9..15 the next tweak @XMM[i] is derived
# from @XMM[i-1]: both 64-bit lanes are doubled (vadd.u64), and the sign
# mask of the previous lanes, ANDed with the magic constant and with its
# halves swapped, is xored in. Each finished tweak is spilled to the stack
# through r0. Input loads are interleaved from i = 10 and the xor of input
# with its tweak from i = 11. @T is reversed on every pass so the two mask
# temporaries alternate between iterations.
2094d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootfor($i=9;$i<16;$i++) {
2095d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root$code.=<<___;
2096d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vadd.u64	@XMM[$i], @XMM[$i-1], @XMM[$i-1]
2097d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.64		{@XMM[$i-1]}, [r0,:128]!
2098d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vswp		`&Dhi("@T[0]")`,`&Dlo("@T[0]")`
2099d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vshr.s64	@T[1], @XMM[$i], #63
2100d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[$i], @XMM[$i], @T[0]
2101d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vand		@T[1], @T[1], $twmask
2102d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root___
2103d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	@T=reverse(@T);
2104d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2105d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root$code.=<<___ if ($i>=10);
2106d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8		{@XMM[$i-10]}, [$inp]!
2107d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root___
2108d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root$code.=<<___ if ($i>=11);
2109d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[$i-11], @XMM[$i-11], @XMM[$i-3]
2110d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root___
2111d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root}
2112d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root$code.=<<___;
@ ninth tweak of this pass: it goes into the ninth stack slot and becomes
@ the first tweak of the next pass
2113d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vadd.u64	@XMM[8], @XMM[15], @XMM[15]
2114d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.64		{@XMM[15]}, [r0,:128]!
2115d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vswp		`&Dhi("@T[0]")`,`&Dlo("@T[0]")`
2116d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[8], @XMM[8], @T[0]
2117d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.64		{@XMM[8]}, [r0,:128]		@ next round tweak
2118d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
@ load the last two input blocks (this overwrites the mask temporaries)
@ and finish xoring blocks 5..7 with their tweaks
2119d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8		{@XMM[6]-@XMM[7]}, [$inp]!
2120d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[5], @XMM[5], @XMM[13]
2121d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifndef	BSAES_ASM_EXTENDED_KEY
2122d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add		r4, sp, #0x90			@ pass key schedule
2123d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#else
2124d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add		r4, $key, #248			@ pass key schedule
2125d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
2126d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[6], @XMM[6], @XMM[14]
2127d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r5, $rounds			@ pass rounds
2128d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[7], @XMM[7], @XMM[15]
2129d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r0, sp
2130d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2131d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl		_bsaes_decrypt8
2132d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
@ Reload the spilled tweaks and xor them into the results. The results
@ are taken from the block registers in the order 0,1,6,4,2,7,3,5 and
@ written out as eight consecutive blocks.
@ NOTE(review): relies on _bsaes_decrypt8 leaving r0 (set to sp just
@ before the call) untouched - confirm against that helper
2133d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
2134d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[10]-@XMM[11]}, [r0,:128]!
2135d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[0], @XMM[0], @XMM[ 8]
2136d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[12]-@XMM[13]}, [r0,:128]!
2137d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[1], @XMM[1], @XMM[ 9]
2138d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[8], @XMM[6], @XMM[10]
2139d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[0]-@XMM[1]}, [$out]!
2140d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[9], @XMM[4], @XMM[11]
2141d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[14]-@XMM[15]}, [r0,:128]!
2142d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[10], @XMM[2], @XMM[12]
2143d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[8]-@XMM[9]}, [$out]!
2144d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[11], @XMM[7], @XMM[13]
2145d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[12], @XMM[3], @XMM[14]
2146d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[10]-@XMM[11]}, [$out]!
2147d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[13], @XMM[5], @XMM[15]
2148d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[12]-@XMM[13]}, [$out]!
2149d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2150d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[8]}, [r0,:128]		@ next round tweak
2151d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2152d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	subs		$len, #0x80
2153d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bpl		.Lxts_dec_loop
2154d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
@ Fewer than eight whole blocks remain. Rebias the count from
@ (remaining - 0x80) to (remaining - 0x10): negative means no whole
@ block is left and only the tail handling, if any, is needed.
2155d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lxts_dec_short:
2156d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	adds		$len, #0x70
2157d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bmi		.Lxts_dec_done
2158d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2159d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia		$magic, {$twmask}	@ load XTS magic
2160d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vshr.s64	@T[0], @XMM[8], #63
2161d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r0, sp
2162d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vand		@T[0], @T[0], $twmask
2163d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root___
# Same tweak schedule as in the main loop, but after every input load the
# byte count is reduced by 16 and control leaves for .Lxts_dec_N (N = i-9)
# as soon as the count goes negative, i.e. when exactly N blocks were
# available. The branch is taken before the most recently loaded block and
# the one before it have been xored with their tweaks; each .Lxts_dec_N
# entry (N >= 2) completes those two xors itself.
2164d55031a5e797d10e7106668121d18ef5608aaed9Kenny Rootfor($i=9;$i<16;$i++) {
2165d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root$code.=<<___;
2166d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vadd.u64	@XMM[$i], @XMM[$i-1], @XMM[$i-1]
2167d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.64		{@XMM[$i-1]}, [r0,:128]!
2168d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vswp		`&Dhi("@T[0]")`,`&Dlo("@T[0]")`
2169d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vshr.s64	@T[1], @XMM[$i], #63
2170d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[$i], @XMM[$i], @T[0]
2171d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vand		@T[1], @T[1], $twmask
2172d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root___
2173d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	@T=reverse(@T);
2174d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2175d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root$code.=<<___ if ($i>=10);
2176d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8		{@XMM[$i-10]}, [$inp]!
2177d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	subs		$len, #0x10
2178d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bmi		.Lxts_dec_`$i-9`
2179d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root___
2180d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root$code.=<<___ if ($i>=11);
2181d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[$i-11], @XMM[$i-11], @XMM[$i-3]
2182d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root___
2183d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root}
2184d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root$code.=<<___;
@ seven blocks: account for the seventh, spill the eighth tweak as the
@ one that follows, and store only seven results after the call
2185d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	sub		$len, #0x10
2186d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.64		{@XMM[15]}, [r0,:128]		@ next round tweak
2187d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2188d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8		{@XMM[6]}, [$inp]!
2189d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[5], @XMM[5], @XMM[13]
2190d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifndef	BSAES_ASM_EXTENDED_KEY
2191d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add		r4, sp, #0x90			@ pass key schedule
2192d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#else
2193d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add		r4, $key, #248			@ pass key schedule
2194d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
2195d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[6], @XMM[6], @XMM[14]
2196d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r5, $rounds			@ pass rounds
2197d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r0, sp
2198d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2199d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl		_bsaes_decrypt8
2200d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2201d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
2202d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[10]-@XMM[11]}, [r0,:128]!
2203d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[0], @XMM[0], @XMM[ 8]
2204d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[12]-@XMM[13]}, [r0,:128]!
2205d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[1], @XMM[1], @XMM[ 9]
2206d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[8], @XMM[6], @XMM[10]
2207d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[0]-@XMM[1]}, [$out]!
2208d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[9], @XMM[4], @XMM[11]
2209d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[14]}, [r0,:128]!
2210d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[10], @XMM[2], @XMM[12]
2211d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[8]-@XMM[9]}, [$out]!
2212d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[11], @XMM[7], @XMM[13]
2213d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[12], @XMM[3], @XMM[14]
2214d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[10]-@XMM[11]}, [$out]!
2215d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[12]}, [$out]!
2216d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2217d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[8]}, [r0,:128]		@ next round tweak
2218d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	b		.Lxts_dec_done
@ six blocks: finish the two pending xors, run the 8-way primitive and
@ store only the first six results
2219d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
2220d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lxts_dec_6:
2221d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.64		{@XMM[14]}, [r0,:128]		@ next round tweak
2222d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2223d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[4], @XMM[4], @XMM[12]
2224d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifndef	BSAES_ASM_EXTENDED_KEY
2225d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add		r4, sp, #0x90			@ pass key schedule
2226d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#else
2227d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add		r4, $key, #248			@ pass key schedule
2228d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
2229d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[5], @XMM[5], @XMM[13]
2230d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r5, $rounds			@ pass rounds
2231d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r0, sp
2232d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2233d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl		_bsaes_decrypt8
2234d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2235d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
2236d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[10]-@XMM[11]}, [r0,:128]!
2237d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[0], @XMM[0], @XMM[ 8]
2238d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[12]-@XMM[13]}, [r0,:128]!
2239d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[1], @XMM[1], @XMM[ 9]
2240d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[8], @XMM[6], @XMM[10]
2241d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[0]-@XMM[1]}, [$out]!
2242d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[9], @XMM[4], @XMM[11]
2243d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[10], @XMM[2], @XMM[12]
2244d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[8]-@XMM[9]}, [$out]!
2245d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[11], @XMM[7], @XMM[13]
2246d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[10]-@XMM[11]}, [$out]!
2247d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2248d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[8]}, [r0,:128]		@ next round tweak
2249d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	b		.Lxts_dec_done
@ five blocks: same pattern, five results stored
2250d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
2251d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lxts_dec_5:
2252d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.64		{@XMM[13]}, [r0,:128]		@ next round tweak
2253d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2254d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[3], @XMM[3], @XMM[11]
2255d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifndef	BSAES_ASM_EXTENDED_KEY
2256d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add		r4, sp, #0x90			@ pass key schedule
2257d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#else
2258d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add		r4, $key, #248			@ pass key schedule
2259d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
2260d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[4], @XMM[4], @XMM[12]
2261d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r5, $rounds			@ pass rounds
2262d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r0, sp
2263d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2264d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl		_bsaes_decrypt8
2265d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2266d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
2267d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[10]-@XMM[11]}, [r0,:128]!
2268d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[0], @XMM[0], @XMM[ 8]
2269d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[12]}, [r0,:128]!
2270d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[1], @XMM[1], @XMM[ 9]
2271d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[8], @XMM[6], @XMM[10]
2272d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[0]-@XMM[1]}, [$out]!
2273d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[9], @XMM[4], @XMM[11]
2274d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[10], @XMM[2], @XMM[12]
2275d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[8]-@XMM[9]}, [$out]!
2276d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[10]}, [$out]!
2277d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2278d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[8]}, [r0,:128]		@ next round tweak
2279d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	b		.Lxts_dec_done
@ four blocks: same pattern, four results stored
2280d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
2281d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lxts_dec_4:
2282d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.64		{@XMM[12]}, [r0,:128]		@ next round tweak
2283d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2284d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[2], @XMM[2], @XMM[10]
2285d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifndef	BSAES_ASM_EXTENDED_KEY
2286d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add		r4, sp, #0x90			@ pass key schedule
2287d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#else
2288d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add		r4, $key, #248			@ pass key schedule
2289d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
2290d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[3], @XMM[3], @XMM[11]
2291d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r5, $rounds			@ pass rounds
2292d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r0, sp
2293d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2294d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl		_bsaes_decrypt8
2295d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2296d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[ 8]-@XMM[ 9]}, [r0,:128]!
2297d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[10]-@XMM[11]}, [r0,:128]!
2298d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[0], @XMM[0], @XMM[ 8]
2299d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[1], @XMM[1], @XMM[ 9]
2300d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[8], @XMM[6], @XMM[10]
2301d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[0]-@XMM[1]}, [$out]!
2302d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[9], @XMM[4], @XMM[11]
2303d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[8]-@XMM[9]}, [$out]!
2304d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2305d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[8]}, [r0,:128]		@ next round tweak
2306d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	b		.Lxts_dec_done
@ three blocks: same pattern, three results stored
2307d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
2308d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lxts_dec_3:
2309d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.64		{@XMM[11]}, [r0,:128]		@ next round tweak
2310d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2311d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[1], @XMM[1], @XMM[9]
2312d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifndef	BSAES_ASM_EXTENDED_KEY
2313d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add		r4, sp, #0x90			@ pass key schedule
2314d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#else
2315d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add		r4, $key, #248			@ pass key schedule
2316d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
2317d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[2], @XMM[2], @XMM[10]
2318d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r5, $rounds			@ pass rounds
2319d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r0, sp
2320d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2321d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl		_bsaes_decrypt8
2322d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2323d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[8]-@XMM[9]}, [r0,:128]!
2324d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[10]}, [r0,:128]!
2325d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[0], @XMM[0], @XMM[ 8]
2326d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[1], @XMM[1], @XMM[ 9]
2327d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[8], @XMM[6], @XMM[10]
2328d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[0]-@XMM[1]}, [$out]!
2329d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[8]}, [$out]!
2330d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2331d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[8]}, [r0,:128]		@ next round tweak
2332d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	b		.Lxts_dec_done
@ two blocks: same pattern, two results stored
2333d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
2334d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lxts_dec_2:
2335d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.64		{@XMM[10]}, [r0,:128]		@ next round tweak
2336d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2337d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[0], @XMM[0], @XMM[8]
2338d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifndef	BSAES_ASM_EXTENDED_KEY
2339d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add		r4, sp, #0x90			@ pass key schedule
2340d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#else
2341d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	add		r4, $key, #248			@ pass key schedule
2342d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
2343d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[1], @XMM[1], @XMM[9]
2344d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r5, $rounds			@ pass rounds
2345d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r0, sp
2346d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2347d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl		_bsaes_decrypt8
2348d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2349d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[8]-@XMM[9]}, [r0,:128]!
2350d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[0], @XMM[0], @XMM[ 8]
2351d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[1], @XMM[1], @XMM[ 9]
2352d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[0]-@XMM[1]}, [$out]!
2353d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2354d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.64		{@XMM[8]}, [r0,:128]		@ next round tweak
2355d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	b		.Lxts_dec_done
@ one block: skip the 8-way primitive and run the scalar AES_decrypt in
@ place on a stack buffer. The block is xored with the tweak before and
@ after the call. r4 and r5 carry fp and magic across the call.
@ NOTE(review): AES_decrypt is presumably (in=r0, out=r1, key=r2) and is
@ assumed to leave the vector registers and r4-r5 intact - confirm
2356d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.align	4
2357d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lxts_dec_1:
2358d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r0, sp
2359d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[0], @XMM[8]
2360d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r1, sp
2361d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[0]}, [sp,:128]
2362d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r2, $key
2363d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r4, $fp				@ preserve fp
2364d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r5, $magic			@ preserve magic
2365d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2366d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl		AES_decrypt
2367d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2368d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8		{@XMM[0]}, [sp,:128]
2369d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[0], @XMM[0], @XMM[8]
2370d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[0]}, [$out]!
2371d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		$fp, r4
2372d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		$magic, r5
2373d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2374d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov		@XMM[8], @XMM[9]		@ next round tweak
2375d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
@ All whole-block paths meet here with the next unused tweak in the
@ tweak register and the count equal to (trailing bytes - 0x10).
@ Without chain-tweak support a non-zero number of trailing bytes
@ triggers ciphertext stealing: the held-back whole block is decrypted
@ with the tweak AFTER the current one, its leading bytes are moved
@ behind it as the final partial output, the trailing input bytes take
@ their place, and the rebuilt block is decrypted with the current tweak.
2376d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lxts_dec_done:
2377d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifndef	XTS_CHAIN_TWEAK
2378d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	adds		$len, #0x10
2379d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	beq		.Lxts_dec_ret
2380d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2381d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	@ calculate one round of extra tweak for the stolen ciphertext
2382d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vldmia		$magic, {$twmask}
2383d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vshr.s64	@XMM[6], @XMM[8], #63
2384d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vand		@XMM[6], @XMM[6], $twmask
2385d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vadd.u64	@XMM[9], @XMM[8], @XMM[8]
2386d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vswp		`&Dhi("@XMM[6]")`,`&Dlo("@XMM[6]")`
2387d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[9], @XMM[9], @XMM[6]
2388d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2389d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	@ perform the final decryption with the last tweak value
2390d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8		{@XMM[0]}, [$inp]!
2391d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r0, sp
2392d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[0], @XMM[0], @XMM[9]
2393d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r1, sp
2394d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[0]}, [sp,:128]
2395d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r2, $key
2396d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r4, $fp			@ preserve fp
2397d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2398d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl		AES_decrypt
2399d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
@ the result is written without advancing the output pointer, so the
@ byte-swap loop below can edit this block in place
2400d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8		{@XMM[0]}, [sp,:128]
2401d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[0], @XMM[0], @XMM[9]
2402d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[0]}, [$out]
2403d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
@ r6 remembers the start of that block; per trailing byte the loop moves
@ the byte just produced 16 positions further in the output and puts
@ the corresponding trailing input byte in its place
2404d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r6, $out
2405d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lxts_dec_steal:
2406d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldrb		r1, [$out]
2407d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldrb		r0, [$inp], #1
2408d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	strb		r1, [$out, #0x10]
2409d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	strb		r0, [$out], #1
2410d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2411d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	subs		$len, #1
2412d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bhi		.Lxts_dec_steal
2413d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
@ decrypt the rebuilt block in place with the current (earlier) tweak
2414d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8		{@XMM[0]}, [r6]
2415d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r0, sp
2416d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[0], @XMM[8]
2417d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r1, sp
2418d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[0]}, [sp,:128]
2419d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		r2, $key
2420d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2421d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bl		AES_decrypt
2422d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2423d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vld1.8		{@XMM[0]}, [sp,:128]
2424d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	veor		@XMM[0], @XMM[0], @XMM[8]
2425d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[0]}, [r6]
2426d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		$fp, r4
2427d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
2428d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
@ Epilogue: zero the stack from sp up to fp rounded down to 16 bytes,
@ 32 bytes per pass, then unwind. In chain-tweak builds the next tweak
@ is stored through the pointer read from the caller argument area.
@ NOTE(review): the wipe loop exits on equality only, so it relies on the
@ distance from sp to the limit being a multiple of 32 bytes
2429d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lxts_dec_ret:
2430d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bic		r0, $fp, #0xf
2431d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov.i32	q0, #0
2432d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vmov.i32	q1, #0
2433d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifdef	XTS_CHAIN_TWEAK
2434d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldr		r1, [$fp, #0x20+VFP_ABI_FRAME]	@ chain tweak
2435d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
2436d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.Lxts_dec_bzero:				@ wipe key schedule [if any]
2437d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vstmia		sp!, {q0-q1}
2438d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	cmp		sp, r0
2439d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	bne		.Lxts_dec_bzero
2440d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2441d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	mov		sp, $fp
2442d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#ifdef	XTS_CHAIN_TWEAK
2443d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	vst1.8		{@XMM[8]}, [r1]
2444d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root#endif
2445d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	VFP_ABI_POP
2446d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root	ldmia		sp!, {r4-r10, pc}	@ return
2447d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root
2448d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root.size	bsaes_xts_decrypt,.-bsaes_xts_decrypt
2449d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root___
2450d55031a5e797d10e7106668121d18ef5608aaed9Kenny Root}
# Close the conditional-assembly guard opened earlier in the generated output.
$code.=<<___;
#endif
___

# Expand every `...` span in the accumulated assembly text by evaluating
# it as a Perl expression and substituting the result.
$code =~ s/\`([^\`]*)\`/eval($1)/gem;

# Reproduce this script's leading comment block at the top of the
# generated file: the shebang line is dropped, a leading '#' becomes '@'
# (the comment character used in the emitted assembly), blank lines are
# passed through, and copying stops at the first line that is neither a
# comment nor blank.
#
# Three-argument open on a lexical handle, so characters in the script
# path are never interpreted as an open mode.  The header is best-effort:
# if the script cannot be re-read, it is simply omitted.
if (open(my $self, "<", $0)) {
	while (my $line = <$self>) {
		next if ($line =~ /^#!/);
		last if ($line !~ s/^#/@/ and $line !~ /^$/);
		print $line;
	}
	close $self;
}

print $code;

# close() reports buffered-write failures (e.g. disk full); without this
# check a truncated assembly file would be produced with exit status 0.
close STDOUT or die "error closing STDOUT: $!";
2468