# aes-ppc.pl revision 221304ee937bc0910948a8be1320cb8cc4eb6d36
#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# Needs more work: key setup, page boundaries, CBC routine...
#
# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
# 128-bit key, which is ~40% better than 64-bit code generated by gcc
# 4.0. But these are not the ones currently used! Their "compact"
# counterparts are, for security reasons. ppc_AES_encrypt_compact runs
# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
# at 1/3 of ppc_AES_decrypt.

# Command line: <flavour> <output>.  The flavour string must contain
# "64" or "32"; it selects the pointer size and the mnemonics used to
# allocate the stack frame and to save/restore registers in it.
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
} else { die "nonsense $flavour"; }

# Look for ppc-xlate.pl next to this script, then in ../../perlasm/.
# $dir is taken from the match result itself, so a path without any
# directory component yields "" instead of whatever $1 held before.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed to STDOUT from here on is piped through the
# translator.  The parentheses matter: without them "||" binds to
# "shift" and a failing open() would go unnoticed.
open(STDOUT,"| $^X $xlate $flavour ".shift) || die "can't call $xlate: $!";

# Size of the stack frame allocated by the AES_encrypt/AES_decrypt
# entry points: 32 pointer-sized slots.
$FRAME=32*$SIZE_T;
41
# _data_word(LIST) - append one "\t.long\tW,W\n" line to $code for every
# 32-bit word in LIST.  Each word is emitted twice, so a table entry
# occupies 8 bytes.  Processing stops at the first undefined element.
# Returns nothing useful; the only effect is the text appended to $code.
#
# No prototype: the sub takes a list, and an empty "()" prototype would
# reject any call not written in the prototype-bypassing &name(...) form.
sub _data_word
{
    foreach my $word (@_) {
	last unless defined($word);
	$code.=sprintf"\t.long\t0x%08x,0x%08x\n",$word,$word;
    }
}
46
# Symbolic register names interpolated into the assembly templates below.

# Stack pointer, TOC pointer and the three arguments of the entry points.
$sp="r1";
$toc="r2";
$inp="r3";
$out="r4";
$key="r5";

# Table base pointers.  $Tbl0 shares r3 with $inp: the entry points load
# the input block before calling LAES_Te/LAES_Td, which overwrite r3.
$Tbl0="r3";
$Tbl1="r6";
$Tbl2="r7";
$Tbl3="r2";

# The four 32-bit state words.
$s0="r8";
$s1="r9";
$s2="r10";
$s3="r11";

# Round-key words / per-round temporaries.
$t0="r12";
$t1="r13";
$t2="r14";
$t3="r15";

# Sixteen scratch accumulators, r16..r31.
$acc00="r16";
$acc01="r17";
$acc02="r18";
$acc03="r19";

$acc04="r20";
$acc05="r21";
$acc06="r22";
$acc07="r23";

$acc08="r24";
$acc09="r25";
$acc10="r26";
$acc11="r27";

$acc12="r28";
$acc13="r29";
$acc14="r30";
$acc15="r31";

# stay away from TLS pointer
# 64-bit flavour: $t1 moves from r13 to r0.
# 32-bit flavour: $Tbl3 moves from r2 to the register $t0 had (r12) and
# $t0 moves to r0.
# The checks only guard against the table above being edited without
# updating this fix-up.
if ($SIZE_T==8)	{ die "expected \$t1 to be r13" if ($t1 ne "r13");  $t1="r0";		}
else		{ die "expected \$Tbl3 to be r2" if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0";	}
# The compact routines do not use $Tbl2/$Tbl3 as table pointers and
# keep the 0x80.. and 0x1b.. byte masks in them instead.
$mask80=$Tbl2;
$mask1b=$Tbl3;
93
# Position-independent table locators.  LAES_Te and LAES_Td return the
# address of the encryption resp. decryption lookup table in $Tbl0 and
# preserve the link register (it is parked in r0 around the bcl).
#
# Layout after ".align 7": LAES_Te is 6 instructions (24 bytes) padded
# to 32 bytes, LAES_Td is 24 bytes padded with 128-32-24 bytes, so the
# first table entry sits exactly 128 bytes after LAES_Te.  The mflr that
# follows each bcl is 8 bytes into its routine, hence the "-8" in both
# adjustments.  The decryption table additionally lies past the 2048
# bytes of .long pairs and the 256 .byte values emitted for encryption.
$code.=<<___;
.machine	"any"
.text

.align	7
LAES_Te:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$Tbl0	;    vvvvv "distance" between . and 1st data entry
	addi	$Tbl0,$Tbl0,`128-8`
	mtlr	r0
	blr
	.space	`32-24`
LAES_Td:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$Tbl0	;    vvvvvvvv "distance" between . and 1st data entry
	addi	$Tbl0,$Tbl0,`128-8-32+2048+256`
	mtlr	r0
	blr
	.space	`128-32-24`
___
116&_data_word(
117	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
118	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
119	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
120	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
121	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
122	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
123	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
124	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
125	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
126	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
127	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
128	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
129	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
130	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
131	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
132	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
133	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
134	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
135	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
136	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
137	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
138	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
139	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
140	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
141	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
142	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
143	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
144	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
145	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
146	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
147	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
148	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
149	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
150	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
151	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
152	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
153	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
154	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
155	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
156	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
157	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
158	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
159	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
160	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
161	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
162	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
163	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
164	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
165	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
166	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
167	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
168	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
169	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
170	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
171	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
172	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
173	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
174	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
175	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
176	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
177	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
178	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
179	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
180	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
181$code.=<<___;
182.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
183.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
184.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
185.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
186.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
187.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
188.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
189.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
190.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
191.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
192.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
193.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
194.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
195.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
196.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
197.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
198.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
199.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
200.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
201.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
202.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
203.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
204.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
205.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
206.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
207.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
208.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
209.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
210.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
211.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
212.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
213.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
214___
215&_data_word(
216	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
217	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
218	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
219	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
220	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
221	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
222	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
223	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
224	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
225	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
226	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
227	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
228	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
229	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
230	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
231	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
232	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
233	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
234	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
235	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
236	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
237	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
238	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
239	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
240	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
241	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
242	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
243	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
244	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
245	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
246	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
247	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
248	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
249	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
250	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
251	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
252	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
253	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
254	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
255	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
256	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
257	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
258	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
259	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
260	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
261	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
262	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
263	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
264	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
265	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
266	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
267	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
268	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
269	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
270	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
271	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
272	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
273	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
274	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
275	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
276	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
277	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
278	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
279	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
280$code.=<<___;
281.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
282.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
283.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
284.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
285.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
286.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
287.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
288.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
289.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
290.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
291.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
292.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
293.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
294.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
295.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
296.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
297.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
298.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
299.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
300.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
301.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
302.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
303.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
304.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
305.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
306.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
307.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
308.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
309.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
310.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
311.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
312.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
313
314
315.globl	.AES_encrypt
316.align	7
317.AES_encrypt:
318	mflr	r0
319	$STU	$sp,-$FRAME($sp)
320
321	$PUSH	r0,`$FRAME-$SIZE_T*21`($sp)
322	$PUSH	$toc,`$FRAME-$SIZE_T*20`($sp)
323	$PUSH	r13,`$FRAME-$SIZE_T*19`($sp)
324	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
325	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
326	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
327	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
328	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
329	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
330	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
331	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
332	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
333	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
334	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
335	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
336	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
337	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
338	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
339	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
340	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
341	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
342
343	lwz	$s0,0($inp)
344	lwz	$s1,4($inp)
345	lwz	$s2,8($inp)
346	lwz	$s3,12($inp)
347	bl	LAES_Te
348	bl	Lppc_AES_encrypt_compact
349	stw	$s0,0($out)
350	stw	$s1,4($out)
351	stw	$s2,8($out)
352	stw	$s3,12($out)
353
354	$POP	r0,`$FRAME-$SIZE_T*21`($sp)
355	$POP	$toc,`$FRAME-$SIZE_T*20`($sp)
356	$POP	r13,`$FRAME-$SIZE_T*19`($sp)
357	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
358	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
359	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
360	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
361	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
362	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
363	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
364	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
365	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
366	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
367	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
368	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
369	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
370	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
371	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
372	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
373	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
374	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
375	mtlr	r0
376	addi	$sp,$sp,$FRAME
377	blr
378
379.align	4
380Lppc_AES_encrypt:
381	lwz	$acc00,240($key)
382	lwz	$t0,0($key)
383	lwz	$t1,4($key)
384	lwz	$t2,8($key)
385	lwz	$t3,12($key)
386	addi	$Tbl1,$Tbl0,3
387	addi	$Tbl2,$Tbl0,2
388	addi	$Tbl3,$Tbl0,1
389	addi	$acc00,$acc00,-1
390	addi	$key,$key,16
391	xor	$s0,$s0,$t0
392	xor	$s1,$s1,$t1
393	xor	$s2,$s2,$t2
394	xor	$s3,$s3,$t3
395	mtctr	$acc00
396.align	4
397Lenc_loop:
398	rlwinm	$acc00,$s0,`32-24+3`,21,28
399	rlwinm	$acc01,$s1,`32-24+3`,21,28
400	lwz	$t0,0($key)
401	lwz	$t1,4($key)
402	rlwinm	$acc02,$s2,`32-24+3`,21,28
403	rlwinm	$acc03,$s3,`32-24+3`,21,28
404	lwz	$t2,8($key)
405	lwz	$t3,12($key)
406	rlwinm	$acc04,$s1,`32-16+3`,21,28
407	rlwinm	$acc05,$s2,`32-16+3`,21,28
408	lwzx	$acc00,$Tbl0,$acc00
409	lwzx	$acc01,$Tbl0,$acc01
410	rlwinm	$acc06,$s3,`32-16+3`,21,28
411	rlwinm	$acc07,$s0,`32-16+3`,21,28
412	lwzx	$acc02,$Tbl0,$acc02
413	lwzx	$acc03,$Tbl0,$acc03
414	rlwinm	$acc08,$s2,`32-8+3`,21,28
415	rlwinm	$acc09,$s3,`32-8+3`,21,28
416	lwzx	$acc04,$Tbl1,$acc04
417	lwzx	$acc05,$Tbl1,$acc05
418	rlwinm	$acc10,$s0,`32-8+3`,21,28
419	rlwinm	$acc11,$s1,`32-8+3`,21,28
420	lwzx	$acc06,$Tbl1,$acc06
421	lwzx	$acc07,$Tbl1,$acc07
422	rlwinm	$acc12,$s3,`0+3`,21,28
423	rlwinm	$acc13,$s0,`0+3`,21,28
424	lwzx	$acc08,$Tbl2,$acc08
425	lwzx	$acc09,$Tbl2,$acc09
426	rlwinm	$acc14,$s1,`0+3`,21,28
427	rlwinm	$acc15,$s2,`0+3`,21,28
428	lwzx	$acc10,$Tbl2,$acc10
429	lwzx	$acc11,$Tbl2,$acc11
430	xor	$t0,$t0,$acc00
431	xor	$t1,$t1,$acc01
432	lwzx	$acc12,$Tbl3,$acc12
433	lwzx	$acc13,$Tbl3,$acc13
434	xor	$t2,$t2,$acc02
435	xor	$t3,$t3,$acc03
436	lwzx	$acc14,$Tbl3,$acc14
437	lwzx	$acc15,$Tbl3,$acc15
438	xor	$t0,$t0,$acc04
439	xor	$t1,$t1,$acc05
440	xor	$t2,$t2,$acc06
441	xor	$t3,$t3,$acc07
442	xor	$t0,$t0,$acc08
443	xor	$t1,$t1,$acc09
444	xor	$t2,$t2,$acc10
445	xor	$t3,$t3,$acc11
446	xor	$s0,$t0,$acc12
447	xor	$s1,$t1,$acc13
448	xor	$s2,$t2,$acc14
449	xor	$s3,$t3,$acc15
450	addi	$key,$key,16
451	bdnz-	Lenc_loop
452
453	addi	$Tbl2,$Tbl0,2048
454	nop
455	lwz	$acc08,`2048+0`($Tbl0)	! prefetch Te4
456	lwz	$acc09,`2048+32`($Tbl0)
457	lwz	$acc10,`2048+64`($Tbl0)
458	lwz	$acc11,`2048+96`($Tbl0)
459	lwz	$acc08,`2048+128`($Tbl0)
460	lwz	$acc09,`2048+160`($Tbl0)
461	lwz	$acc10,`2048+192`($Tbl0)
462	lwz	$acc11,`2048+224`($Tbl0)
463	rlwinm	$acc00,$s0,`32-24`,24,31
464	rlwinm	$acc01,$s1,`32-24`,24,31
465	lwz	$t0,0($key)
466	lwz	$t1,4($key)
467	rlwinm	$acc02,$s2,`32-24`,24,31
468	rlwinm	$acc03,$s3,`32-24`,24,31
469	lwz	$t2,8($key)
470	lwz	$t3,12($key)
471	rlwinm	$acc04,$s1,`32-16`,24,31
472	rlwinm	$acc05,$s2,`32-16`,24,31
473	lbzx	$acc00,$Tbl2,$acc00
474	lbzx	$acc01,$Tbl2,$acc01
475	rlwinm	$acc06,$s3,`32-16`,24,31
476	rlwinm	$acc07,$s0,`32-16`,24,31
477	lbzx	$acc02,$Tbl2,$acc02
478	lbzx	$acc03,$Tbl2,$acc03
479	rlwinm	$acc08,$s2,`32-8`,24,31
480	rlwinm	$acc09,$s3,`32-8`,24,31
481	lbzx	$acc04,$Tbl2,$acc04
482	lbzx	$acc05,$Tbl2,$acc05
483	rlwinm	$acc10,$s0,`32-8`,24,31
484	rlwinm	$acc11,$s1,`32-8`,24,31
485	lbzx	$acc06,$Tbl2,$acc06
486	lbzx	$acc07,$Tbl2,$acc07
487	rlwinm	$acc12,$s3,`0`,24,31
488	rlwinm	$acc13,$s0,`0`,24,31
489	lbzx	$acc08,$Tbl2,$acc08
490	lbzx	$acc09,$Tbl2,$acc09
491	rlwinm	$acc14,$s1,`0`,24,31
492	rlwinm	$acc15,$s2,`0`,24,31
493	lbzx	$acc10,$Tbl2,$acc10
494	lbzx	$acc11,$Tbl2,$acc11
495	rlwinm	$s0,$acc00,24,0,7
496	rlwinm	$s1,$acc01,24,0,7
497	lbzx	$acc12,$Tbl2,$acc12
498	lbzx	$acc13,$Tbl2,$acc13
499	rlwinm	$s2,$acc02,24,0,7
500	rlwinm	$s3,$acc03,24,0,7
501	lbzx	$acc14,$Tbl2,$acc14
502	lbzx	$acc15,$Tbl2,$acc15
503	rlwimi	$s0,$acc04,16,8,15
504	rlwimi	$s1,$acc05,16,8,15
505	rlwimi	$s2,$acc06,16,8,15
506	rlwimi	$s3,$acc07,16,8,15
507	rlwimi	$s0,$acc08,8,16,23
508	rlwimi	$s1,$acc09,8,16,23
509	rlwimi	$s2,$acc10,8,16,23
510	rlwimi	$s3,$acc11,8,16,23
511	or	$s0,$s0,$acc12
512	or	$s1,$s1,$acc13
513	or	$s2,$s2,$acc14
514	or	$s3,$s3,$acc15
515	xor	$s0,$s0,$t0
516	xor	$s1,$s1,$t1
517	xor	$s2,$s2,$t2
518	xor	$s3,$s3,$t3
519	blr
520
521.align	4
522Lppc_AES_encrypt_compact:
523	lwz	$acc00,240($key)
524	lwz	$t0,0($key)
525	lwz	$t1,4($key)
526	lwz	$t2,8($key)
527	lwz	$t3,12($key)
528	addi	$Tbl1,$Tbl0,2048
529	lis	$mask80,0x8080
530	lis	$mask1b,0x1b1b
531	addi	$key,$key,16
532	ori	$mask80,$mask80,0x8080
533	ori	$mask1b,$mask1b,0x1b1b
534	mtctr	$acc00
535.align	4
536Lenc_compact_loop:
537	xor	$s0,$s0,$t0
538	xor	$s1,$s1,$t1
539	xor	$s2,$s2,$t2
540	xor	$s3,$s3,$t3
541	rlwinm	$acc00,$s0,`32-24`,24,31
542	rlwinm	$acc01,$s1,`32-24`,24,31
543	rlwinm	$acc02,$s2,`32-24`,24,31
544	rlwinm	$acc03,$s3,`32-24`,24,31
545	lbzx	$acc00,$Tbl1,$acc00
546	lbzx	$acc01,$Tbl1,$acc01
547	rlwinm	$acc04,$s1,`32-16`,24,31
548	rlwinm	$acc05,$s2,`32-16`,24,31
549	lbzx	$acc02,$Tbl1,$acc02
550	lbzx	$acc03,$Tbl1,$acc03
551	rlwinm	$acc06,$s3,`32-16`,24,31
552	rlwinm	$acc07,$s0,`32-16`,24,31
553	lbzx	$acc04,$Tbl1,$acc04
554	lbzx	$acc05,$Tbl1,$acc05
555	rlwinm	$acc08,$s2,`32-8`,24,31
556	rlwinm	$acc09,$s3,`32-8`,24,31
557	lbzx	$acc06,$Tbl1,$acc06
558	lbzx	$acc07,$Tbl1,$acc07
559	rlwinm	$acc10,$s0,`32-8`,24,31
560	rlwinm	$acc11,$s1,`32-8`,24,31
561	lbzx	$acc08,$Tbl1,$acc08
562	lbzx	$acc09,$Tbl1,$acc09
563	rlwinm	$acc12,$s3,`0`,24,31
564	rlwinm	$acc13,$s0,`0`,24,31
565	lbzx	$acc10,$Tbl1,$acc10
566	lbzx	$acc11,$Tbl1,$acc11
567	rlwinm	$acc14,$s1,`0`,24,31
568	rlwinm	$acc15,$s2,`0`,24,31
569	lbzx	$acc12,$Tbl1,$acc12
570	lbzx	$acc13,$Tbl1,$acc13
571	rlwinm	$s0,$acc00,24,0,7
572	rlwinm	$s1,$acc01,24,0,7
573	lbzx	$acc14,$Tbl1,$acc14
574	lbzx	$acc15,$Tbl1,$acc15
575	rlwinm	$s2,$acc02,24,0,7
576	rlwinm	$s3,$acc03,24,0,7
577	rlwimi	$s0,$acc04,16,8,15
578	rlwimi	$s1,$acc05,16,8,15
579	rlwimi	$s2,$acc06,16,8,15
580	rlwimi	$s3,$acc07,16,8,15
581	rlwimi	$s0,$acc08,8,16,23
582	rlwimi	$s1,$acc09,8,16,23
583	rlwimi	$s2,$acc10,8,16,23
584	rlwimi	$s3,$acc11,8,16,23
585	lwz	$t0,0($key)
586	lwz	$t1,4($key)
587	or	$s0,$s0,$acc12
588	or	$s1,$s1,$acc13
589	lwz	$t2,8($key)
590	lwz	$t3,12($key)
591	or	$s2,$s2,$acc14
592	or	$s3,$s3,$acc15
593
594	addi	$key,$key,16
595	bdz	Lenc_compact_done
596
597	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
598	and	$acc01,$s1,$mask80
599	and	$acc02,$s2,$mask80
600	and	$acc03,$s3,$mask80
601	srwi	$acc04,$acc00,7		# r1>>7
602	srwi	$acc05,$acc01,7
603	srwi	$acc06,$acc02,7
604	srwi	$acc07,$acc03,7
605	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
606	andc	$acc09,$s1,$mask80
607	andc	$acc10,$s2,$mask80
608	andc	$acc11,$s3,$mask80
609	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
610	sub	$acc01,$acc01,$acc05
611	sub	$acc02,$acc02,$acc06
612	sub	$acc03,$acc03,$acc07
613	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
614	add	$acc09,$acc09,$acc09
615	add	$acc10,$acc10,$acc10
616	add	$acc11,$acc11,$acc11
617	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
618	and	$acc01,$acc01,$mask1b
619	and	$acc02,$acc02,$mask1b
620	and	$acc03,$acc03,$mask1b
621	xor	$acc00,$acc00,$acc08	# r2
622	xor	$acc01,$acc01,$acc09
623	xor	$acc02,$acc02,$acc10
624	xor	$acc03,$acc03,$acc11
625
626	rotlwi	$acc12,$s0,16		# ROTATE(r0,16)
627	rotlwi	$acc13,$s1,16
628	rotlwi	$acc14,$s2,16
629	rotlwi	$acc15,$s3,16
630	xor	$s0,$s0,$acc00		# r0^r2
631	xor	$s1,$s1,$acc01
632	xor	$s2,$s2,$acc02
633	xor	$s3,$s3,$acc03
634	rotrwi	$s0,$s0,24		# ROTATE(r2^r0,24)
635	rotrwi	$s1,$s1,24
636	rotrwi	$s2,$s2,24
637	rotrwi	$s3,$s3,24
638	xor	$s0,$s0,$acc00		# ROTATE(r2^r0,24)^r2
639	xor	$s1,$s1,$acc01
640	xor	$s2,$s2,$acc02
641	xor	$s3,$s3,$acc03
642	rotlwi	$acc08,$acc12,8		# ROTATE(r0,24)
643	rotlwi	$acc09,$acc13,8
644	rotlwi	$acc10,$acc14,8
645	rotlwi	$acc11,$acc15,8
646	xor	$s0,$s0,$acc12		#
647	xor	$s1,$s1,$acc13
648	xor	$s2,$s2,$acc14
649	xor	$s3,$s3,$acc15
650	xor	$s0,$s0,$acc08		#
651	xor	$s1,$s1,$acc09
652	xor	$s2,$s2,$acc10
653	xor	$s3,$s3,$acc11
654
655	b	Lenc_compact_loop
656.align	4
657Lenc_compact_done:
658	xor	$s0,$s0,$t0
659	xor	$s1,$s1,$t1
660	xor	$s2,$s2,$t2
661	xor	$s3,$s3,$t3
662	blr
663
664.globl	.AES_decrypt
665.align	7
666.AES_decrypt:
667	mflr	r0
668	$STU	$sp,-$FRAME($sp)
669
670	$PUSH	r0,`$FRAME-$SIZE_T*21`($sp)
671	$PUSH	$toc,`$FRAME-$SIZE_T*20`($sp)
672	$PUSH	r13,`$FRAME-$SIZE_T*19`($sp)
673	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
674	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
675	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
676	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
677	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
678	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
679	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
680	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
681	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
682	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
683	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
684	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
685	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
686	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
687	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
688	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
689	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
690	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
691
692	lwz	$s0,0($inp)
693	lwz	$s1,4($inp)
694	lwz	$s2,8($inp)
695	lwz	$s3,12($inp)
696	bl	LAES_Td
697	bl	Lppc_AES_decrypt_compact
698	stw	$s0,0($out)
699	stw	$s1,4($out)
700	stw	$s2,8($out)
701	stw	$s3,12($out)
702
703	$POP	r0,`$FRAME-$SIZE_T*21`($sp)
704	$POP	$toc,`$FRAME-$SIZE_T*20`($sp)
705	$POP	r13,`$FRAME-$SIZE_T*19`($sp)
706	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
707	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
708	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
709	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
710	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
711	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
712	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
713	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
714	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
715	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
716	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
717	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
718	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
719	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
720	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
721	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
722	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
723	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
724	mtlr	r0
725	addi	$sp,$sp,$FRAME
726	blr
727
728.align	4
729Lppc_AES_decrypt:
730	lwz	$acc00,240($key)
731	lwz	$t0,0($key)
732	lwz	$t1,4($key)
733	lwz	$t2,8($key)
734	lwz	$t3,12($key)
735	addi	$Tbl1,$Tbl0,3
736	addi	$Tbl2,$Tbl0,2
737	addi	$Tbl3,$Tbl0,1
738	addi	$acc00,$acc00,-1
739	addi	$key,$key,16
740	xor	$s0,$s0,$t0
741	xor	$s1,$s1,$t1
742	xor	$s2,$s2,$t2
743	xor	$s3,$s3,$t3
744	mtctr	$acc00
745.align	4
746Ldec_loop:
747	rlwinm	$acc00,$s0,`32-24+3`,21,28
748	rlwinm	$acc01,$s1,`32-24+3`,21,28
749	lwz	$t0,0($key)
750	lwz	$t1,4($key)
751	rlwinm	$acc02,$s2,`32-24+3`,21,28
752	rlwinm	$acc03,$s3,`32-24+3`,21,28
753	lwz	$t2,8($key)
754	lwz	$t3,12($key)
755	rlwinm	$acc04,$s3,`32-16+3`,21,28
756	rlwinm	$acc05,$s0,`32-16+3`,21,28
757	lwzx	$acc00,$Tbl0,$acc00
758	lwzx	$acc01,$Tbl0,$acc01
759	rlwinm	$acc06,$s1,`32-16+3`,21,28
760	rlwinm	$acc07,$s2,`32-16+3`,21,28
761	lwzx	$acc02,$Tbl0,$acc02
762	lwzx	$acc03,$Tbl0,$acc03
763	rlwinm	$acc08,$s2,`32-8+3`,21,28
764	rlwinm	$acc09,$s3,`32-8+3`,21,28
765	lwzx	$acc04,$Tbl1,$acc04
766	lwzx	$acc05,$Tbl1,$acc05
767	rlwinm	$acc10,$s0,`32-8+3`,21,28
768	rlwinm	$acc11,$s1,`32-8+3`,21,28
769	lwzx	$acc06,$Tbl1,$acc06
770	lwzx	$acc07,$Tbl1,$acc07
771	rlwinm	$acc12,$s1,`0+3`,21,28
772	rlwinm	$acc13,$s2,`0+3`,21,28
773	lwzx	$acc08,$Tbl2,$acc08
774	lwzx	$acc09,$Tbl2,$acc09
775	rlwinm	$acc14,$s3,`0+3`,21,28
776	rlwinm	$acc15,$s0,`0+3`,21,28
777	lwzx	$acc10,$Tbl2,$acc10
778	lwzx	$acc11,$Tbl2,$acc11
779	xor	$t0,$t0,$acc00
780	xor	$t1,$t1,$acc01
781	lwzx	$acc12,$Tbl3,$acc12
782	lwzx	$acc13,$Tbl3,$acc13
783	xor	$t2,$t2,$acc02
784	xor	$t3,$t3,$acc03
785	lwzx	$acc14,$Tbl3,$acc14
786	lwzx	$acc15,$Tbl3,$acc15
787	xor	$t0,$t0,$acc04
788	xor	$t1,$t1,$acc05
789	xor	$t2,$t2,$acc06
790	xor	$t3,$t3,$acc07
791	xor	$t0,$t0,$acc08
792	xor	$t1,$t1,$acc09
793	xor	$t2,$t2,$acc10
794	xor	$t3,$t3,$acc11
795	xor	$s0,$t0,$acc12
796	xor	$s1,$t1,$acc13
797	xor	$s2,$t2,$acc14
798	xor	$s3,$t3,$acc15
799	addi	$key,$key,16
800	bdnz-	Ldec_loop
801
802	addi	$Tbl2,$Tbl0,2048
803	nop
804	lwz	$acc08,`2048+0`($Tbl0)	! prefetch Td4
805	lwz	$acc09,`2048+32`($Tbl0)
806	lwz	$acc10,`2048+64`($Tbl0)
807	lwz	$acc11,`2048+96`($Tbl0)
808	lwz	$acc08,`2048+128`($Tbl0)
809	lwz	$acc09,`2048+160`($Tbl0)
810	lwz	$acc10,`2048+192`($Tbl0)
811	lwz	$acc11,`2048+224`($Tbl0)
812	rlwinm	$acc00,$s0,`32-24`,24,31
813	rlwinm	$acc01,$s1,`32-24`,24,31
814	lwz	$t0,0($key)
815	lwz	$t1,4($key)
816	rlwinm	$acc02,$s2,`32-24`,24,31
817	rlwinm	$acc03,$s3,`32-24`,24,31
818	lwz	$t2,8($key)
819	lwz	$t3,12($key)
820	rlwinm	$acc04,$s3,`32-16`,24,31
821	rlwinm	$acc05,$s0,`32-16`,24,31
822	lbzx	$acc00,$Tbl2,$acc00
823	lbzx	$acc01,$Tbl2,$acc01
824	rlwinm	$acc06,$s1,`32-16`,24,31
825	rlwinm	$acc07,$s2,`32-16`,24,31
826	lbzx	$acc02,$Tbl2,$acc02
827	lbzx	$acc03,$Tbl2,$acc03
828	rlwinm	$acc08,$s2,`32-8`,24,31
829	rlwinm	$acc09,$s3,`32-8`,24,31
830	lbzx	$acc04,$Tbl2,$acc04
831	lbzx	$acc05,$Tbl2,$acc05
832	rlwinm	$acc10,$s0,`32-8`,24,31
833	rlwinm	$acc11,$s1,`32-8`,24,31
834	lbzx	$acc06,$Tbl2,$acc06
835	lbzx	$acc07,$Tbl2,$acc07
836	rlwinm	$acc12,$s1,`0`,24,31
837	rlwinm	$acc13,$s2,`0`,24,31
838	lbzx	$acc08,$Tbl2,$acc08
839	lbzx	$acc09,$Tbl2,$acc09
840	rlwinm	$acc14,$s3,`0`,24,31
841	rlwinm	$acc15,$s0,`0`,24,31
842	lbzx	$acc10,$Tbl2,$acc10
843	lbzx	$acc11,$Tbl2,$acc11
844	rlwinm	$s0,$acc00,24,0,7
845	rlwinm	$s1,$acc01,24,0,7
846	lbzx	$acc12,$Tbl2,$acc12
847	lbzx	$acc13,$Tbl2,$acc13
848	rlwinm	$s2,$acc02,24,0,7
849	rlwinm	$s3,$acc03,24,0,7
850	lbzx	$acc14,$Tbl2,$acc14
851	lbzx	$acc15,$Tbl2,$acc15
852	rlwimi	$s0,$acc04,16,8,15
853	rlwimi	$s1,$acc05,16,8,15
854	rlwimi	$s2,$acc06,16,8,15
855	rlwimi	$s3,$acc07,16,8,15
856	rlwimi	$s0,$acc08,8,16,23
857	rlwimi	$s1,$acc09,8,16,23
858	rlwimi	$s2,$acc10,8,16,23
859	rlwimi	$s3,$acc11,8,16,23
860	or	$s0,$s0,$acc12
861	or	$s1,$s1,$acc13
862	or	$s2,$s2,$acc14
863	or	$s3,$s3,$acc15
864	xor	$s0,$s0,$t0
865	xor	$s1,$s1,$t1
866	xor	$s2,$s2,$t2
867	xor	$s3,$s3,$t3
868	blr
869
870.align	4
871Lppc_AES_decrypt_compact:
872	lwz	$acc00,240($key)
873	lwz	$t0,0($key)
874	lwz	$t1,4($key)
875	lwz	$t2,8($key)
876	lwz	$t3,12($key)
877	addi	$Tbl1,$Tbl0,2048
878	lis	$mask80,0x8080
879	lis	$mask1b,0x1b1b
880	addi	$key,$key,16
881	ori	$mask80,$mask80,0x8080
882	ori	$mask1b,$mask1b,0x1b1b
883___
884$code.=<<___ if ($SIZE_T==8);
885	insrdi	$mask80,$mask80,32,0
886	insrdi	$mask1b,$mask1b,32,0
887___
888$code.=<<___;
889	mtctr	$acc00
890.align	4
891Ldec_compact_loop:
892	xor	$s0,$s0,$t0
893	xor	$s1,$s1,$t1
894	xor	$s2,$s2,$t2
895	xor	$s3,$s3,$t3
896	rlwinm	$acc00,$s0,`32-24`,24,31
897	rlwinm	$acc01,$s1,`32-24`,24,31
898	rlwinm	$acc02,$s2,`32-24`,24,31
899	rlwinm	$acc03,$s3,`32-24`,24,31
900	lbzx	$acc00,$Tbl1,$acc00
901	lbzx	$acc01,$Tbl1,$acc01
902	rlwinm	$acc04,$s3,`32-16`,24,31
903	rlwinm	$acc05,$s0,`32-16`,24,31
904	lbzx	$acc02,$Tbl1,$acc02
905	lbzx	$acc03,$Tbl1,$acc03
906	rlwinm	$acc06,$s1,`32-16`,24,31
907	rlwinm	$acc07,$s2,`32-16`,24,31
908	lbzx	$acc04,$Tbl1,$acc04
909	lbzx	$acc05,$Tbl1,$acc05
910	rlwinm	$acc08,$s2,`32-8`,24,31
911	rlwinm	$acc09,$s3,`32-8`,24,31
912	lbzx	$acc06,$Tbl1,$acc06
913	lbzx	$acc07,$Tbl1,$acc07
914	rlwinm	$acc10,$s0,`32-8`,24,31
915	rlwinm	$acc11,$s1,`32-8`,24,31
916	lbzx	$acc08,$Tbl1,$acc08
917	lbzx	$acc09,$Tbl1,$acc09
918	rlwinm	$acc12,$s1,`0`,24,31
919	rlwinm	$acc13,$s2,`0`,24,31
920	lbzx	$acc10,$Tbl1,$acc10
921	lbzx	$acc11,$Tbl1,$acc11
922	rlwinm	$acc14,$s3,`0`,24,31
923	rlwinm	$acc15,$s0,`0`,24,31
924	lbzx	$acc12,$Tbl1,$acc12
925	lbzx	$acc13,$Tbl1,$acc13
926	rlwinm	$s0,$acc00,24,0,7
927	rlwinm	$s1,$acc01,24,0,7
928	lbzx	$acc14,$Tbl1,$acc14
929	lbzx	$acc15,$Tbl1,$acc15
930	rlwinm	$s2,$acc02,24,0,7
931	rlwinm	$s3,$acc03,24,0,7
932	rlwimi	$s0,$acc04,16,8,15
933	rlwimi	$s1,$acc05,16,8,15
934	rlwimi	$s2,$acc06,16,8,15
935	rlwimi	$s3,$acc07,16,8,15
936	rlwimi	$s0,$acc08,8,16,23
937	rlwimi	$s1,$acc09,8,16,23
938	rlwimi	$s2,$acc10,8,16,23
939	rlwimi	$s3,$acc11,8,16,23
940	lwz	$t0,0($key)
941	lwz	$t1,4($key)
942	or	$s0,$s0,$acc12
943	or	$s1,$s1,$acc13
944	lwz	$t2,8($key)
945	lwz	$t3,12($key)
946	or	$s2,$s2,$acc14
947	or	$s3,$s3,$acc15
948
949	addi	$key,$key,16
950	bdz	Ldec_compact_done
951___
# 64-bit flavour only: this heredoc is appended to $code when $SIZE_T==8.
#
# The four 32-bit state words are first packed pairwise into 64-bit
# registers (insrdi places $s1 in the upper half of $s0 and $s3 in the upper
# half of $s2), so each and/andc/sub/add/xor below processes two state words
# at once.  Following the inline comments, three rounds of the same
# mask/shift/subtract/xor recipe produce "r2", "r4" and "r8" from "r0":
#     rN*2 = ((rN & mask80) - ((rN & mask80) >> 7)) & mask1b
#            ^ ((rN & ~mask80) << 1)
# NOTE(review): this looks like the packed byte-wise GF(2^8) doubling
# ("xtime") used for AES InvMixColumns -- inferred from the r2/r4/r8
# comments, confirm against the algorithm description.
# NOTE(review): $mask80/$mask1b are defined outside this chunk; presumably
# they hold the 0x80.. / 0x1b.. byte patterns replicated across all 64 bits.
#
# Afterwards r2^r0 and r4^r0 are formed, and extrdi copies the upper 32 bits
# of each packed result into the odd-numbered $accNN register so the common
# 32-bit tail that follows can treat $acc00..$acc11 as twelve separate words.
#
# The '#' comments inside the heredoc are part of the emitted assembly text,
# not Perl comments.
952$code.=<<___ if ($SIZE_T==8);
953	# vectorized permutation improves decrypt performance by 10%
954	insrdi	$s0,$s1,32,0
955	insrdi	$s2,$s3,32,0
956
957	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
958	and	$acc02,$s2,$mask80
959	srdi	$acc04,$acc00,7		# r1>>7
960	srdi	$acc06,$acc02,7
961	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
962	andc	$acc10,$s2,$mask80
963	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
964	sub	$acc02,$acc02,$acc06
965	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
966	add	$acc10,$acc10,$acc10
967	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
968	and	$acc02,$acc02,$mask1b
969	xor	$acc00,$acc00,$acc08	# r2
970	xor	$acc02,$acc02,$acc10
971
972	and	$acc04,$acc00,$mask80	# r1=r2&0x80808080
973	and	$acc06,$acc02,$mask80
974	srdi	$acc08,$acc04,7		# r1>>7
975	srdi	$acc10,$acc06,7
976	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
977	andc	$acc14,$acc02,$mask80
978	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
979	sub	$acc06,$acc06,$acc10
980	add	$acc12,$acc12,$acc12	# (r2&0x7f7f7f7f)<<1
981	add	$acc14,$acc14,$acc14
982	and	$acc04,$acc04,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
983	and	$acc06,$acc06,$mask1b
984	xor	$acc04,$acc04,$acc12	# r4
985	xor	$acc06,$acc06,$acc14
986
987	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
988	and	$acc10,$acc06,$mask80
989	srdi	$acc12,$acc08,7		# r1>>7
990	srdi	$acc14,$acc10,7
991	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
992	sub	$acc10,$acc10,$acc14
993	andc	$acc12,$acc04,$mask80	# r4&0x7f7f7f7f
994	andc	$acc14,$acc06,$mask80
995	add	$acc12,$acc12,$acc12	# (r4&0x7f7f7f7f)<<1
996	add	$acc14,$acc14,$acc14
997	and	$acc08,$acc08,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
998	and	$acc10,$acc10,$mask1b
999	xor	$acc08,$acc08,$acc12	# r8
1000	xor	$acc10,$acc10,$acc14
1001
1002	xor	$acc00,$acc00,$s0	# r2^r0
1003	xor	$acc02,$acc02,$s2
1004	xor	$acc04,$acc04,$s0	# r4^r0
1005	xor	$acc06,$acc06,$s2
1006
1007	extrdi	$acc01,$acc00,32,0
1008	extrdi	$acc03,$acc02,32,0
1009	extrdi	$acc05,$acc04,32,0
1010	extrdi	$acc07,$acc06,32,0
1011	extrdi	$acc09,$acc08,32,0
1012	extrdi	$acc11,$acc10,32,0
1013___
# 32-bit flavour only: this heredoc is appended to $code when $SIZE_T==4.
#
# Same three-step "r2", "r4", "r8" derivation as the 64-bit variant, but
# each of the four state words $s0..$s3 is processed in its own 32-bit
# register, so every step appears four times and the shifts use srwi:
#     rN*2 = ((rN & mask80) - ((rN & mask80) >> 7)) & mask1b
#            ^ ((rN & ~mask80) << 1)
# NOTE(review): this looks like the packed byte-wise GF(2^8) doubling
# ("xtime") used for AES InvMixColumns -- inferred from the r2/r4/r8
# comments, confirm against the algorithm description.
# NOTE(review): $mask80/$mask1b are defined outside this chunk; per the
# inline comments they presumably hold 0x80808080 and 0x1b1b1b1b.
#
# Register use on exit, as consumed by the common tail that follows:
#     $acc00..$acc03 = r2^r0, $acc04..$acc07 = r4^r0, $acc08..$acc11 = r8
# ($acc12..$acc15 are scratch here).
#
# The '#' comments inside the heredoc are part of the emitted assembly text,
# not Perl comments.
1014$code.=<<___ if ($SIZE_T==4);
1015	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
1016	and	$acc01,$s1,$mask80
1017	and	$acc02,$s2,$mask80
1018	and	$acc03,$s3,$mask80
1019	srwi	$acc04,$acc00,7		# r1>>7
1020	srwi	$acc05,$acc01,7
1021	srwi	$acc06,$acc02,7
1022	srwi	$acc07,$acc03,7
1023	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
1024	andc	$acc09,$s1,$mask80
1025	andc	$acc10,$s2,$mask80
1026	andc	$acc11,$s3,$mask80
1027	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
1028	sub	$acc01,$acc01,$acc05
1029	sub	$acc02,$acc02,$acc06
1030	sub	$acc03,$acc03,$acc07
1031	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
1032	add	$acc09,$acc09,$acc09
1033	add	$acc10,$acc10,$acc10
1034	add	$acc11,$acc11,$acc11
1035	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1036	and	$acc01,$acc01,$mask1b
1037	and	$acc02,$acc02,$mask1b
1038	and	$acc03,$acc03,$mask1b
1039	xor	$acc00,$acc00,$acc08	# r2
1040	xor	$acc01,$acc01,$acc09
1041	xor	$acc02,$acc02,$acc10
1042	xor	$acc03,$acc03,$acc11
1043
1044	and	$acc04,$acc00,$mask80	# r1=r2&0x80808080
1045	and	$acc05,$acc01,$mask80
1046	and	$acc06,$acc02,$mask80
1047	and	$acc07,$acc03,$mask80
1048	srwi	$acc08,$acc04,7		# r1>>7
1049	srwi	$acc09,$acc05,7
1050	srwi	$acc10,$acc06,7
1051	srwi	$acc11,$acc07,7
1052	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
1053	andc	$acc13,$acc01,$mask80
1054	andc	$acc14,$acc02,$mask80
1055	andc	$acc15,$acc03,$mask80
1056	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
1057	sub	$acc05,$acc05,$acc09
1058	sub	$acc06,$acc06,$acc10
1059	sub	$acc07,$acc07,$acc11
1060	add	$acc12,$acc12,$acc12	# (r2&0x7f7f7f7f)<<1
1061	add	$acc13,$acc13,$acc13
1062	add	$acc14,$acc14,$acc14
1063	add	$acc15,$acc15,$acc15
1064	and	$acc04,$acc04,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1065	and	$acc05,$acc05,$mask1b
1066	and	$acc06,$acc06,$mask1b
1067	and	$acc07,$acc07,$mask1b
1068	xor	$acc04,$acc04,$acc12	# r4
1069	xor	$acc05,$acc05,$acc13
1070	xor	$acc06,$acc06,$acc14
1071	xor	$acc07,$acc07,$acc15
1072
1073	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
1074	and	$acc09,$acc05,$mask80
1075	and	$acc10,$acc06,$mask80
1076	and	$acc11,$acc07,$mask80
1077	srwi	$acc12,$acc08,7		# r1>>7
1078	srwi	$acc13,$acc09,7
1079	srwi	$acc14,$acc10,7
1080	srwi	$acc15,$acc11,7
1081	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
1082	sub	$acc09,$acc09,$acc13
1083	sub	$acc10,$acc10,$acc14
1084	sub	$acc11,$acc11,$acc15
1085	andc	$acc12,$acc04,$mask80	# r4&0x7f7f7f7f
1086	andc	$acc13,$acc05,$mask80
1087	andc	$acc14,$acc06,$mask80
1088	andc	$acc15,$acc07,$mask80
1089	add	$acc12,$acc12,$acc12	# (r4&0x7f7f7f7f)<<1
1090	add	$acc13,$acc13,$acc13
1091	add	$acc14,$acc14,$acc14
1092	add	$acc15,$acc15,$acc15
1093	and	$acc08,$acc08,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1094	and	$acc09,$acc09,$mask1b
1095	and	$acc10,$acc10,$mask1b
1096	and	$acc11,$acc11,$mask1b
1097	xor	$acc08,$acc08,$acc12	# r8
1098	xor	$acc09,$acc09,$acc13
1099	xor	$acc10,$acc10,$acc14
1100	xor	$acc11,$acc11,$acc15
1101
1102	xor	$acc00,$acc00,$s0	# r2^r0
1103	xor	$acc01,$acc01,$s1
1104	xor	$acc02,$acc02,$s2
1105	xor	$acc03,$acc03,$s3
1106	xor	$acc04,$acc04,$s0	# r4^r0
1107	xor	$acc05,$acc05,$s1
1108	xor	$acc06,$acc06,$s2
1109	xor	$acc07,$acc07,$s3
1110___
# Common tail, appended unconditionally (shared by the 32- and 64-bit
# flavours).
#
# On entry $acc00..03 hold r2^r0, $acc04..07 hold r4^r0 and $acc08..11 hold
# r8 for the four state words.  Per the inline comments each state word is
# rebuilt as
#     ROTATE(r0,8) ^ (r2^r0) ^ (r4^r0) ^ r8
#       ^ ROTATE(r8^r2^r0,24) ^ ROTATE(r8^r4^r0,16) ^ ROTATE(r8,8)
# using 32-bit rotates (rotrwi), with the independent xor/rotate chains of
# the four words interleaved.
# NOTE(review): this appears to be the AES InvMixColumns step expressed with
# r2/r4/r8 -- inferred from the comments, confirm against the algorithm.
#
# The code then branches back to Ldec_compact_loop (label defined earlier in
# the file).  Ldec_compact_done is the target of the "bdz" that precedes this
# section: it XORs the state with $t0..$t3, which were loaded from $key just
# before that branch, and returns with blr.
#
# The trailing directives emit a zero word, the module identification string
# ('\@' keeps Perl from interpolating an array inside the heredoc) and an
# alignment directive.
1111$code.=<<___;
1112	rotrwi	$s0,$s0,8		# = ROTATE(r0,8)
1113	rotrwi	$s1,$s1,8
1114	rotrwi	$s2,$s2,8
1115	rotrwi	$s3,$s3,8
1116	xor	$s0,$s0,$acc00		# ^= r2^r0
1117	xor	$s1,$s1,$acc01
1118	xor	$s2,$s2,$acc02
1119	xor	$s3,$s3,$acc03
1120	xor	$acc00,$acc00,$acc08
1121	xor	$acc01,$acc01,$acc09
1122	xor	$acc02,$acc02,$acc10
1123	xor	$acc03,$acc03,$acc11
1124	xor	$s0,$s0,$acc04		# ^= r4^r0
1125	xor	$s1,$s1,$acc05
1126	xor	$s2,$s2,$acc06
1127	xor	$s3,$s3,$acc07
1128	rotrwi	$acc00,$acc00,24
1129	rotrwi	$acc01,$acc01,24
1130	rotrwi	$acc02,$acc02,24
1131	rotrwi	$acc03,$acc03,24
1132	xor	$acc04,$acc04,$acc08
1133	xor	$acc05,$acc05,$acc09
1134	xor	$acc06,$acc06,$acc10
1135	xor	$acc07,$acc07,$acc11
1136	xor	$s0,$s0,$acc08		# ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1137	xor	$s1,$s1,$acc09
1138	xor	$s2,$s2,$acc10
1139	xor	$s3,$s3,$acc11
1140	rotrwi	$acc04,$acc04,16
1141	rotrwi	$acc05,$acc05,16
1142	rotrwi	$acc06,$acc06,16
1143	rotrwi	$acc07,$acc07,16
1144	xor	$s0,$s0,$acc00		# ^= ROTATE(r8^r2^r0,24)
1145	xor	$s1,$s1,$acc01
1146	xor	$s2,$s2,$acc02
1147	xor	$s3,$s3,$acc03
1148	rotrwi	$acc08,$acc08,8
1149	rotrwi	$acc09,$acc09,8
1150	rotrwi	$acc10,$acc10,8
1151	rotrwi	$acc11,$acc11,8
1152	xor	$s0,$s0,$acc04		# ^= ROTATE(r8^r4^r0,16)
1153	xor	$s1,$s1,$acc05
1154	xor	$s2,$s2,$acc06
1155	xor	$s3,$s3,$acc07
1156	xor	$s0,$s0,$acc08		# ^= ROTATE(r8,8)
1157	xor	$s1,$s1,$acc09
1158	xor	$s2,$s2,$acc10
1159	xor	$s3,$s3,$acc11
1160
1161	b	Ldec_compact_loop
1162.align	4
1163Ldec_compact_done:
1164	xor	$s0,$s0,$t0
1165	xor	$s1,$s1,$t1
1166	xor	$s2,$s2,$t2
1167	xor	$s3,$s3,$t3
1168	blr
1169.long	0
1170.asciz	"AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1171.align	7
1172___
1173
# Final emission step.
#
# 1. Replace every `...` span in the accumulated assembly text with the
#    result of eval'ing its contents as Perl (e.g. `0` becomes 0), so
#    constant expressions written between backticks are folded at
#    generation time.
# 2. Write the text to STDOUT, which was reopened near the top of the file
#    as a pipe into ppc-xlate.pl.
# 3. Close the pipe and check the result.  close() on a pipe flushes pending
#    output, waits for the child, and returns false if the write failed or
#    the child exited non-zero.  Ignoring that would let this generator exit
#    successfully after ppc-xlate.pl failed, leaving a truncated or empty
#    output file for the build to pick up.  On a child failure $! is 0, so
#    the child status $? is reported as well.
1174$code =~ s/\`([^\`]*)\`/eval $1/gem;
1175print $code;
1176close STDOUT or die "error closing STDOUT: $! (child status $?)";
1177