1221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom/* crypto/aes/aes_core.c -*- mode:C; c-file-style: "eay" -*- */
2221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom/**
3221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom * rijndael-alg-fst.c
4221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom *
5221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom * @version 3.0 (December 2000)
6221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom *
7221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom * Optimised ANSI C code for the Rijndael cipher (now AES)
8221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom *
9221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
10221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
11221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom * @author Paulo Barreto <paulo.barreto@terra.com.br>
12221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom *
13221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom * This code is hereby placed in the public domain.
14221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom *
15221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
16221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
19221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
24221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom */
27221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
28221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom/*
 * This is an experimental x86[_64] derivative. It assumes little-endian
 * byte order and expects the CPU to sustain unaligned memory references.
 * It is used as a playground for cache-timing attack mitigations and
 * serves as the reference C implementation for the x86[_64] assembler.
33221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom *
34221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom *					<appro@fy.chalmers.se>
35221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom */
36221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
37221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
/* Unless AES_DEBUG is set, make sure NDEBUG is defined ahead of <assert.h>. */
#if !defined(AES_DEBUG) && !defined(NDEBUG)
# define NDEBUG
#endif
43221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#include <assert.h>
44221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
45221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#include <stdlib.h>
46221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#include <openssl/aes.h>
47221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#include "aes_locl.h"
48221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
/*
 * Two switches select which table -- the 256-byte one or the 2KB one --
 * is referenced in the outer and in the inner rounds respectively.
 *
 * AES_COMPACT_IN_OUTER_ROUNDS costs ~30% in performance; adding
 * AES_COMPACT_IN_INNER_ROUNDS on top of it reduces the benchmark
 * *further* by a factor of ~2, so the inner-round switch is explicitly
 * left undefined whenever the outer-round switch is on.
 */
#define AES_COMPACT_IN_OUTER_ROUNDS
#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
# undef AES_COMPACT_IN_INNER_ROUNDS
#endif
60221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
61221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#if 1
/*
 * Read one byte from every 32-byte stretch of the 256 bytes starting at
 * table, so that the whole region is touched before it is used
 * (presumably to pull it into cache as part of the cache-timing
 * mitigations mentioned in the file header).
 *
 * table: start of a readable region of at least 256 bytes; it is never
 *        written to.
 *
 * The reads go through a volatile-qualified byte pointer and the
 * accumulated value is stored to a volatile object, so the compiler must
 * actually perform them.  Byte accesses are used so that no alignment of
 * table is required and the const qualifier is kept.
 */
static void prefetch256(const void *table)
{
	const volatile unsigned char *bytes = table;
	volatile unsigned char sink;
	unsigned char acc = 0;
	size_t off;

	/* 32 is the smallest cache-line size in common use */
	for (off = 0; off < 256; off += 32)
		acc ^= bytes[off];

	sink = acc;
	(void)sink;
}
73221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#else
74221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom# define prefetch256(t)
75221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#endif
76221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
/*
 * GETU32(p): fetch a u32 straight from address p through a pointer cast.
 * No byte swapping or alignment handling is done here; see the file
 * header for the little-endian / unaligned-access assumptions.  Any
 * earlier definition (presumably one from aes_locl.h) is dropped first.
 */
#ifdef GETU32
# undef GETU32
#endif
#define GETU32(p) (*(u32 *)(p))
79221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
/*
 * u64 is the unsigned type used for the 8-byte table entries below and
 * U64(C) appends the literal suffix that matches it to the constant C.
 * Both are chosen per toolchain.
 */
#if !defined(__MINGW32__) && (defined(_WIN32) || defined(_WIN64))
/* Windows compilers other than MinGW: __int64 with the UI64 suffix */
typedef unsigned __int64 u64;
# define U64(C)	C##UI64
#elif defined(__arch64__)
/* __arch64__ targets: plain unsigned long with the UL suffix */
typedef unsigned long u64;
# define U64(C)	C##UL
#else
/* everything else: unsigned long long with the ULL suffix */
typedef unsigned long long u64;
# define U64(C)	C##ULL
#endif
90221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
/*
 * ROTATE(a,n): rotate the 32-bit value a left by n bits, using a compiler
 * intrinsic or x86 inline assembly.  The macro is left undefined when
 * none of the branches below applies.
 *
 *  - MSVC: _lrotl (unsigned long is 32 bits wide with that compiler).
 *  - Intel compiler: _rotl, which takes a 32-bit unsigned int.  _lrotl
 *    operates on unsigned long and would therefore rotate within 64 bits
 *    on LP64 targets.
 *  - GCC-compatible compilers on x86/x86_64: the "roll" instruction.  The
 *    "I" constraint requires n to be a compile-time constant in 0..31.
 *    __asm__ is spelled with underscores so the macro also expands under
 *    strict -std= modes, where the plain asm keyword is not available.
 *    The temporary is named rot_ so that an argument expression that
 *    mentions a caller variable called ret is not captured by it.
 */
#undef ROTATE
#if defined(_MSC_VER)
# define ROTATE(a,n)	_lrotl(a,n)
#elif defined(__ICC)
# define ROTATE(a,n)	_rotl(a,n)
#elif defined(__GNUC__) && __GNUC__>=2
# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
#   define ROTATE(a,n)	({ register unsigned int rot_;	\
				__asm__ (		\
				"roll %1,%0"		\
				: "=r"(rot_)		\
				: "I"(n), "0"(a)	\
				: "cc");		\
			   rot_;			\
			})
# endif
#endif
/*
 * Layout of the combined encryption table, lowest-addressed byte first on
 * the little-endian targets this file assumes:
 *
 *   Te [x] = S [x].[02, 01, 01, 03, 02, 01, 01, 03];
 *   Te0[x] = S [x].[02, 01, 01, 03];
 *   Te1[x] = S [x].[03, 02, 01, 01];
 *   Te2[x] = S [x].[01, 03, 02, 01];
 *   Te3[x] = S [x].[01, 01, 03, 02];
 *
 * TeN is only meaningful when written as TeN[x].  The subscript binds
 * tighter than the leading (u32) cast, so TeN[x] reads the 8-byte entry x
 * through a pointer displaced by 0, 3, 2 or 1 bytes and keeps the low
 * 32 bits of what was read.  This depends on the unaligned-access
 * assumption stated in the file header.  The casts keep the const
 * qualifier of Te.
 *
 * NOTE(review): with a non-zero displacement the u64-typed read of
 * entry 255 formally extends past the end of Te; only the low 32 bits
 * are used.  Confirm this is acceptable for the supported compilers.
 */
#define Te0 (u32)((const u64 *)((const u8 *)Te + 0))
#define Te1 (u32)((const u64 *)((const u8 *)Te + 3))
#define Te2 (u32)((const u64 *)((const u8 *)Te + 2))
#define Te3 (u32)((const u64 *)((const u8 *)Te + 1))
/*
 * Layout of the combined decryption table, lowest-addressed byte first on
 * the little-endian targets this file assumes:
 *
 *   Td [x] = Si[x].[0e, 09, 0d, 0b, 0e, 09, 0d, 0b];
 *   Td0[x] = Si[x].[0e, 09, 0d, 0b];
 *   Td1[x] = Si[x].[0b, 0e, 09, 0d];
 *   Td2[x] = Si[x].[0d, 0b, 0e, 09];
 *   Td3[x] = Si[x].[09, 0d, 0b, 0e];
 *   Td4[x] = Si[x].[01];
 *
 * TdN is only meaningful when written as TdN[x].  The subscript binds
 * tighter than the leading (u32) cast, so TdN[x] reads the 8-byte entry x
 * through a pointer displaced by 0, 3, 2 or 1 bytes and keeps the low
 * 32 bits of what was read.  This depends on the unaligned-access
 * assumption stated in the file header.  The casts keep the const
 * qualifier of Td.
 *
 * NOTE(review): with a non-zero displacement the u64-typed read of
 * entry 255 formally extends past the 256 declared entries of Td; only
 * the low 32 bits are used.  Confirm this is acceptable for the
 * supported compilers.
 */
#define Td0 (u32)((const u64 *)((const u8 *)Td + 0))
#define Td1 (u32)((const u64 *)((const u8 *)Td + 3))
#define Td2 (u32)((const u64 *)((const u8 *)Td + 2))
#define Td3 (u32)((const u64 *)((const u8 *)Td + 1))
129221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
/*
 * Te: combined encryption lookup table, 256 entries of 8 bytes each
 * (2KB in total; presumably the "2KB" table that the AES_COMPACT_*
 * comment refers to).  Each entry holds the same 32-bit word in both
 * halves, as the Te[x] line of the layout comment describes, so that the
 * Te0..Te3 macros can obtain the four byte rotations of that word by
 * reading at byte offsets 0, 3, 2 and 1 inside an entry.
 */
130221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstromstatic const u64 Te[256] = {
131221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xa56363c6a56363c6), U64(0x847c7cf8847c7cf8),
132221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x997777ee997777ee), U64(0x8d7b7bf68d7b7bf6),
133221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x0df2f2ff0df2f2ff), U64(0xbd6b6bd6bd6b6bd6),
134221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xb16f6fdeb16f6fde), U64(0x54c5c59154c5c591),
135221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x5030306050303060), U64(0x0301010203010102),
136221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xa96767cea96767ce), U64(0x7d2b2b567d2b2b56),
137221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x19fefee719fefee7), U64(0x62d7d7b562d7d7b5),
138221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xe6abab4de6abab4d), U64(0x9a7676ec9a7676ec),
139221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x45caca8f45caca8f), U64(0x9d82821f9d82821f),
140221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x40c9c98940c9c989), U64(0x877d7dfa877d7dfa),
141221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x15fafaef15fafaef), U64(0xeb5959b2eb5959b2),
142221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xc947478ec947478e), U64(0x0bf0f0fb0bf0f0fb),
143221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xecadad41ecadad41), U64(0x67d4d4b367d4d4b3),
144221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xfda2a25ffda2a25f), U64(0xeaafaf45eaafaf45),
145221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xbf9c9c23bf9c9c23), U64(0xf7a4a453f7a4a453),
146221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x967272e4967272e4), U64(0x5bc0c09b5bc0c09b),
147221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xc2b7b775c2b7b775), U64(0x1cfdfde11cfdfde1),
148221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xae93933dae93933d), U64(0x6a26264c6a26264c),
149221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x5a36366c5a36366c), U64(0x413f3f7e413f3f7e),
150221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x02f7f7f502f7f7f5), U64(0x4fcccc834fcccc83),
151221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x5c3434685c343468), U64(0xf4a5a551f4a5a551),
152221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x34e5e5d134e5e5d1), U64(0x08f1f1f908f1f1f9),
153221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x937171e2937171e2), U64(0x73d8d8ab73d8d8ab),
154221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x5331316253313162), U64(0x3f15152a3f15152a),
155221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x0c0404080c040408), U64(0x52c7c79552c7c795),
156221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x6523234665232346), U64(0x5ec3c39d5ec3c39d),
157221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x2818183028181830), U64(0xa1969637a1969637),
158221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x0f05050a0f05050a), U64(0xb59a9a2fb59a9a2f),
159221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x0907070e0907070e), U64(0x3612122436121224),
160221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x9b80801b9b80801b), U64(0x3de2e2df3de2e2df),
161221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x26ebebcd26ebebcd), U64(0x6927274e6927274e),
162221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xcdb2b27fcdb2b27f), U64(0x9f7575ea9f7575ea),
163221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x1b0909121b090912), U64(0x9e83831d9e83831d),
164221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x742c2c58742c2c58), U64(0x2e1a1a342e1a1a34),
165221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x2d1b1b362d1b1b36), U64(0xb26e6edcb26e6edc),
166221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xee5a5ab4ee5a5ab4), U64(0xfba0a05bfba0a05b),
167221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xf65252a4f65252a4), U64(0x4d3b3b764d3b3b76),
168221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x61d6d6b761d6d6b7), U64(0xceb3b37dceb3b37d),
169221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x7b2929527b292952), U64(0x3ee3e3dd3ee3e3dd),
170221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x712f2f5e712f2f5e), U64(0x9784841397848413),
171221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xf55353a6f55353a6), U64(0x68d1d1b968d1d1b9),
172221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x0000000000000000), U64(0x2cededc12cededc1),
173221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x6020204060202040), U64(0x1ffcfce31ffcfce3),
174221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xc8b1b179c8b1b179), U64(0xed5b5bb6ed5b5bb6),
175221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xbe6a6ad4be6a6ad4), U64(0x46cbcb8d46cbcb8d),
176221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xd9bebe67d9bebe67), U64(0x4b3939724b393972),
177221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xde4a4a94de4a4a94), U64(0xd44c4c98d44c4c98),
178221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xe85858b0e85858b0), U64(0x4acfcf854acfcf85),
179221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x6bd0d0bb6bd0d0bb), U64(0x2aefefc52aefefc5),
180221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xe5aaaa4fe5aaaa4f), U64(0x16fbfbed16fbfbed),
181221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xc5434386c5434386), U64(0xd74d4d9ad74d4d9a),
182221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x5533336655333366), U64(0x9485851194858511),
183221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xcf45458acf45458a), U64(0x10f9f9e910f9f9e9),
184221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x0602020406020204), U64(0x817f7ffe817f7ffe),
185221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xf05050a0f05050a0), U64(0x443c3c78443c3c78),
186221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xba9f9f25ba9f9f25), U64(0xe3a8a84be3a8a84b),
187221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xf35151a2f35151a2), U64(0xfea3a35dfea3a35d),
188221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xc0404080c0404080), U64(0x8a8f8f058a8f8f05),
189221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xad92923fad92923f), U64(0xbc9d9d21bc9d9d21),
190221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x4838387048383870), U64(0x04f5f5f104f5f5f1),
191221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xdfbcbc63dfbcbc63), U64(0xc1b6b677c1b6b677),
192221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x75dadaaf75dadaaf), U64(0x6321214263212142),
193221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x3010102030101020), U64(0x1affffe51affffe5),
194221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x0ef3f3fd0ef3f3fd), U64(0x6dd2d2bf6dd2d2bf),
195221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x4ccdcd814ccdcd81), U64(0x140c0c18140c0c18),
196221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x3513132635131326), U64(0x2fececc32fececc3),
197221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xe15f5fbee15f5fbe), U64(0xa2979735a2979735),
198221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xcc444488cc444488), U64(0x3917172e3917172e),
199221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x57c4c49357c4c493), U64(0xf2a7a755f2a7a755),
200221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x827e7efc827e7efc), U64(0x473d3d7a473d3d7a),
201221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xac6464c8ac6464c8), U64(0xe75d5dbae75d5dba),
202221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x2b1919322b191932), U64(0x957373e6957373e6),
203221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xa06060c0a06060c0), U64(0x9881811998818119),
204221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xd14f4f9ed14f4f9e), U64(0x7fdcdca37fdcdca3),
205221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x6622224466222244), U64(0x7e2a2a547e2a2a54),
206221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xab90903bab90903b), U64(0x8388880b8388880b),
207221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xca46468cca46468c), U64(0x29eeeec729eeeec7),
208221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xd3b8b86bd3b8b86b), U64(0x3c1414283c141428),
209221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x79dedea779dedea7), U64(0xe25e5ebce25e5ebc),
210221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x1d0b0b161d0b0b16), U64(0x76dbdbad76dbdbad),
211221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x3be0e0db3be0e0db), U64(0x5632326456323264),
212221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x4e3a3a744e3a3a74), U64(0x1e0a0a141e0a0a14),
213221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xdb494992db494992), U64(0x0a06060c0a06060c),
214221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x6c2424486c242448), U64(0xe45c5cb8e45c5cb8),
215221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x5dc2c29f5dc2c29f), U64(0x6ed3d3bd6ed3d3bd),
216221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xefacac43efacac43), U64(0xa66262c4a66262c4),
217221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xa8919139a8919139), U64(0xa4959531a4959531),
218221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x37e4e4d337e4e4d3), U64(0x8b7979f28b7979f2),
219221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x32e7e7d532e7e7d5), U64(0x43c8c88b43c8c88b),
220221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x5937376e5937376e), U64(0xb76d6ddab76d6dda),
221221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x8c8d8d018c8d8d01), U64(0x64d5d5b164d5d5b1),
222221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xd24e4e9cd24e4e9c), U64(0xe0a9a949e0a9a949),
223221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xb46c6cd8b46c6cd8), U64(0xfa5656acfa5656ac),
224221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x07f4f4f307f4f4f3), U64(0x25eaeacf25eaeacf),
225221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xaf6565caaf6565ca), U64(0x8e7a7af48e7a7af4),
226221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xe9aeae47e9aeae47), U64(0x1808081018080810),
227221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xd5baba6fd5baba6f), U64(0x887878f0887878f0),
228221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x6f25254a6f25254a), U64(0x722e2e5c722e2e5c),
229221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x241c1c38241c1c38), U64(0xf1a6a657f1a6a657),
230221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xc7b4b473c7b4b473), U64(0x51c6c69751c6c697),
231221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x23e8e8cb23e8e8cb), U64(0x7cdddda17cdddda1),
232221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x9c7474e89c7474e8), U64(0x211f1f3e211f1f3e),
233221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xdd4b4b96dd4b4b96), U64(0xdcbdbd61dcbdbd61),
234221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x868b8b0d868b8b0d), U64(0x858a8a0f858a8a0f),
235221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x907070e0907070e0), U64(0x423e3e7c423e3e7c),
236221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xc4b5b571c4b5b571), U64(0xaa6666ccaa6666cc),
237221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xd8484890d8484890), U64(0x0503030605030306),
238221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x01f6f6f701f6f6f7), U64(0x120e0e1c120e0e1c),
239221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xa36161c2a36161c2), U64(0x5f35356a5f35356a),
240221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xf95757aef95757ae), U64(0xd0b9b969d0b9b969),
241221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x9186861791868617), U64(0x58c1c19958c1c199),
242221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x271d1d3a271d1d3a), U64(0xb99e9e27b99e9e27),
243221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x38e1e1d938e1e1d9), U64(0x13f8f8eb13f8f8eb),
244221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xb398982bb398982b), U64(0x3311112233111122),
245221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xbb6969d2bb6969d2), U64(0x70d9d9a970d9d9a9),
246221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x898e8e07898e8e07), U64(0xa7949433a7949433),
247221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xb69b9b2db69b9b2d), U64(0x221e1e3c221e1e3c),
248221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x9287871592878715), U64(0x20e9e9c920e9e9c9),
249221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x49cece8749cece87), U64(0xff5555aaff5555aa),
250221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x7828285078282850), U64(0x7adfdfa57adfdfa5),
251221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x8f8c8c038f8c8c03), U64(0xf8a1a159f8a1a159),
252221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x8089890980898909), U64(0x170d0d1a170d0d1a),
253221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xdabfbf65dabfbf65), U64(0x31e6e6d731e6e6d7),
254221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xc6424284c6424284), U64(0xb86868d0b86868d0),
255221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xc3414182c3414182), U64(0xb0999929b0999929),
256221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x772d2d5a772d2d5a), U64(0x110f0f1e110f0f1e),
257221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xcbb0b07bcbb0b07b), U64(0xfc5454a8fc5454a8),
258221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xd6bbbb6dd6bbbb6d), U64(0x3a16162c3a16162c)
259221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom};
260221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
/*
 * Te4: 256 single-byte entries (presumably the "256-byte" table that the
 * AES_COMPACT_* comment refers to).  Te4[x] is the plain S[x] byte, i.e.
 * the value that appears in the two 01 positions of Te[x]; for example
 * Te4[0] is 0x63 and Te[0] is built from the word 0xa56363c6.
 */
261221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstromstatic const u8 Te4[256] = {
262221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
263221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
264221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
265221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
266221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
267221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
268221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
269221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
270221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
271221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
272221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
273221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
274221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
275221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
276221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
277221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
278221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
279221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
280221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
281221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
282221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
283221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
284221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
285221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
286221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
287221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
288221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
289221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
290221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
291221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
292221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
293221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
294221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom};
295221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
296221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstromstatic const u64 Td[256] = {
297221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x50a7f45150a7f451), U64(0x5365417e5365417e),
298221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xc3a4171ac3a4171a), U64(0x965e273a965e273a),
299221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xcb6bab3bcb6bab3b), U64(0xf1459d1ff1459d1f),
300221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xab58faacab58faac), U64(0x9303e34b9303e34b),
301221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x55fa302055fa3020), U64(0xf66d76adf66d76ad),
302221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x9176cc889176cc88), U64(0x254c02f5254c02f5),
303221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xfcd7e54ffcd7e54f), U64(0xd7cb2ac5d7cb2ac5),
304221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x8044352680443526), U64(0x8fa362b58fa362b5),
305221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x495ab1de495ab1de), U64(0x671bba25671bba25),
306221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x980eea45980eea45), U64(0xe1c0fe5de1c0fe5d),
307221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x02752fc302752fc3), U64(0x12f04c8112f04c81),
308221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xa397468da397468d), U64(0xc6f9d36bc6f9d36b),
309221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xe75f8f03e75f8f03), U64(0x959c9215959c9215),
310221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xeb7a6dbfeb7a6dbf), U64(0xda595295da595295),
311221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x2d83bed42d83bed4), U64(0xd3217458d3217458),
312221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x2969e0492969e049), U64(0x44c8c98e44c8c98e),
313221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x6a89c2756a89c275), U64(0x78798ef478798ef4),
314221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x6b3e58996b3e5899), U64(0xdd71b927dd71b927),
315221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xb64fe1beb64fe1be), U64(0x17ad88f017ad88f0),
316221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x66ac20c966ac20c9), U64(0xb43ace7db43ace7d),
317221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x184adf63184adf63), U64(0x82311ae582311ae5),
318221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x6033519760335197), U64(0x457f5362457f5362),
319221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xe07764b1e07764b1), U64(0x84ae6bbb84ae6bbb),
320221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x1ca081fe1ca081fe), U64(0x942b08f9942b08f9),
321221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x5868487058684870), U64(0x19fd458f19fd458f),
322221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x876cde94876cde94), U64(0xb7f87b52b7f87b52),
323221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x23d373ab23d373ab), U64(0xe2024b72e2024b72),
324221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x578f1fe3578f1fe3), U64(0x2aab55662aab5566),
325221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x0728ebb20728ebb2), U64(0x03c2b52f03c2b52f),
326221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x9a7bc5869a7bc586), U64(0xa50837d3a50837d3),
327221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xf2872830f2872830), U64(0xb2a5bf23b2a5bf23),
328221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xba6a0302ba6a0302), U64(0x5c8216ed5c8216ed),
329221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x2b1ccf8a2b1ccf8a), U64(0x92b479a792b479a7),
330221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xf0f207f3f0f207f3), U64(0xa1e2694ea1e2694e),
331221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xcdf4da65cdf4da65), U64(0xd5be0506d5be0506),
332221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x1f6234d11f6234d1), U64(0x8afea6c48afea6c4),
333221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x9d532e349d532e34), U64(0xa055f3a2a055f3a2),
334221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x32e18a0532e18a05), U64(0x75ebf6a475ebf6a4),
335221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x39ec830b39ec830b), U64(0xaaef6040aaef6040),
336221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x069f715e069f715e), U64(0x51106ebd51106ebd),
337221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xf98a213ef98a213e), U64(0x3d06dd963d06dd96),
338221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xae053eddae053edd), U64(0x46bde64d46bde64d),
339221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xb58d5491b58d5491), U64(0x055dc471055dc471),
340221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x6fd406046fd40604), U64(0xff155060ff155060),
341221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x24fb981924fb9819), U64(0x97e9bdd697e9bdd6),
342221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xcc434089cc434089), U64(0x779ed967779ed967),
343221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xbd42e8b0bd42e8b0), U64(0x888b8907888b8907),
344221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x385b19e7385b19e7), U64(0xdbeec879dbeec879),
345221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x470a7ca1470a7ca1), U64(0xe90f427ce90f427c),
346221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xc91e84f8c91e84f8), U64(0x0000000000000000),
347221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x8386800983868009), U64(0x48ed2b3248ed2b32),
348221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xac70111eac70111e), U64(0x4e725a6c4e725a6c),
349221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xfbff0efdfbff0efd), U64(0x5638850f5638850f),
350221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x1ed5ae3d1ed5ae3d), U64(0x27392d3627392d36),
351221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x64d90f0a64d90f0a), U64(0x21a65c6821a65c68),
352221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xd1545b9bd1545b9b), U64(0x3a2e36243a2e3624),
353221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xb1670a0cb1670a0c), U64(0x0fe757930fe75793),
354221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xd296eeb4d296eeb4), U64(0x9e919b1b9e919b1b),
355221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x4fc5c0804fc5c080), U64(0xa220dc61a220dc61),
356221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x694b775a694b775a), U64(0x161a121c161a121c),
357221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x0aba93e20aba93e2), U64(0xe52aa0c0e52aa0c0),
358221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x43e0223c43e0223c), U64(0x1d171b121d171b12),
359221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x0b0d090e0b0d090e), U64(0xadc78bf2adc78bf2),
360221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xb9a8b62db9a8b62d), U64(0xc8a91e14c8a91e14),
361221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x8519f1578519f157), U64(0x4c0775af4c0775af),
362221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xbbdd99eebbdd99ee), U64(0xfd607fa3fd607fa3),
363221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x9f2601f79f2601f7), U64(0xbcf5725cbcf5725c),
364221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xc53b6644c53b6644), U64(0x347efb5b347efb5b),
365221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x7629438b7629438b), U64(0xdcc623cbdcc623cb),
366221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x68fcedb668fcedb6), U64(0x63f1e4b863f1e4b8),
367221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xcadc31d7cadc31d7), U64(0x1085634210856342),
368221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x4022971340229713), U64(0x2011c6842011c684),
369221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x7d244a857d244a85), U64(0xf83dbbd2f83dbbd2),
370221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x1132f9ae1132f9ae), U64(0x6da129c76da129c7),
371221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x4b2f9e1d4b2f9e1d), U64(0xf330b2dcf330b2dc),
372221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xec52860dec52860d), U64(0xd0e3c177d0e3c177),
373221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x6c16b32b6c16b32b), U64(0x99b970a999b970a9),
374221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xfa489411fa489411), U64(0x2264e9472264e947),
375221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xc48cfca8c48cfca8), U64(0x1a3ff0a01a3ff0a0),
376221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xd82c7d56d82c7d56), U64(0xef903322ef903322),
377221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xc74e4987c74e4987), U64(0xc1d138d9c1d138d9),
378221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xfea2ca8cfea2ca8c), U64(0x360bd498360bd498),
379221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xcf81f5a6cf81f5a6), U64(0x28de7aa528de7aa5),
380221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x268eb7da268eb7da), U64(0xa4bfad3fa4bfad3f),
381221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xe49d3a2ce49d3a2c), U64(0x0d9278500d927850),
382221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x9bcc5f6a9bcc5f6a), U64(0x62467e5462467e54),
383221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xc2138df6c2138df6), U64(0xe8b8d890e8b8d890),
384221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x5ef7392e5ef7392e), U64(0xf5afc382f5afc382),
385221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xbe805d9fbe805d9f), U64(0x7c93d0697c93d069),
386221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xa92dd56fa92dd56f), U64(0xb31225cfb31225cf),
387221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x3b99acc83b99acc8), U64(0xa77d1810a77d1810),
388221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x6e639ce86e639ce8), U64(0x7bbb3bdb7bbb3bdb),
389221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x097826cd097826cd), U64(0xf418596ef418596e),
390221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x01b79aec01b79aec), U64(0xa89a4f83a89a4f83),
391221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x656e95e6656e95e6), U64(0x7ee6ffaa7ee6ffaa),
392221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x08cfbc2108cfbc21), U64(0xe6e815efe6e815ef),
393221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xd99be7bad99be7ba), U64(0xce366f4ace366f4a),
394221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xd4099fead4099fea), U64(0xd67cb029d67cb029),
395221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xafb2a431afb2a431), U64(0x31233f2a31233f2a),
396221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x3094a5c63094a5c6), U64(0xc066a235c066a235),
397221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x37bc4e7437bc4e74), U64(0xa6ca82fca6ca82fc),
398221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xb0d090e0b0d090e0), U64(0x15d8a73315d8a733),
399221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x4a9804f14a9804f1), U64(0xf7daec41f7daec41),
400221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x0e50cd7f0e50cd7f), U64(0x2ff691172ff69117),
401221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x8dd64d768dd64d76), U64(0x4db0ef434db0ef43),
402221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x544daacc544daacc), U64(0xdf0496e4df0496e4),
403221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xe3b5d19ee3b5d19e), U64(0x1b886a4c1b886a4c),
404221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xb81f2cc1b81f2cc1), U64(0x7f5165467f516546),
405221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x04ea5e9d04ea5e9d), U64(0x5d358c015d358c01),
406221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x737487fa737487fa), U64(0x2e410bfb2e410bfb),
407221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x5a1d67b35a1d67b3), U64(0x52d2db9252d2db92),
408221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x335610e9335610e9), U64(0x1347d66d1347d66d),
409221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x8c61d79a8c61d79a), U64(0x7a0ca1377a0ca137),
410221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x8e14f8598e14f859), U64(0x893c13eb893c13eb),
411221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xee27a9ceee27a9ce), U64(0x35c961b735c961b7),
412221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xede51ce1ede51ce1), U64(0x3cb1477a3cb1477a),
413221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x59dfd29c59dfd29c), U64(0x3f73f2553f73f255),
414221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x79ce141879ce1418), U64(0xbf37c773bf37c773),
415221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0xeacdf753eacdf753), U64(0x5baafd5f5baafd5f),
416221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x146f3ddf146f3ddf), U64(0x86db447886db4478),
417221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x81f3afca81f3afca), U64(0x3ec468b93ec468b9),
418221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x2c3424382c342438), U64(0x5f40a3c25f40a3c2),
419221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x72c31d1672c31d16), U64(0x0c25e2bc0c25e2bc),
420221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x8b493c288b493c28), U64(0x41950dff41950dff),
421221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x7101a8397101a839), U64(0xdeb30c08deb30c08),
422221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x9ce4b4d89ce4b4d8), U64(0x90c1566490c15664),
423221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x6184cb7b6184cb7b), U64(0x70b632d570b632d5),
424221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    U64(0x745c6c48745c6c48), U64(0x4257b8d04257b8d0)
425221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom};
426221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstromstatic const u8 Td4[256] = {
427221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
428221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
429221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
430221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
431221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
432221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
433221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
434221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
435221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
436221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
437221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
438221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
439221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
440221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
441221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
442221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
443221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
444221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
445221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
446221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
447221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
448221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
449221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
450221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
451221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
452221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
453221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
454221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
455221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
456221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
457221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
458221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU
459221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom};
460221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
461221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstromstatic const u32 rcon[] = {
462221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x00000001U, 0x00000002U, 0x00000004U, 0x00000008U,
463221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x00000010U, 0x00000020U, 0x00000040U, 0x00000080U,
464221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    0x0000001bU, 0x00000036U, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
465221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom};
466221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
467221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom/**
468221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom * Expand the cipher key into the encryption key schedule.
469221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom */
470221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstromint AES_set_encrypt_key(const unsigned char *userKey, const int bits,
471221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			AES_KEY *key) {
472221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
473221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	u32 *rk;
474221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom   	int i = 0;
475221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	u32 temp;
476221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
477221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	if (!userKey || !key)
478221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		return -1;
479221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	if (bits != 128 && bits != 192 && bits != 256)
480221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		return -2;
481221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
482221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	rk = key->rd_key;
483221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
484221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	if (bits==128)
485221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		key->rounds = 10;
486221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	else if (bits==192)
487221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		key->rounds = 12;
488221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	else
489221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		key->rounds = 14;
490221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
491221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	rk[0] = GETU32(userKey     );
492221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	rk[1] = GETU32(userKey +  4);
493221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	rk[2] = GETU32(userKey +  8);
494221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	rk[3] = GETU32(userKey + 12);
495221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	if (bits == 128) {
496221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		while (1) {
497221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			temp  = rk[3];
498221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			rk[4] = rk[0] ^
499221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(Te4[(temp >>  8) & 0xff]      ) ^
500221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(Te4[(temp >> 16) & 0xff] <<  8) ^
501221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(Te4[(temp >> 24)       ] << 16) ^
502221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(Te4[(temp      ) & 0xff] << 24) ^
503221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				rcon[i];
504221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			rk[5] = rk[1] ^ rk[4];
505221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			rk[6] = rk[2] ^ rk[5];
506221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			rk[7] = rk[3] ^ rk[6];
507221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			if (++i == 10) {
508221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				return 0;
509221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			}
510221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			rk += 4;
511221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		}
512221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	}
513221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	rk[4] = GETU32(userKey + 16);
514221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	rk[5] = GETU32(userKey + 20);
515221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	if (bits == 192) {
516221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		while (1) {
517221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			temp = rk[ 5];
518221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			rk[ 6] = rk[ 0] ^
519221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(Te4[(temp >>  8) & 0xff]      ) ^
520221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(Te4[(temp >> 16) & 0xff] <<  8) ^
521221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(Te4[(temp >> 24)       ] << 16) ^
522221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(Te4[(temp      ) & 0xff] << 24) ^
523221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				rcon[i];
524221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			rk[ 7] = rk[ 1] ^ rk[ 6];
525221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			rk[ 8] = rk[ 2] ^ rk[ 7];
526221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			rk[ 9] = rk[ 3] ^ rk[ 8];
527221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			if (++i == 8) {
528221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				return 0;
529221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			}
530221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			rk[10] = rk[ 4] ^ rk[ 9];
531221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			rk[11] = rk[ 5] ^ rk[10];
532221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			rk += 6;
533221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		}
534221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	}
535221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	rk[6] = GETU32(userKey + 24);
536221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	rk[7] = GETU32(userKey + 28);
537221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	if (bits == 256) {
538221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		while (1) {
539221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			temp = rk[ 7];
540221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			rk[ 8] = rk[ 0] ^
541221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(Te4[(temp >>  8) & 0xff]      ) ^
542221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(Te4[(temp >> 16) & 0xff] <<  8) ^
543221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(Te4[(temp >> 24)       ] << 16) ^
544221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(Te4[(temp      ) & 0xff] << 24) ^
545221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				rcon[i];
546221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			rk[ 9] = rk[ 1] ^ rk[ 8];
547221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			rk[10] = rk[ 2] ^ rk[ 9];
548221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			rk[11] = rk[ 3] ^ rk[10];
549221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			if (++i == 7) {
550221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				return 0;
551221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			}
552221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			temp = rk[11];
553221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			rk[12] = rk[ 4] ^
554221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(Te4[(temp      ) & 0xff]      ) ^
555221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(Te4[(temp >>  8) & 0xff] <<  8) ^
556221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(Te4[(temp >> 16) & 0xff] << 16) ^
557221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(Te4[(temp >> 24)       ] << 24);
558221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			rk[13] = rk[ 5] ^ rk[12];
559221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			rk[14] = rk[ 6] ^ rk[13];
560221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			rk[15] = rk[ 7] ^ rk[14];
561221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
562221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			rk += 8;
563221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom        	}
564221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	}
565221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	return 0;
566221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
567221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
568221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom/**
569221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom * Expand the cipher key into the decryption key schedule.
570221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom */
571221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstromint AES_set_decrypt_key(const unsigned char *userKey, const int bits,
572221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			 AES_KEY *key) {
573221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
574221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom        u32 *rk;
575221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	int i, j, status;
576221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	u32 temp;
577221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
578221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	/* first, start with an encryption schedule */
579221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	status = AES_set_encrypt_key(userKey, bits, key);
580221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	if (status < 0)
581221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		return status;
582221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
583221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	rk = key->rd_key;
584221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
585221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	/* invert the order of the round keys: */
586221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
587221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
588221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
589221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
590221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
591221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	}
592221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	/* apply the inverse MixColumn transform to all round keys but the first and the last: */
593221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	for (i = 1; i < (key->rounds); i++) {
594221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk += 4;
595221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#if 1
596221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		for (j = 0; j < 4; j++) {
597221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
598221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
599221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			tp1 = rk[j];
600221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			m = tp1 & 0x80808080;
601221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
602221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				((m - (m >> 7)) & 0x1b1b1b1b);
603221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			m = tp2 & 0x80808080;
604221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
605221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				((m - (m >> 7)) & 0x1b1b1b1b);
606221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			m = tp4 & 0x80808080;
607221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
608221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				((m - (m >> 7)) & 0x1b1b1b1b);
609221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			tp9 = tp8 ^ tp1;
610221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			tpb = tp9 ^ tp2;
611221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			tpd = tp9 ^ tp4;
612221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			tpe = tp8 ^ tp4 ^ tp2;
613221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#if defined(ROTATE)
614221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			rk[j] = tpe ^ ROTATE(tpd,16) ^
615221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				ROTATE(tp9,8) ^ ROTATE(tpb,24);
616221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#else
617221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
618221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(tp9 >> 24) ^ (tp9 << 8) ^
619221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(tpb >> 8) ^ (tpb << 24);
620221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#endif
621221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		}
622221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#else
623221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[0] =
624221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			Td0[Te2[(rk[0]      ) & 0xff] & 0xff] ^
625221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			Td1[Te2[(rk[0] >>  8) & 0xff] & 0xff] ^
626221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			Td2[Te2[(rk[0] >> 16) & 0xff] & 0xff] ^
627221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			Td3[Te2[(rk[0] >> 24)       ] & 0xff];
628221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[1] =
629221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			Td0[Te2[(rk[1]      ) & 0xff] & 0xff] ^
630221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			Td1[Te2[(rk[1] >>  8) & 0xff] & 0xff] ^
631221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			Td2[Te2[(rk[1] >> 16) & 0xff] & 0xff] ^
632221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			Td3[Te2[(rk[1] >> 24)       ] & 0xff];
633221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[2] =
634221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			Td0[Te2[(rk[2]      ) & 0xff] & 0xff] ^
635221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			Td1[Te2[(rk[2] >>  8) & 0xff] & 0xff] ^
636221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			Td2[Te2[(rk[2] >> 16) & 0xff] & 0xff] ^
637221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			Td3[Te2[(rk[2] >> 24)       ] & 0xff];
638221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[3] =
639221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			Td0[Te2[(rk[3]      ) & 0xff] & 0xff] ^
640221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			Td1[Te2[(rk[3] >>  8) & 0xff] & 0xff] ^
641221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			Td2[Te2[(rk[3] >> 16) & 0xff] & 0xff] ^
642221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			Td3[Te2[(rk[3] >> 24)       ] & 0xff];
643221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#endif
644221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	}
645221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	return 0;
646221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
647221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
648221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom/*
649221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom * Encrypt a single block
650221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom * in and out can overlap
651221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom */
652221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstromvoid AES_encrypt(const unsigned char *in, unsigned char *out,
653221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		 const AES_KEY *key) {
654221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
655221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	const u32 *rk;
656221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	u32 s0, s1, s2, s3, t[4];
657221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	int r;
658221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
659221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	assert(in && out && key);
660221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	rk = key->rd_key;
661221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
662221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	/*
663221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	 * map byte array block to cipher state
664221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	 * and add initial round key:
665221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	 */
666221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	s0 = GETU32(in     ) ^ rk[0];
667221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	s1 = GETU32(in +  4) ^ rk[1];
668221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	s2 = GETU32(in +  8) ^ rk[2];
669221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	s3 = GETU32(in + 12) ^ rk[3];
670221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
671221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
672221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	prefetch256(Te4);
673221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
674221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	t[0] =	Te4[(s0      ) & 0xff]       ^
675221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s1 >>  8) & 0xff] <<  8 ^
676221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s2 >> 16) & 0xff] << 16 ^
677221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s3 >> 24)       ] << 24;
678221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	t[1] =	Te4[(s1      ) & 0xff]       ^
679221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s2 >>  8) & 0xff] <<  8 ^
680221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s3 >> 16) & 0xff] << 16 ^
681221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s0 >> 24)       ] << 24;
682221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	t[2] =	Te4[(s2      ) & 0xff]       ^
683221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s3 >>  8) & 0xff] <<  8 ^
684221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s0 >> 16) & 0xff] << 16 ^
685221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s1 >> 24)       ] << 24;
686221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	t[3] =	Te4[(s3      ) & 0xff]       ^
687221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s0 >>  8) & 0xff] <<  8 ^
688221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s1 >> 16) & 0xff] << 16 ^
689221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s2 >> 24)       ] << 24;
690221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
691221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	/* now do the linear transform using words */
692221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	{	int i;
693221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		u32 r0, r1, r2;
694221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
695221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		for (i = 0; i < 4; i++) {
696221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			r0 = t[i];
697221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			r1 = r0 & 0x80808080;
698221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			r2 = ((r0 & 0x7f7f7f7f) << 1) ^
699221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				((r1 - (r1 >> 7)) & 0x1b1b1b1b);
700221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#if defined(ROTATE)
701221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^
702221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				ROTATE(r0,16) ^ ROTATE(r0,8);
703221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#else
704221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^
705221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(r0 << 16) ^ (r0 >> 16) ^
706221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(r0 << 8) ^ (r0 >> 24);
707221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#endif
708221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			t[i] ^= rk[4+i];
709221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		}
710221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	}
711221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#else
712221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	t[0] =	Te0[(s0      ) & 0xff] ^
713221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te1[(s1 >>  8) & 0xff] ^
714221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te2[(s2 >> 16) & 0xff] ^
715221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te3[(s3 >> 24)       ] ^
716221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[4];
717221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	t[1] =	Te0[(s1      ) & 0xff] ^
718221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te1[(s2 >>  8) & 0xff] ^
719221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te2[(s3 >> 16) & 0xff] ^
720221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te3[(s0 >> 24)       ] ^
721221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[5];
722221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	t[2] =	Te0[(s2      ) & 0xff] ^
723221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te1[(s3 >>  8) & 0xff] ^
724221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te2[(s0 >> 16) & 0xff] ^
725221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te3[(s1 >> 24)       ] ^
726221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[6];
727221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	t[3] =	Te0[(s3      ) & 0xff] ^
728221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te1[(s0 >>  8) & 0xff] ^
729221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te2[(s1 >> 16) & 0xff] ^
730221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te3[(s2 >> 24)       ] ^
731221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[7];
732221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#endif
733221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
734221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
735221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    /*
736221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom     * Nr - 2 full rounds:
737221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom     */
738221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) {
739221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#if defined(AES_COMPACT_IN_INNER_ROUNDS)
740221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	t[0] =	Te4[(s0      ) & 0xff]       ^
741221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s1 >>  8) & 0xff] <<  8 ^
742221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s2 >> 16) & 0xff] << 16 ^
743221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s3 >> 24)       ] << 24;
744221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	t[1] =	Te4[(s1      ) & 0xff]       ^
745221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s2 >>  8) & 0xff] <<  8 ^
746221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s3 >> 16) & 0xff] << 16 ^
747221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s0 >> 24)       ] << 24;
748221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	t[2] =	Te4[(s2      ) & 0xff]       ^
749221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s3 >>  8) & 0xff] <<  8 ^
750221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s0 >> 16) & 0xff] << 16 ^
751221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s1 >> 24)       ] << 24;
752221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	t[3] =	Te4[(s3      ) & 0xff]       ^
753221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s0 >>  8) & 0xff] <<  8 ^
754221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s1 >> 16) & 0xff] << 16 ^
755221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s2 >> 24)       ] << 24;
756221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
757221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	/* now do the linear transform using words */
758221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	{	int i;
759221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		u32 r0, r1, r2;
760221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
761221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		for (i = 0; i < 4; i++) {
762221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			r0 = t[i];
763221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			r1 = r0 & 0x80808080;
764221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			r2 = ((r0 & 0x7f7f7f7f) << 1) ^
765221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				((r1 - (r1 >> 7)) & 0x1b1b1b1b);
766221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#if defined(ROTATE)
767221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^
768221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				ROTATE(r0,16) ^ ROTATE(r0,8);
769221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#else
770221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^
771221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(r0 << 16) ^ (r0 >> 16) ^
772221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(r0 << 8) ^ (r0 >> 24);
773221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#endif
774221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			t[i] ^= rk[i];
775221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		}
776221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	}
777221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#else
778221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	t[0] =	Te0[(s0      ) & 0xff] ^
779221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te1[(s1 >>  8) & 0xff] ^
780221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te2[(s2 >> 16) & 0xff] ^
781221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te3[(s3 >> 24)       ] ^
782221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[0];
783221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	t[1] =	Te0[(s1      ) & 0xff] ^
784221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te1[(s2 >>  8) & 0xff] ^
785221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te2[(s3 >> 16) & 0xff] ^
786221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te3[(s0 >> 24)       ] ^
787221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[1];
788221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	t[2] =	Te0[(s2      ) & 0xff] ^
789221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te1[(s3 >>  8) & 0xff] ^
790221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te2[(s0 >> 16) & 0xff] ^
791221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te3[(s1 >> 24)       ] ^
792221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[2];
793221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	t[3] =	Te0[(s3      ) & 0xff] ^
794221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te1[(s0 >>  8) & 0xff] ^
795221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te2[(s1 >> 16) & 0xff] ^
796221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te3[(s2 >> 24)       ] ^
797221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[3];
798221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#endif
799221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
800221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    }
801221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    /*
802221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	 * apply last round and
803221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	 * map cipher state to byte array block:
804221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	 */
805221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
806221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	prefetch256(Te4);
807221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
808221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	*(u32*)(out+0) =
809221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s0      ) & 0xff]       ^
810221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s1 >>  8) & 0xff] <<  8 ^
811221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s2 >> 16) & 0xff] << 16 ^
812221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s3 >> 24)       ] << 24 ^
813221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[0];
814221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	*(u32*)(out+4) =
815221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s1      ) & 0xff]       ^
816221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s2 >>  8) & 0xff] <<  8 ^
817221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s3 >> 16) & 0xff] << 16 ^
818221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s0 >> 24)       ] << 24 ^
819221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[1];
820221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	*(u32*)(out+8) =
821221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s2      ) & 0xff]       ^
822221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s3 >>  8) & 0xff] <<  8 ^
823221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s0 >> 16) & 0xff] << 16 ^
824221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s1 >> 24)       ] << 24 ^
825221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[2];
826221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	*(u32*)(out+12) =
827221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s3      ) & 0xff]       ^
828221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s0 >>  8) & 0xff] <<  8 ^
829221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s1 >> 16) & 0xff] << 16 ^
830221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Te4[(s2 >> 24)       ] << 24 ^
831221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[3];
832221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#else
833221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	*(u32*)(out+0) =
834221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Te2[(s0      ) & 0xff] & 0x000000ffU) ^
835221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Te3[(s1 >>  8) & 0xff] & 0x0000ff00U) ^
836221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Te0[(s2 >> 16) & 0xff] & 0x00ff0000U) ^
837221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Te1[(s3 >> 24)       ] & 0xff000000U) ^
838221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[0];
839221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	*(u32*)(out+4) =
840221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Te2[(s1      ) & 0xff] & 0x000000ffU) ^
841221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Te3[(s2 >>  8) & 0xff] & 0x0000ff00U) ^
842221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Te0[(s3 >> 16) & 0xff] & 0x00ff0000U) ^
843221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Te1[(s0 >> 24)       ] & 0xff000000U) ^
844221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[1];
845221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	*(u32*)(out+8) =
846221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Te2[(s2      ) & 0xff] & 0x000000ffU) ^
847221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Te3[(s3 >>  8) & 0xff] & 0x0000ff00U) ^
848221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Te0[(s0 >> 16) & 0xff] & 0x00ff0000U) ^
849221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Te1[(s1 >> 24)       ] & 0xff000000U) ^
850221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[2];
851221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	*(u32*)(out+12) =
852221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Te2[(s3      ) & 0xff] & 0x000000ffU) ^
853221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Te3[(s0 >>  8) & 0xff] & 0x0000ff00U) ^
854221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Te0[(s1 >> 16) & 0xff] & 0x00ff0000U) ^
855221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Te1[(s2 >> 24)       ] & 0xff000000U) ^
856221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[3];
857221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#endif
858221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
859221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
860221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom/*
861221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom * Decrypt a single block
862221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom * in and out can overlap
863221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom */
864221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstromvoid AES_decrypt(const unsigned char *in, unsigned char *out,
865221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		 const AES_KEY *key) {
866221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
867221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	const u32 *rk;
868221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	u32 s0, s1, s2, s3, t[4];
869221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	int r;
870221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
871221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	assert(in && out && key);
872221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	rk = key->rd_key;
873221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
874221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	/*
875221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	 * map byte array block to cipher state
876221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	 * and add initial round key:
877221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	 */
878221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	s0 = GETU32(in     ) ^ rk[0];
879221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	s1 = GETU32(in +  4) ^ rk[1];
880221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	s2 = GETU32(in +  8) ^ rk[2];
881221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	s3 = GETU32(in + 12) ^ rk[3];
882221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
883221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
884221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	prefetch256(Td4);
885221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
886221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom        t[0] =	Td4[(s0      ) & 0xff]       ^
887221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td4[(s3 >>  8) & 0xff] <<  8 ^
888221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td4[(s2 >> 16) & 0xff] << 16 ^
889221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td4[(s1 >> 24)       ] << 24;
890221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom        t[1] =	Td4[(s1      ) & 0xff]       ^
891221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td4[(s0 >>  8) & 0xff] <<  8 ^
892221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td4[(s3 >> 16) & 0xff] << 16 ^
893221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td4[(s2 >> 24)       ] << 24;
894221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom        t[2] =	Td4[(s2      ) & 0xff]       ^
895221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td4[(s1 >>  8) & 0xff] <<  8 ^
896221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td4[(s0 >> 16) & 0xff] << 16 ^
897221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td4[(s3 >> 24)       ] << 24;
898221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom        t[3] =	Td4[(s3      ) & 0xff]       ^
899221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td4[(s2 >>  8) & 0xff] <<  8 ^
900221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td4[(s1 >> 16) & 0xff] << 16 ^
901221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td4[(s0 >> 24)       ] << 24;
902221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
903221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	/* now do the linear transform using words */
904221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	{	int i;
905221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
906221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
907221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		for (i = 0; i < 4; i++) {
908221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			tp1 = t[i];
909221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			m = tp1 & 0x80808080;
910221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
911221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				((m - (m >> 7)) & 0x1b1b1b1b);
912221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			m = tp2 & 0x80808080;
913221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
914221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				((m - (m >> 7)) & 0x1b1b1b1b);
915221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			m = tp4 & 0x80808080;
916221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
917221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				((m - (m >> 7)) & 0x1b1b1b1b);
918221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			tp9 = tp8 ^ tp1;
919221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			tpb = tp9 ^ tp2;
920221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			tpd = tp9 ^ tp4;
921221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			tpe = tp8 ^ tp4 ^ tp2;
922221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#if defined(ROTATE)
923221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			t[i] = tpe ^ ROTATE(tpd,16) ^
924221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				ROTATE(tp9,8) ^ ROTATE(tpb,24);
925221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#else
926221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
927221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(tp9 >> 24) ^ (tp9 << 8) ^
928221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(tpb >> 8) ^ (tpb << 24);
929221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#endif
930221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			t[i] ^= rk[4+i];
931221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		}
932221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	}
933221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#else
934221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	t[0] =	Td0[(s0      ) & 0xff] ^
935221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td1[(s3 >>  8) & 0xff] ^
936221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td2[(s2 >> 16) & 0xff] ^
937221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td3[(s1 >> 24)       ] ^
938221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[4];
939221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	t[1] =	Td0[(s1      ) & 0xff] ^
940221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td1[(s0 >>  8) & 0xff] ^
941221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td2[(s3 >> 16) & 0xff] ^
942221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td3[(s2 >> 24)       ] ^
943221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[5];
944221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	t[2] =	Td0[(s2      ) & 0xff] ^
945221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td1[(s1 >>  8) & 0xff] ^
946221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td2[(s0 >> 16) & 0xff] ^
947221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td3[(s3 >> 24)       ] ^
948221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[6];
949221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	t[3] =	Td0[(s3      ) & 0xff] ^
950221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td1[(s2 >>  8) & 0xff] ^
951221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td2[(s1 >> 16) & 0xff] ^
952221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td3[(s0 >> 24)       ] ^
953221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[7];
954221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#endif
955221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
956221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
957221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    /*
958221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom     * Nr - 2 full rounds:
959221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom     */
960221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) {
961221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#if defined(AES_COMPACT_IN_INNER_ROUNDS)
962221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom        t[0] =	Td4[(s0      ) & 0xff]       ^
963221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td4[(s3 >>  8) & 0xff] <<  8 ^
964221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td4[(s2 >> 16) & 0xff] << 16 ^
965221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td4[(s1 >> 24)       ] << 24;
966221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom        t[1] =	Td4[(s1      ) & 0xff]       ^
967221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td4[(s0 >>  8) & 0xff] <<  8 ^
968221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td4[(s3 >> 16) & 0xff] << 16 ^
969221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td4[(s2 >> 24)       ] << 24;
970221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom        t[2] =	Td4[(s2      ) & 0xff]       ^
971221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td4[(s1 >>  8) & 0xff] <<  8 ^
972221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td4[(s0 >> 16) & 0xff] << 16 ^
973221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td4[(s3 >> 24)       ] << 24;
974221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom        t[3] =	Td4[(s3      ) & 0xff]       ^
975221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td4[(s2 >>  8) & 0xff] <<  8 ^
976221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td4[(s1 >> 16) & 0xff] << 16 ^
977221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td4[(s0 >> 24)       ] << 24;
978221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
979221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	/* now do the linear transform using words */
980221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	{	int i;
981221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
982221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
983221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		for (i = 0; i < 4; i++) {
984221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			tp1 = t[i];
985221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			m = tp1 & 0x80808080;
986221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
987221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				((m - (m >> 7)) & 0x1b1b1b1b);
988221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			m = tp2 & 0x80808080;
989221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
990221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				((m - (m >> 7)) & 0x1b1b1b1b);
991221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			m = tp4 & 0x80808080;
992221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
993221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				((m - (m >> 7)) & 0x1b1b1b1b);
994221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			tp9 = tp8 ^ tp1;
995221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			tpb = tp9 ^ tp2;
996221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			tpd = tp9 ^ tp4;
997221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			tpe = tp8 ^ tp4 ^ tp2;
998221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#if defined(ROTATE)
999221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			t[i] = tpe ^ ROTATE(tpd,16) ^
1000221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				ROTATE(tp9,8) ^ ROTATE(tpb,24);
1001221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#else
1002221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
1003221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(tp9 >> 24) ^ (tp9 << 8) ^
1004221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom				(tpb >> 8) ^ (tpb << 24);
1005221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#endif
1006221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom			t[i] ^= rk[i];
1007221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		}
1008221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	}
1009221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#else
1010221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	t[0] =	Td0[(s0      ) & 0xff] ^
1011221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td1[(s3 >>  8) & 0xff] ^
1012221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td2[(s2 >> 16) & 0xff] ^
1013221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td3[(s1 >> 24)       ] ^
1014221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[0];
1015221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	t[1] =	Td0[(s1      ) & 0xff] ^
1016221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td1[(s0 >>  8) & 0xff] ^
1017221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td2[(s3 >> 16) & 0xff] ^
1018221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td3[(s2 >> 24)       ] ^
1019221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[1];
1020221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	t[2] =	Td0[(s2      ) & 0xff] ^
1021221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td1[(s1 >>  8) & 0xff] ^
1022221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td2[(s0 >> 16) & 0xff] ^
1023221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td3[(s3 >> 24)       ] ^
1024221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[2];
1025221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	t[3] =	Td0[(s3      ) & 0xff] ^
1026221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td1[(s2 >>  8) & 0xff] ^
1027221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td2[(s1 >> 16) & 0xff] ^
1028221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		Td3[(s0 >> 24)       ] ^
1029221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[3];
1030221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom#endif
1031221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
1032221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    }
1033221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom    /*
1034221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	 * apply last round and
1035221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	 * map cipher state to byte array block:
1036221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	 */
1037221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	prefetch256(Td4);
1038221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom
1039221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	*(u32*)(out+0) =
1040221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Td4[(s0      ) & 0xff])	^
1041221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Td4[(s3 >>  8) & 0xff] <<  8) ^
1042221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Td4[(s2 >> 16) & 0xff] << 16) ^
1043221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Td4[(s1 >> 24)       ] << 24) ^
1044221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[0];
1045221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	*(u32*)(out+4) =
1046221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Td4[(s1      ) & 0xff])	 ^
1047221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Td4[(s0 >>  8) & 0xff] <<  8) ^
1048221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Td4[(s3 >> 16) & 0xff] << 16) ^
1049221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Td4[(s2 >> 24)       ] << 24) ^
1050221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[1];
1051221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	*(u32*)(out+8) =
1052221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Td4[(s2      ) & 0xff])	 ^
1053221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Td4[(s1 >>  8) & 0xff] <<  8) ^
1054221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Td4[(s0 >> 16) & 0xff] << 16) ^
1055221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Td4[(s3 >> 24)       ] << 24) ^
1056221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[2];
1057221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom	*(u32*)(out+12) =
1058221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Td4[(s3      ) & 0xff])	 ^
1059221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Td4[(s2 >>  8) & 0xff] <<  8) ^
1060221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Td4[(s1 >> 16) & 0xff] << 16) ^
1061221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		(Td4[(s0 >> 24)       ] << 24) ^
1062221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom		rk[3];
1063221304ee937bc0910948a8be1320cb8cc4eb6d36Brian Carlstrom}
1064