/* Aes.c -- AES encryption / decryption
2016-05-21 : Igor Pavlov : Public domain */
3cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
4cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky#include "Precomp.h"
5cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
6cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky#include "Aes.h"
7cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky#include "CpuArch.h"
8cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
9cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbeckystatic UInt32 T[256 * 4];
10f955a79a9fffb09826cf7547f70d08c3798a2f50Tetsuo Osakastatic const Byte Sbox[256] = {
11cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
12cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
13cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
14cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
15cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
16cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
17cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
18cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
19cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
20cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
21cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
22cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
23cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
24cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
25cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
26cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};
27cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
28cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbeckyvoid MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks);
29cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbeckyvoid MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks);
30cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbeckyvoid MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks);
31cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
32cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbeckyvoid MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *ivAes, Byte *data, size_t numBlocks);
33cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbeckyvoid MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *ivAes, Byte *data, size_t numBlocks);
34cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbeckyvoid MY_FAST_CALL AesCtr_Code_Intel(UInt32 *ivAes, Byte *data, size_t numBlocks);
35cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
36cd66d540cead3f8200b0c73bad9c276d67896c3dDavid SrbeckyAES_CODE_FUNC g_AesCbc_Encode;
37cd66d540cead3f8200b0c73bad9c276d67896c3dDavid SrbeckyAES_CODE_FUNC g_AesCbc_Decode;
38cd66d540cead3f8200b0c73bad9c276d67896c3dDavid SrbeckyAES_CODE_FUNC g_AesCtr_Code;
39cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
40cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbeckystatic UInt32 D[256 * 4];
41cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbeckystatic Byte InvS[256];
42cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
43f955a79a9fffb09826cf7547f70d08c3798a2f50Tetsuo Osakastatic const Byte Rcon[11] = { 0x00, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 };
44cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
/* Doubles a byte value: shift left by one, XOR with 0x1B when bit 7 of the
   input was set, and keep the low 8 bits. The argument is evaluated twice. */
#define xtime(x) ((((x) << 1) ^ ((((x) & 0x80) == 0) ? 0 : 0x1B)) & 0xFF)

/* Packs four byte values into a UInt32; a0 becomes the least significant
   byte and a3 the most significant one. */
#define Ui32(a0, a1, a2, a3) \
    (((UInt32)(a3) << 24) | ((UInt32)(a2) << 16) | ((UInt32)(a1) << 8) | (UInt32)(a0))

/* gbN(x) extracts byte N of x, where byte 0 is the least significant. */
#define gb0(x) ((x) & 0xFF)
#define gb1(x) (((x) >> 8) & 0xFF)
#define gb2(x) (((x) >> 16) & 0xFF)
#define gb3(x) (((x) >> 24) & 0xFF)
53cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
54cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbeckyvoid AesGenTables(void)
55cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky{
56cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  unsigned i;
57cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  for (i = 0; i < 256; i++)
58cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    InvS[Sbox[i]] = (Byte)i;
59f955a79a9fffb09826cf7547f70d08c3798a2f50Tetsuo Osaka
60cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  for (i = 0; i < 256; i++)
61cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  {
62cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    {
63cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      UInt32 a1 = Sbox[i];
64cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      UInt32 a2 = xtime(a1);
65cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      UInt32 a3 = a2 ^ a1;
66cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      T[        i] = Ui32(a2, a1, a1, a3);
67cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      T[0x100 + i] = Ui32(a3, a2, a1, a1);
68cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      T[0x200 + i] = Ui32(a1, a3, a2, a1);
69cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      T[0x300 + i] = Ui32(a1, a1, a3, a2);
70cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    }
71cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    {
72cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      UInt32 a1 = InvS[i];
73cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      UInt32 a2 = xtime(a1);
74cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      UInt32 a4 = xtime(a2);
75cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      UInt32 a8 = xtime(a4);
76cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      UInt32 a9 = a8 ^ a1;
77cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      UInt32 aB = a8 ^ a2 ^ a1;
78cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      UInt32 aD = a8 ^ a4 ^ a1;
79cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      UInt32 aE = a8 ^ a4 ^ a2;
80cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      D[        i] = Ui32(aE, a9, aD, aB);
81cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      D[0x100 + i] = Ui32(aB, aE, a9, aD);
82cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      D[0x200 + i] = Ui32(aD, aB, aE, a9);
83cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      D[0x300 + i] = Ui32(a9, aD, aB, aE);
84cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    }
85cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  }
86f955a79a9fffb09826cf7547f70d08c3798a2f50Tetsuo Osaka
87cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  g_AesCbc_Encode = AesCbc_Encode;
88cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  g_AesCbc_Decode = AesCbc_Decode;
89cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  g_AesCtr_Code = AesCtr_Code;
90f955a79a9fffb09826cf7547f70d08c3798a2f50Tetsuo Osaka
91cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  #ifdef MY_CPU_X86_OR_AMD64
92cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  if (CPU_Is_Aes_Supported())
93cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  {
94cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    g_AesCbc_Encode = AesCbc_Encode_Intel;
95cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    g_AesCbc_Decode = AesCbc_Decode_Intel;
96cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    g_AesCtr_Code = AesCtr_Code_Intel;
97cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  }
98cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  #endif
99cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky}
100cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
101f955a79a9fffb09826cf7547f70d08c3798a2f50Tetsuo Osaka
/* Helper macros for Aes_Encode(). Besides their arguments they use the
   identifiers `w` (round-key pointer), `m` and `dest` from the expansion site.
   The x argument must be a literal 0..3 because it is pasted onto "gb". */

/* Looks up sub-table x of T with byte x of state word (row + x) mod 4. */
#define HT(row, x, st) T[((x) << 8) + gb ## x((st)[((row) + (x)) & 3])]

/* Computes one output word: XOR of the four sub-table lookups and the
   round-key word w[base + row]. No trailing semicolon. */
#define HT4(out, row, st, base) out[row] = \
    HT(row, 0, st) ^ \
    HT(row, 1, st) ^ \
    HT(row, 2, st) ^ \
    HT(row, 3, st) ^ w[(base) + (row)]

/* Computes all four output words; expands to four complete statements. */
#define HT16(out, st, base) \
    HT4(out, 0, st, base); \
    HT4(out, 1, st, base); \
    HT4(out, 2, st, base); \
    HT4(out, 3, st, base);

/* Final step: plain Sbox substitution of byte x of m[(row + x) mod 4]. */
#define FT(row, x) Sbox[gb ## x(m[((row) + (x)) & 3])]

/* Writes output word `row`: four substituted bytes XOR w[row]. Includes the
   trailing semicolon. */
#define FT4(row) dest[row] = Ui32(FT(row, 0), FT(row, 1), FT(row, 2), FT(row, 3)) ^ w[row];
118cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
119f955a79a9fffb09826cf7547f70d08c3798a2f50Tetsuo Osaka
/* Helper macros for Aes_Decode(). Besides their arguments they use the
   identifiers `w` (round-key pointer), `m` and `dest` from the expansion site.
   The x argument must be a literal 0..3 because it is pasted onto "gb". */

/* Looks up sub-table x of D with byte x of state word (row - x) mod 4. */
#define HD(row, x, st) D[((x) << 8) + gb ## x((st)[((row) - (x)) & 3])]

/* Computes one output word: XOR of the four sub-table lookups and the
   round-key word w[base + row]. Includes the trailing semicolon. */
#define HD4(out, row, st, base) out[row] = \
    HD(row, 0, st) ^ \
    HD(row, 1, st) ^ \
    HD(row, 2, st) ^ \
    HD(row, 3, st) ^ w[(base) + (row)];

/* Computes all four output words. */
#define HD16(out, st, base) \
    HD4(out, 0, st, base); \
    HD4(out, 1, st, base); \
    HD4(out, 2, st, base); \
    HD4(out, 3, st, base);

/* Final step: plain InvS substitution of byte x of m[(row - x) mod 4]. */
#define FD(row, x) InvS[gb ## x(m[((row) - (x)) & 3])]

/* Writes output word `row`: four substituted bytes XOR w[row]. Includes the
   trailing semicolon. */
#define FD4(row) dest[row] = Ui32(FD(row, 0), FD(row, 1), FD(row, 2), FD(row, 3)) ^ w[row];
136cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
137cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbeckyvoid MY_FAST_CALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize)
138cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky{
139cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  unsigned i, wSize;
140cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  wSize = keySize + 28;
141cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  keySize /= 4;
142cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  w[0] = ((UInt32)keySize / 2) + 3;
143cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  w += 4;
144cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
145cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  for (i = 0; i < keySize; i++, key += 4)
146cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    w[i] = GetUi32(key);
147cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
148cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  for (; i < wSize; i++)
149cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  {
150cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    UInt32 t = w[i - 1];
151cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    unsigned rem = i % keySize;
152cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    if (rem == 0)
153cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      t = Ui32(Sbox[gb1(t)] ^ Rcon[i / keySize], Sbox[gb2(t)], Sbox[gb3(t)], Sbox[gb0(t)]);
154cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    else if (keySize > 6 && rem == 4)
155cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      t = Ui32(Sbox[gb0(t)], Sbox[gb1(t)], Sbox[gb2(t)], Sbox[gb3(t)]);
156cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    w[i] = w[i - keySize] ^ t;
157cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  }
158cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky}
159cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
160cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbeckyvoid MY_FAST_CALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize)
161cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky{
162cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  unsigned i, num;
163cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  Aes_SetKey_Enc(w, key, keySize);
164cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  num = keySize + 20;
165cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  w += 8;
166cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  for (i = 0; i < num; i++)
167cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  {
168cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    UInt32 r = w[i];
169cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    w[i] =
170f955a79a9fffb09826cf7547f70d08c3798a2f50Tetsuo Osaka      D[        (unsigned)Sbox[gb0(r)]] ^
171f955a79a9fffb09826cf7547f70d08c3798a2f50Tetsuo Osaka      D[0x100 + (unsigned)Sbox[gb1(r)]] ^
172f955a79a9fffb09826cf7547f70d08c3798a2f50Tetsuo Osaka      D[0x200 + (unsigned)Sbox[gb2(r)]] ^
173f955a79a9fffb09826cf7547f70d08c3798a2f50Tetsuo Osaka      D[0x300 + (unsigned)Sbox[gb3(r)]];
174cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  }
175cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky}
176cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
/* Aes_Encode and Aes_Decode functions work with little-endian words.
  src and dest are pointers to 4 UInt32 words.
  src and dest can point to the same block. */
180cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
181cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbeckystatic void Aes_Encode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
182cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky{
183cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  UInt32 s[4];
184cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  UInt32 m[4];
185cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  UInt32 numRounds2 = w[0];
186cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  w += 4;
187cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  s[0] = src[0] ^ w[0];
188cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  s[1] = src[1] ^ w[1];
189cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  s[2] = src[2] ^ w[2];
190cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  s[3] = src[3] ^ w[3];
191cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  w += 4;
192cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  for (;;)
193cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  {
194cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    HT16(m, s, 0);
195cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    if (--numRounds2 == 0)
196cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      break;
197cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    HT16(s, m, 4);
198cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    w += 8;
199cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  }
200cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  w += 4;
201cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  FT4(0); FT4(1); FT4(2); FT4(3);
202cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky}
203cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
204cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbeckystatic void Aes_Decode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
205cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky{
206cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  UInt32 s[4];
207cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  UInt32 m[4];
208cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  UInt32 numRounds2 = w[0];
209cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  w += 4 + numRounds2 * 8;
210cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  s[0] = src[0] ^ w[0];
211cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  s[1] = src[1] ^ w[1];
212cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  s[2] = src[2] ^ w[2];
213cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  s[3] = src[3] ^ w[3];
214cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  for (;;)
215cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  {
216cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    w -= 8;
217cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    HD16(m, s, 4);
218cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    if (--numRounds2 == 0)
219cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      break;
220cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    HD16(s, m, 0);
221cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  }
222cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  FD4(0); FD4(1); FD4(2); FD4(3);
223cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky}
224cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
225cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbeckyvoid AesCbc_Init(UInt32 *p, const Byte *iv)
226cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky{
227cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  unsigned i;
228cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  for (i = 0; i < 4; i++)
229cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    p[i] = GetUi32(iv + i * 4);
230cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky}
231cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
232cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbeckyvoid MY_FAST_CALL AesCbc_Encode(UInt32 *p, Byte *data, size_t numBlocks)
233cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky{
234cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  for (; numBlocks != 0; numBlocks--, data += AES_BLOCK_SIZE)
235cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  {
236cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    p[0] ^= GetUi32(data);
237cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    p[1] ^= GetUi32(data + 4);
238cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    p[2] ^= GetUi32(data + 8);
239cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    p[3] ^= GetUi32(data + 12);
240cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
241cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    Aes_Encode(p + 4, p, p);
242cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
243cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    SetUi32(data,      p[0]);
244cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    SetUi32(data + 4,  p[1]);
245cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    SetUi32(data + 8,  p[2]);
246cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    SetUi32(data + 12, p[3]);
247cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  }
248cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky}
249cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
250cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbeckyvoid MY_FAST_CALL AesCbc_Decode(UInt32 *p, Byte *data, size_t numBlocks)
251cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky{
252cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  UInt32 in[4], out[4];
253cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  for (; numBlocks != 0; numBlocks--, data += AES_BLOCK_SIZE)
254cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  {
255cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    in[0] = GetUi32(data);
256cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    in[1] = GetUi32(data + 4);
257cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    in[2] = GetUi32(data + 8);
258cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    in[3] = GetUi32(data + 12);
259cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
260cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    Aes_Decode(p + 4, out, in);
261cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
262cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    SetUi32(data,      p[0] ^ out[0]);
263cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    SetUi32(data + 4,  p[1] ^ out[1]);
264cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    SetUi32(data + 8,  p[2] ^ out[2]);
265cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    SetUi32(data + 12, p[3] ^ out[3]);
266cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
267cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    p[0] = in[0];
268cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    p[1] = in[1];
269cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    p[2] = in[2];
270cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    p[3] = in[3];
271cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  }
272cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky}
273cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky
274cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbeckyvoid MY_FAST_CALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks)
275cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky{
276cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  for (; numBlocks != 0; numBlocks--)
277cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  {
278cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    UInt32 temp[4];
279cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    Byte buf[16];
280cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    int i;
281f955a79a9fffb09826cf7547f70d08c3798a2f50Tetsuo Osaka
282cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    if (++p[0] == 0)
283cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      p[1]++;
284f955a79a9fffb09826cf7547f70d08c3798a2f50Tetsuo Osaka
285cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    Aes_Encode(p + 4, temp, p);
286f955a79a9fffb09826cf7547f70d08c3798a2f50Tetsuo Osaka
287cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    SetUi32(buf,      temp[0]);
288cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    SetUi32(buf + 4,  temp[1]);
289cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    SetUi32(buf + 8,  temp[2]);
290cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    SetUi32(buf + 12, temp[3]);
291f955a79a9fffb09826cf7547f70d08c3798a2f50Tetsuo Osaka
292cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky    for (i = 0; i < 16; i++)
293cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky      *data++ ^= buf[i];
294cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky  }
295cd66d540cead3f8200b0c73bad9c276d67896c3dDavid Srbecky}
296