1/*
2 ---------------------------------------------------------------------------
3 Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
4
5 LICENSE TERMS
6
7 The redistribution and use of this software (with or without changes)
8 is allowed without the payment of fees or royalties provided that:
9
10  1. source code distributions include the above copyright notice, this
11     list of conditions and the following disclaimer;
12
13  2. binary distributions include the above copyright notice, this list
14     of conditions and the following disclaimer in their documentation;
15
16  3. the name of the copyright holder is not used to endorse products
17     built using this software without specific written permission.
18
19 DISCLAIMER
20
21 This software is provided 'as is' with no explicit or implied warranties
22 in respect of its properties, including, but not limited to, correctness
23 and/or fitness for purpose.
24 ---------------------------------------------------------------------------
25 Issue 09/09/2006
26
27 This is an AES implementation that uses only 8-bit byte operations on the
28 cipher state (there are options to use 32-bit types if available).
29
30 The combination of mix columns and byte substitution used here is based on
31 that developed by Karl Malbrain. His contribution is acknowledged.
32 */
33
34/* define if you have a fast memcpy function on your system */
35#if 1
36#  define HAVE_MEMCPY
37#  include <string.h>
38#if 0
39#  if defined( _MSC_VER )
40#    include <intrin.h>
41#    pragma intrinsic( memcpy )
42#  endif
43#endif
44#endif
45
46#include <stdlib.h>
47
48/* define if you have fast 32-bit types on your system */
49#if 1
50#  define HAVE_UINT_32T
51#endif
52
/* set to 1 to use the precomputed lookup tables, or 0 to compute the
   S-box and field products directly (smaller, slower) */
54#if 1
55#  define USE_TABLES
56#endif
57
58/*  On Intel Core 2 duo VERSION_1 is faster */
59
60/* alternative versions (test for performance on your system) */
61#if 1
62#  define VERSION_1
63#endif
64
65#include "aes.h"
66
#if defined( HAVE_UINT_32T )
  /* uint_32t must be EXACTLY 32 bits wide: the word-oriented block
     helpers below (xor_block, copy_and_key, copy_block) treat one
     16-byte AES block as four of these.  The previous typedef used
     'unsigned long', which is 64 bits on LP64 platforms (64-bit
     Linux/macOS), making those helpers read and write 32 bytes per
     block.  Use the C99 exact-width type instead. */
#  include <stdint.h>
  typedef uint32_t uint_32t;
#endif
70
/* functions for finite field multiplication in the AES Galois field    */

/* WPOLY is the full 9-bit AES field polynomial x^8+x^4+x^3+x+1 (0x11b),
   BPOLY is its low byte, and DPOLY is the constant folded in when an
   odd value is halved by d2() below                                    */
#define WPOLY   0x011b
#define BPOLY     0x1b
#define DPOLY   0x008d

/* multiply a field element by 1, 2, 4 or 8: shift left, then reduce by
   XORing in WPOLY once for each high bit shifted out of the byte.
   NOTE(review): the argument x is not parenthesised in f2/f4/f8, so
   these must only be invoked with simple variable arguments - which is
   how they are used throughout this file.                              */
#define f1(x)   (x)
#define f2(x)   ((x << 1) ^ (((x >> 7) & 1) * WPOLY))
#define f4(x)   ((x << 2) ^ (((x >> 6) & 1) * WPOLY) ^ (((x >> 6) & 2) * WPOLY))
#define f8(x)   ((x << 3) ^ (((x >> 5) & 1) * WPOLY) ^ (((x >> 5) & 2) * WPOLY) \
                          ^ (((x >> 5) & 4) * WPOLY))
/* divide a field element by 2 (the inverse of f2)                      */
#define d2(x)   (((x) >> 1) ^ ((x) & 1 ? DPOLY : 0))

/* multiplication by 3, 9, 11, 13 and 14, composed from the powers above */
#define f3(x)   (f2(x) ^ x)
#define f9(x)   (f8(x) ^ x)
#define fb(x)   (f8(x) ^ f2(x) ^ x)
#define fd(x)   (f8(x) ^ f4(x) ^ x)
#define fe(x)   (f8(x) ^ f4(x) ^ f2(x))
89
90#if defined( USE_TABLES )
91
92#define sb_data(w) {    /* S Box data values */                            \
93    w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
94    w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
95    w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
96    w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
97    w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
98    w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
99    w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
100    w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
101    w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
102    w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
103    w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
104    w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
105    w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
106    w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
107    w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
108    w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
109    w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
110    w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
111    w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
112    w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
113    w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
114    w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
115    w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
116    w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
117    w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
118    w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
119    w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
120    w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
121    w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
122    w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
123    w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
124    w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
125
126#define isb_data(w) {   /* inverse S Box data values */                    \
127    w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38),\
128    w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7), w(0xfb),\
129    w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f), w(0xff), w(0x87),\
130    w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4), w(0xde), w(0xe9), w(0xcb),\
131    w(0x54), w(0x7b), w(0x94), w(0x32), w(0xa6), w(0xc2), w(0x23), w(0x3d),\
132    w(0xee), w(0x4c), w(0x95), w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e),\
133    w(0x08), w(0x2e), w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2),\
134    w(0x76), w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25),\
135    w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), w(0x16),\
136    w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), w(0xb6), w(0x92),\
137    w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd), w(0xed), w(0xb9), w(0xda),\
138    w(0x5e), w(0x15), w(0x46), w(0x57), w(0xa7), w(0x8d), w(0x9d), w(0x84),\
139    w(0x90), w(0xd8), w(0xab), w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a),\
140    w(0xf7), w(0xe4), w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06),\
141    w(0xd0), w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02),\
142    w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), w(0x6b),\
143    w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67), w(0xdc), w(0xea),\
144    w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0), w(0xb4), w(0xe6), w(0x73),\
145    w(0x96), w(0xac), w(0x74), w(0x22), w(0xe7), w(0xad), w(0x35), w(0x85),\
146    w(0xe2), w(0xf9), w(0x37), w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e),\
147    w(0x47), w(0xf1), w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89),\
148    w(0x6f), w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b),\
149    w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79), w(0x20),\
150    w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), w(0x5a), w(0xf4),\
151    w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), w(0x07), w(0xc7), w(0x31),\
152    w(0xb1), w(0x12), w(0x10), w(0x59), w(0x27), w(0x80), w(0xec), w(0x5f),\
153    w(0x60), w(0x51), w(0x7f), w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d),\
154    w(0x2d), w(0xe5), w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef),\
155    w(0xa0), w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0),\
156    w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99), w(0x61),\
157    w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77), w(0xd6), w(0x26),\
158    w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55), w(0x21), w(0x0c), w(0x7d) }
159
160#define mm_data(w) {    /* basic data for forming finite field tables */   \
161    w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07),\
162    w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e), w(0x0f),\
163    w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15), w(0x16), w(0x17),\
164    w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c), w(0x1d), w(0x1e), w(0x1f),\
165    w(0x20), w(0x21), w(0x22), w(0x23), w(0x24), w(0x25), w(0x26), w(0x27),\
166    w(0x28), w(0x29), w(0x2a), w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f),\
167    w(0x30), w(0x31), w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37),\
168    w(0x38), w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f),\
169    w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), w(0x47),\
170    w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), w(0x4e), w(0x4f),\
171    w(0x50), w(0x51), w(0x52), w(0x53), w(0x54), w(0x55), w(0x56), w(0x57),\
172    w(0x58), w(0x59), w(0x5a), w(0x5b), w(0x5c), w(0x5d), w(0x5e), w(0x5f),\
173    w(0x60), w(0x61), w(0x62), w(0x63), w(0x64), w(0x65), w(0x66), w(0x67),\
174    w(0x68), w(0x69), w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f),\
175    w(0x70), w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77),\
176    w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), w(0x7f),\
177    w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85), w(0x86), w(0x87),\
178    w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), w(0x8d), w(0x8e), w(0x8f),\
179    w(0x90), w(0x91), w(0x92), w(0x93), w(0x94), w(0x95), w(0x96), w(0x97),\
180    w(0x98), w(0x99), w(0x9a), w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f),\
181    w(0xa0), w(0xa1), w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7),\
182    w(0xa8), w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf),\
183    w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6), w(0xb7),\
184    w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), w(0xbe), w(0xbf),\
185    w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), w(0xc5), w(0xc6), w(0xc7),\
186    w(0xc8), w(0xc9), w(0xca), w(0xcb), w(0xcc), w(0xcd), w(0xce), w(0xcf),\
187    w(0xd0), w(0xd1), w(0xd2), w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7),\
188    w(0xd8), w(0xd9), w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf),\
189    w(0xe0), w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7),\
190    w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee), w(0xef),\
191    w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5), w(0xf6), w(0xf7),\
192    w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc), w(0xfd), w(0xfe), w(0xff) }
193
/* the lookup tables are generated at compile time by expanding the
   data macros above through the field-multiplication macros           */

/* forward and inverse S-boxes                                         */
static const uint_8t sbox[256]  =  sb_data(f1);
static const uint_8t isbox[256] = isb_data(f1);

/* S-box outputs pre-multiplied by 2 and 3 (forward MixColumns)        */
static const uint_8t gfm2_sbox[256] = sb_data(f2);
static const uint_8t gfm3_sbox[256] = sb_data(f3);

/* field multiplication by 9, 11, 13 and 14 (inverse MixColumns)       */
static const uint_8t gfmul_9[256] = mm_data(f9);
static const uint_8t gfmul_b[256] = mm_data(fb);
static const uint_8t gfmul_d[256] = mm_data(fd);
static const uint_8t gfmul_e[256] = mm_data(fe);

/* accessor macros shared with the table-free build below              */
#define s_box(x)     sbox[(x)]
#define is_box(x)    isbox[(x)]
#define gfm2_sb(x)   gfm2_sbox[(x)]
#define gfm3_sb(x)   gfm3_sbox[(x)]
#define gfm_9(x)     gfmul_9[(x)]
#define gfm_b(x)     gfmul_b[(x)]
#define gfm_d(x)     gfmul_d[(x)]
#define gfm_e(x)     gfmul_e[(x)]
213
214#else
215
216/* this is the high bit of x right shifted by 1 */
217/* position. Since the starting polynomial has  */
218/* 9 bits (0x11b), this right shift keeps the   */
219/* values of all top bits within a byte         */
220
221static uint_8t hibit(const uint_8t x)
222{   uint_8t r = (uint_8t)((x >> 1) | (x >> 2));
223
224    r |= (r >> 2);
225    r |= (r >> 4);
226    return (r + 1) >> 1;
227}
228
229/* return the inverse of the finite field element x */
230
static uint_8t gf_inv(const uint_8t x)
{   uint_8t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;

    /* extended Euclidean algorithm over GF(2)[x] modulo the AES field
       polynomial: p2 is repeatedly reduced by p1 (and vice versa) while
       the same steps are applied to v2/v1, which accumulate the Bezout
       coefficients; n1/n2 cache each polynomial's high bit (shifted
       right once - see hibit) so the alignment 'shift' below can be
       done with an integer divide and multiply */

    if(x < 2)                           /* 0 maps to 0, 1 is self-inverse */
        return x;

    for( ; ; )
    {
        if(n1)
            while(n2 >= n1)             /* divide polynomial p2 by p1    */
            {
                n2 /= n1;               /* shift smaller polynomial left */
                p2 ^= (p1 * n2) & 0xff; /* and remove from larger one    */
                v2 ^= (v1 * n2);        /* shift accumulated value and   */
                n2 = hibit(p2);         /* add into result               */
            }
        else
            return v1;                  /* p1 reduced to 1: v1 = x^-1    */

        if(n2)                          /* repeat with values swapped    */
            while(n1 >= n2)
            {
                n1 /= n2;
                p1 ^= p2 * n1;
                v1 ^= v2 * n1;
                n1 = hibit(p1);
            }
        else
            return v2;                  /* p2 reduced to 1: v2 = x^-1    */
    }
}
262
263/* The forward and inverse affine transformations used in the S-box */
/* forward affine transform of the S-box: circular bit-matrix multiply
   x ^ rot1(x) ^ rot2(x) ^ rot3(x) ^ rot4(x), then XOR with 0x63; in
   the 32-bit path the rotation is realised by folding the bits that
   overflowed into the second byte back down with (w ^ (w >> 8))      */
uint_8t fwd_affine(const uint_8t x)
{
#if defined( HAVE_UINT_32T )
    uint_32t w = x;
    w ^= (w << 1) ^ (w << 2) ^ (w << 3) ^ (w << 4);
    return 0x63 ^ ((w ^ (w >> 8)) & 0xff);
#else
    return 0x63 ^ x ^ (x << 1) ^ (x << 2) ^ (x << 3) ^ (x << 4)
                    ^ (x >> 7) ^ (x >> 6) ^ (x >> 5) ^ (x >> 4);
#endif
}
275
/* inverse affine transform of the S-box: circular bit-matrix multiply
   rot1(x) ^ rot3(x) ^ rot6(x), then XOR with 0x05 - exactly undoes
   fwd_affine above                                                   */
uint_8t inv_affine(const uint_8t x)
{
#if defined( HAVE_UINT_32T )
    uint_32t w = x;
    w = (w << 1) ^ (w << 3) ^ (w << 6);
    return 0x05 ^ ((w ^ (w >> 8)) & 0xff);
#else
    return 0x05 ^ (x << 1) ^ (x << 3) ^ (x << 6)
                ^ (x >> 7) ^ (x >> 5) ^ (x >> 2);
#endif
}
287
/* without tables, the S-box and the field products are computed on
   demand from the primitives above (slow, but no static data)        */
#define s_box(x)   fwd_affine(gf_inv(x))
#define is_box(x)  gf_inv(inv_affine(x))
#define gfm2_sb(x) f2(s_box(x))
#define gfm3_sb(x) f3(s_box(x))
#define gfm_9(x)   f9(x)
#define gfm_b(x)   fb(x)
#define gfm_d(x)   fd(x)
#define gfm_e(x)   fe(x)
296
297#endif
298
299#if defined( HAVE_MEMCPY )
300#  define block_copy_nn(d, s, l)    memcpy(d, s, l)
301#  define block_copy(d, s)          memcpy(d, s, N_BLOCK)
302#else
303#  define block_copy_nn(d, s, l)    copy_block_nn(d, s, l)
304#  define block_copy(d, s)          copy_block(d, s)
305#endif
306
307#if !defined( HAVE_MEMCPY )
/* copy one 16-byte block from s to d; fallback used only when a fast
   memcpy is not available.  NOTE(review): the 32-bit path casts byte
   buffers to uint_32t* - it assumes the blocks are suitably aligned
   and that this type-punning is tolerated by the compiler; confirm on
   strict-alignment targets. */
static void copy_block( void *d, const void *s )
{
#if defined( HAVE_UINT_32T )
    ((uint_32t*)d)[ 0] = ((uint_32t*)s)[ 0];
    ((uint_32t*)d)[ 1] = ((uint_32t*)s)[ 1];
    ((uint_32t*)d)[ 2] = ((uint_32t*)s)[ 2];
    ((uint_32t*)d)[ 3] = ((uint_32t*)s)[ 3];
#else
    ((uint_8t*)d)[ 0] = ((uint_8t*)s)[ 0];
    ((uint_8t*)d)[ 1] = ((uint_8t*)s)[ 1];
    ((uint_8t*)d)[ 2] = ((uint_8t*)s)[ 2];
    ((uint_8t*)d)[ 3] = ((uint_8t*)s)[ 3];
    ((uint_8t*)d)[ 4] = ((uint_8t*)s)[ 4];
    ((uint_8t*)d)[ 5] = ((uint_8t*)s)[ 5];
    ((uint_8t*)d)[ 6] = ((uint_8t*)s)[ 6];
    ((uint_8t*)d)[ 7] = ((uint_8t*)s)[ 7];
    ((uint_8t*)d)[ 8] = ((uint_8t*)s)[ 8];
    ((uint_8t*)d)[ 9] = ((uint_8t*)s)[ 9];
    ((uint_8t*)d)[10] = ((uint_8t*)s)[10];
    ((uint_8t*)d)[11] = ((uint_8t*)s)[11];
    ((uint_8t*)d)[12] = ((uint_8t*)s)[12];
    ((uint_8t*)d)[13] = ((uint_8t*)s)[13];
    ((uint_8t*)d)[14] = ((uint_8t*)s)[14];
    ((uint_8t*)d)[15] = ((uint_8t*)s)[15];
#endif
}
334
335static void copy_block_nn( void * d, const void *s, uint_8t nn )
336{
337    while( nn-- )
338        *((uint_8t*)d)++ = *((uint_8t*)s)++;
339}
340#endif
341
/* XOR the 16-byte block at s into the block at d (d ^= s).
   NOTE(review): the 32-bit path casts byte buffers to uint_32t* and
   so assumes suitable alignment - confirm on strict-alignment
   targets. */
static void xor_block( void *d, const void *s )
{
#if defined( HAVE_UINT_32T )
    ((uint_32t*)d)[ 0] ^= ((uint_32t*)s)[ 0];
    ((uint_32t*)d)[ 1] ^= ((uint_32t*)s)[ 1];
    ((uint_32t*)d)[ 2] ^= ((uint_32t*)s)[ 2];
    ((uint_32t*)d)[ 3] ^= ((uint_32t*)s)[ 3];
#else
    ((uint_8t*)d)[ 0] ^= ((uint_8t*)s)[ 0];
    ((uint_8t*)d)[ 1] ^= ((uint_8t*)s)[ 1];
    ((uint_8t*)d)[ 2] ^= ((uint_8t*)s)[ 2];
    ((uint_8t*)d)[ 3] ^= ((uint_8t*)s)[ 3];
    ((uint_8t*)d)[ 4] ^= ((uint_8t*)s)[ 4];
    ((uint_8t*)d)[ 5] ^= ((uint_8t*)s)[ 5];
    ((uint_8t*)d)[ 6] ^= ((uint_8t*)s)[ 6];
    ((uint_8t*)d)[ 7] ^= ((uint_8t*)s)[ 7];
    ((uint_8t*)d)[ 8] ^= ((uint_8t*)s)[ 8];
    ((uint_8t*)d)[ 9] ^= ((uint_8t*)s)[ 9];
    ((uint_8t*)d)[10] ^= ((uint_8t*)s)[10];
    ((uint_8t*)d)[11] ^= ((uint_8t*)s)[11];
    ((uint_8t*)d)[12] ^= ((uint_8t*)s)[12];
    ((uint_8t*)d)[13] ^= ((uint_8t*)s)[13];
    ((uint_8t*)d)[14] ^= ((uint_8t*)s)[14];
    ((uint_8t*)d)[15] ^= ((uint_8t*)s)[15];
#endif
}
368
/* d = s ^ k for one 16-byte block: a block copy fused with a round-key
   addition.  The '#elif 1' selects the unrolled byte version whenever
   no 32-bit type is available; the dead '#else' branch shows the
   equivalent two-call formulation. */
static void copy_and_key( void *d, const void *s, const void *k )
{
#if defined( HAVE_UINT_32T )
    ((uint_32t*)d)[ 0] = ((uint_32t*)s)[ 0] ^ ((uint_32t*)k)[ 0];
    ((uint_32t*)d)[ 1] = ((uint_32t*)s)[ 1] ^ ((uint_32t*)k)[ 1];
    ((uint_32t*)d)[ 2] = ((uint_32t*)s)[ 2] ^ ((uint_32t*)k)[ 2];
    ((uint_32t*)d)[ 3] = ((uint_32t*)s)[ 3] ^ ((uint_32t*)k)[ 3];
#elif 1
    ((uint_8t*)d)[ 0] = ((uint_8t*)s)[ 0] ^ ((uint_8t*)k)[ 0];
    ((uint_8t*)d)[ 1] = ((uint_8t*)s)[ 1] ^ ((uint_8t*)k)[ 1];
    ((uint_8t*)d)[ 2] = ((uint_8t*)s)[ 2] ^ ((uint_8t*)k)[ 2];
    ((uint_8t*)d)[ 3] = ((uint_8t*)s)[ 3] ^ ((uint_8t*)k)[ 3];
    ((uint_8t*)d)[ 4] = ((uint_8t*)s)[ 4] ^ ((uint_8t*)k)[ 4];
    ((uint_8t*)d)[ 5] = ((uint_8t*)s)[ 5] ^ ((uint_8t*)k)[ 5];
    ((uint_8t*)d)[ 6] = ((uint_8t*)s)[ 6] ^ ((uint_8t*)k)[ 6];
    ((uint_8t*)d)[ 7] = ((uint_8t*)s)[ 7] ^ ((uint_8t*)k)[ 7];
    ((uint_8t*)d)[ 8] = ((uint_8t*)s)[ 8] ^ ((uint_8t*)k)[ 8];
    ((uint_8t*)d)[ 9] = ((uint_8t*)s)[ 9] ^ ((uint_8t*)k)[ 9];
    ((uint_8t*)d)[10] = ((uint_8t*)s)[10] ^ ((uint_8t*)k)[10];
    ((uint_8t*)d)[11] = ((uint_8t*)s)[11] ^ ((uint_8t*)k)[11];
    ((uint_8t*)d)[12] = ((uint_8t*)s)[12] ^ ((uint_8t*)k)[12];
    ((uint_8t*)d)[13] = ((uint_8t*)s)[13] ^ ((uint_8t*)k)[13];
    ((uint_8t*)d)[14] = ((uint_8t*)s)[14] ^ ((uint_8t*)k)[14];
    ((uint_8t*)d)[15] = ((uint_8t*)s)[15] ^ ((uint_8t*)k)[15];
#else
    block_copy(d, s);
    xor_block(d, k);
#endif
}
398
/* AddRoundKey: XOR one 16-byte round key into the state in place */
static void add_round_key( uint_8t d[N_BLOCK], const uint_8t k[N_BLOCK] )
{
    xor_block(d, k);
}
403
/* combined SubBytes + ShiftRows on the column-major state: every byte
   passes through the S-box while rows 1..3 rotate left by 1, 2 and 3
   positions respectively */
static void shift_sub_rows( uint_8t st[N_BLOCK] )
{   uint_8t tt;

    /* row 0 (bytes 0,4,8,12): no rotation, substitute in place */
    st[ 0] = s_box(st[ 0]); st[ 4] = s_box(st[ 4]);
    st[ 8] = s_box(st[ 8]); st[12] = s_box(st[12]);

    /* row 1: rotate left one position */
    tt = st[1]; st[ 1] = s_box(st[ 5]); st[ 5] = s_box(st[ 9]);
    st[ 9] = s_box(st[13]); st[13] = s_box( tt );

    /* row 2: rotate left two positions (two independent swaps) */
    tt = st[2]; st[ 2] = s_box(st[10]); st[10] = s_box( tt );
    tt = st[6]; st[ 6] = s_box(st[14]); st[14] = s_box( tt );

    /* row 3: rotate left three positions (one to the right) */
    tt = st[15]; st[15] = s_box(st[11]); st[11] = s_box(st[ 7]);
    st[ 7] = s_box(st[ 3]); st[ 3] = s_box( tt );
}
419
/* combined InvSubBytes + InvShiftRows: inverse S-box on every byte
   while rows 1..3 rotate right by 1, 2 and 3 positions (undoing
   shift_sub_rows above) */
static void inv_shift_sub_rows( uint_8t st[N_BLOCK] )
{   uint_8t tt;

    /* row 0: no rotation */
    st[ 0] = is_box(st[ 0]); st[ 4] = is_box(st[ 4]);
    st[ 8] = is_box(st[ 8]); st[12] = is_box(st[12]);

    /* row 1: rotate right one position */
    tt = st[13]; st[13] = is_box(st[9]); st[ 9] = is_box(st[5]);
    st[ 5] = is_box(st[1]); st[ 1] = is_box( tt );

    /* row 2: rotate right two positions (two swaps) */
    tt = st[2]; st[ 2] = is_box(st[10]); st[10] = is_box( tt );
    tt = st[6]; st[ 6] = is_box(st[14]); st[14] = is_box( tt );

    /* row 3: rotate right three positions (one to the left) */
    tt = st[3]; st[ 3] = is_box(st[ 7]); st[ 7] = is_box(st[11]);
    st[11] = is_box(st[15]); st[15] = is_box( tt );
}
435
#if defined( VERSION_1 )
  /* combined SubBytes + ShiftRows + MixColumns for one encryption
     round; VERSION_1 works in place on dt via a local copy of the
     state */
  static void mix_sub_columns( uint_8t dt[N_BLOCK] )
  { uint_8t st[N_BLOCK];
    block_copy(st, dt);
#else
  /* two-buffer variant: reads the state from st, writes to dt */
  static void mix_sub_columns( uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK] )
  {
#endif
    /* the source index patterns (0,5,10,15), (4,9,14,3), ... fold the
       ShiftRows rotation into the column gather, while gfm2_sb/gfm3_sb
       deliver the S-box output already multiplied by 2 or 3 in GF(2^8)
       - one table lookup per term */
    dt[ 0] = gfm2_sb(st[0]) ^ gfm3_sb(st[5]) ^ s_box(st[10]) ^ s_box(st[15]);
    dt[ 1] = s_box(st[0]) ^ gfm2_sb(st[5]) ^ gfm3_sb(st[10]) ^ s_box(st[15]);
    dt[ 2] = s_box(st[0]) ^ s_box(st[5]) ^ gfm2_sb(st[10]) ^ gfm3_sb(st[15]);
    dt[ 3] = gfm3_sb(st[0]) ^ s_box(st[5]) ^ s_box(st[10]) ^ gfm2_sb(st[15]);

    dt[ 4] = gfm2_sb(st[4]) ^ gfm3_sb(st[9]) ^ s_box(st[14]) ^ s_box(st[3]);
    dt[ 5] = s_box(st[4]) ^ gfm2_sb(st[9]) ^ gfm3_sb(st[14]) ^ s_box(st[3]);
    dt[ 6] = s_box(st[4]) ^ s_box(st[9]) ^ gfm2_sb(st[14]) ^ gfm3_sb(st[3]);
    dt[ 7] = gfm3_sb(st[4]) ^ s_box(st[9]) ^ s_box(st[14]) ^ gfm2_sb(st[3]);

    dt[ 8] = gfm2_sb(st[8]) ^ gfm3_sb(st[13]) ^ s_box(st[2]) ^ s_box(st[7]);
    dt[ 9] = s_box(st[8]) ^ gfm2_sb(st[13]) ^ gfm3_sb(st[2]) ^ s_box(st[7]);
    dt[10] = s_box(st[8]) ^ s_box(st[13]) ^ gfm2_sb(st[2]) ^ gfm3_sb(st[7]);
    dt[11] = gfm3_sb(st[8]) ^ s_box(st[13]) ^ s_box(st[2]) ^ gfm2_sb(st[7]);

    dt[12] = gfm2_sb(st[12]) ^ gfm3_sb(st[1]) ^ s_box(st[6]) ^ s_box(st[11]);
    dt[13] = s_box(st[12]) ^ gfm2_sb(st[1]) ^ gfm3_sb(st[6]) ^ s_box(st[11]);
    dt[14] = s_box(st[12]) ^ s_box(st[1]) ^ gfm2_sb(st[6]) ^ gfm3_sb(st[11]);
    dt[15] = gfm3_sb(st[12]) ^ s_box(st[1]) ^ s_box(st[6]) ^ gfm2_sb(st[11]);
  }
464
#if defined( VERSION_1 )
  /* combined InvMixColumns + InvSubBytes + InvShiftRows for one
     decryption round; VERSION_1 works in place on dt via a local copy */
  static void inv_mix_sub_columns( uint_8t dt[N_BLOCK] )
  { uint_8t st[N_BLOCK];
    block_copy(st, dt);
#else
  /* two-buffer variant: reads st, writes dt */
  static void inv_mix_sub_columns( uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK] )
  {
#endif
    /* each input column is inverse-mixed with the 14/11/13/9 tables,
       passed through the inverse S-box, and scattered to the positions
       required by InvShiftRows (hence the 0,5,10,15 / 4,9,14,3 ...
       destination patterns) */
    dt[ 0] = is_box(gfm_e(st[ 0]) ^ gfm_b(st[ 1]) ^ gfm_d(st[ 2]) ^ gfm_9(st[ 3]));
    dt[ 5] = is_box(gfm_9(st[ 0]) ^ gfm_e(st[ 1]) ^ gfm_b(st[ 2]) ^ gfm_d(st[ 3]));
    dt[10] = is_box(gfm_d(st[ 0]) ^ gfm_9(st[ 1]) ^ gfm_e(st[ 2]) ^ gfm_b(st[ 3]));
    dt[15] = is_box(gfm_b(st[ 0]) ^ gfm_d(st[ 1]) ^ gfm_9(st[ 2]) ^ gfm_e(st[ 3]));

    dt[ 4] = is_box(gfm_e(st[ 4]) ^ gfm_b(st[ 5]) ^ gfm_d(st[ 6]) ^ gfm_9(st[ 7]));
    dt[ 9] = is_box(gfm_9(st[ 4]) ^ gfm_e(st[ 5]) ^ gfm_b(st[ 6]) ^ gfm_d(st[ 7]));
    dt[14] = is_box(gfm_d(st[ 4]) ^ gfm_9(st[ 5]) ^ gfm_e(st[ 6]) ^ gfm_b(st[ 7]));
    dt[ 3] = is_box(gfm_b(st[ 4]) ^ gfm_d(st[ 5]) ^ gfm_9(st[ 6]) ^ gfm_e(st[ 7]));

    dt[ 8] = is_box(gfm_e(st[ 8]) ^ gfm_b(st[ 9]) ^ gfm_d(st[10]) ^ gfm_9(st[11]));
    dt[13] = is_box(gfm_9(st[ 8]) ^ gfm_e(st[ 9]) ^ gfm_b(st[10]) ^ gfm_d(st[11]));
    dt[ 2] = is_box(gfm_d(st[ 8]) ^ gfm_9(st[ 9]) ^ gfm_e(st[10]) ^ gfm_b(st[11]));
    dt[ 7] = is_box(gfm_b(st[ 8]) ^ gfm_d(st[ 9]) ^ gfm_9(st[10]) ^ gfm_e(st[11]));

    dt[12] = is_box(gfm_e(st[12]) ^ gfm_b(st[13]) ^ gfm_d(st[14]) ^ gfm_9(st[15]));
    dt[ 1] = is_box(gfm_9(st[12]) ^ gfm_e(st[13]) ^ gfm_b(st[14]) ^ gfm_d(st[15]));
    dt[ 6] = is_box(gfm_d(st[12]) ^ gfm_9(st[13]) ^ gfm_e(st[14]) ^ gfm_b(st[15]));
    dt[11] = is_box(gfm_b(st[12]) ^ gfm_d(st[13]) ^ gfm_9(st[14]) ^ gfm_e(st[15]));
  }
493
494#if defined( AES_ENC_PREKEYED ) || defined( AES_DEC_PREKEYED )
495
496/*  Set the cipher key for the pre-keyed version */
497
/* expand 'key' into the round-key schedule ctx->ksch and set ctx->rnd
   to the round count (10/12/14); keylen may be given in bytes (16, 24,
   32) or in bits for 128/192.  NOTE(review): the bits alias for 256 is
   commented out below - presumably to avoid issues with a narrow
   length_type; confirm before re-enabling.  Returns 0 on success or
   (return_type)-1 (with ctx->rnd cleared) for a bad length. */
return_type aes_set_key( const unsigned char key[], length_type keylen, aes_context ctx[1] )
{
    uint_8t cc, rc, hi;

    /* normalise keylen to bytes */
    switch( keylen )
    {
    case 16:
    case 128:
        keylen = 16;
        break;
    case 24:
    case 192:
        keylen = 24;
        break;
    case 32:
    /*    case 256:           length in bits (256 = 8*32) */
        keylen = 32;
        break;
    default:
        ctx->rnd = 0;
        return (return_type)-1;
    }
    block_copy_nn(ctx->ksch, key, keylen);
    /* total schedule length in bytes: 16 * (rounds + 1) = 4*(Nk + 7) */
    hi = (keylen + 28) << 2;
    ctx->rnd = (hi >> 4) - 1;
    for( cc = keylen, rc = 1; cc < hi; cc += 4 )
    {   uint_8t tt, t0, t1, t2, t3;

        /* previous 4-byte word of the schedule */
        t0 = ctx->ksch[cc - 4];
        t1 = ctx->ksch[cc - 3];
        t2 = ctx->ksch[cc - 2];
        t3 = ctx->ksch[cc - 1];
        if( cc % keylen == 0 )
        {
            /* start of a key-length group: rotate, substitute and add
               the round constant, which is then doubled in GF(2^8) */
            tt = t0;
            t0 = s_box(t1) ^ rc;
            t1 = s_box(t2);
            t2 = s_box(t3);
            t3 = s_box(tt);
            rc = f2(rc);
        }
        else if( keylen > 24 && cc % keylen == 16 )
        {
            /* extra SubWord step in the middle of each 256-bit group */
            t0 = s_box(t0);
            t1 = s_box(t1);
            t2 = s_box(t2);
            t3 = s_box(t3);
        }
        /* chain with the word one key length back */
        tt = cc - keylen;
        ctx->ksch[cc + 0] = ctx->ksch[tt + 0] ^ t0;
        ctx->ksch[cc + 1] = ctx->ksch[tt + 1] ^ t1;
        ctx->ksch[cc + 2] = ctx->ksch[tt + 2] ^ t2;
        ctx->ksch[cc + 3] = ctx->ksch[tt + 3] ^ t3;
    }
    return 0;
}
554
555#endif
556
557#if defined( AES_ENC_PREKEYED )
558
559/*  Encrypt a single block of 16 bytes */
560
/* encrypt one 16-byte block with a schedule prepared by aes_set_key;
   returns 0 on success or (return_type)-1 if the context was never
   keyed (ctx->rnd == 0); in and out may be the same buffer */
return_type aes_encrypt( const unsigned char in[N_BLOCK], unsigned char  out[N_BLOCK], const aes_context ctx[1] )
{
    if( ctx->rnd )
    {
        uint_8t s1[N_BLOCK], r;
        /* initial AddRoundKey */
        copy_and_key( s1, in, ctx->ksch );

        /* full middle rounds */
        for( r = 1 ; r < ctx->rnd ; ++r )
#if defined( VERSION_1 )
        {
            mix_sub_columns( s1 );
            add_round_key( s1, ctx->ksch + r * N_BLOCK);
        }
#else
        {   uint_8t s2[N_BLOCK];
            mix_sub_columns( s2, s1 );
            copy_and_key( s1, s2, ctx->ksch + r * N_BLOCK);
        }
#endif
        /* final round omits MixColumns; r == ctx->rnd here */
        shift_sub_rows( s1 );
        copy_and_key( out, s1, ctx->ksch + r * N_BLOCK );
    }
    else
        return (return_type)-1;
    return 0;
}
587
588/* CBC encrypt a number of blocks (input and return an IV) */
589
590return_type aes_cbc_encrypt( const unsigned char *in, unsigned char *out,
591                         int n_block, unsigned char iv[N_BLOCK], const aes_context ctx[1] )
592{
593
594    while(n_block--)
595    {
596        xor_block(iv, in);
597        if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
598			return EXIT_FAILURE;
599        memcpy(out, iv, N_BLOCK);
600        in += N_BLOCK;
601        out += N_BLOCK;
602    }
603    return EXIT_SUCCESS;
604}
605
606#endif
607
608#if defined( AES_DEC_PREKEYED )
609
610/*  Decrypt a single block of 16 bytes */
611
/* decrypt one 16-byte block with a schedule prepared by aes_set_key;
   returns 0 on success or (return_type)-1 if the context was never
   keyed; in and out may be the same buffer */
return_type aes_decrypt( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], const aes_context ctx[1] )
{
    if( ctx->rnd )
    {
        uint_8t s1[N_BLOCK], r;
        /* undo the final encryption round: last round key, then the
           inverse row/substitution step */
        copy_and_key( s1, in, ctx->ksch + ctx->rnd * N_BLOCK );
        inv_shift_sub_rows( s1 );

        /* middle rounds, walking the key schedule backwards */
        for( r = ctx->rnd ; --r ; )
#if defined( VERSION_1 )
        {
            add_round_key( s1, ctx->ksch + r * N_BLOCK );
            inv_mix_sub_columns( s1 );
        }
#else
        {   uint_8t s2[N_BLOCK];
            copy_and_key( s2, s1, ctx->ksch + r * N_BLOCK );
            inv_mix_sub_columns( s1, s2 );
        }
#endif
        /* undo the initial AddRoundKey */
        copy_and_key( out, s1, ctx->ksch );
    }
    else
        return (return_type)-1;
    return 0;
}
638
639/* CBC decrypt a number of blocks (input and return an IV) */
640
641return_type aes_cbc_decrypt( const unsigned char *in, unsigned char *out,
642                         int n_block, unsigned char iv[N_BLOCK], const aes_context ctx[1] )
643{
644    while(n_block--)
645    {   uint_8t tmp[N_BLOCK];
646
647        memcpy(tmp, in, N_BLOCK);
648        if(aes_decrypt(in, out, ctx) != EXIT_SUCCESS)
649			return EXIT_FAILURE;
650        xor_block(out, iv);
651        memcpy(iv, tmp, N_BLOCK);
652        in += N_BLOCK;
653        out += N_BLOCK;
654    }
655    return EXIT_SUCCESS;
656}
657
658#endif
659
660#if defined( AES_ENC_128_OTFK )
661
/*  The 'on the fly' encryption key update for 128 bit keys */
663
static void update_encrypt_key_128( uint_8t k[N_BLOCK], uint_8t *rc )
{   uint_8t cc;

    /* first word: XOR with the S-box of the rotated last word and the
       round constant, which is then doubled in GF(2^8) for next time */
    k[0] ^= s_box(k[13]) ^ *rc;
    k[1] ^= s_box(k[14]);
    k[2] ^= s_box(k[15]);
    k[3] ^= s_box(k[12]);
    *rc = f2( *rc );

    /* remaining words: chain each word with the one before it */
    for(cc = 4; cc < 16; cc += 4 )
    {
        k[cc + 0] ^= k[cc - 4];
        k[cc + 1] ^= k[cc - 3];
        k[cc + 2] ^= k[cc - 2];
        k[cc + 3] ^= k[cc - 1];
    }
}
681
682/*  Encrypt a single block of 16 bytes with 'on the fly' 128 bit keying */
683
/* encrypt one block with a 128-bit key, deriving each round key as it
   is needed; o_key receives a working copy of key (they may be the
   same buffer) and is left holding the final round key on return */
void aes_encrypt_128( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                     const unsigned char key[N_BLOCK], unsigned char o_key[N_BLOCK] )
{   uint_8t s1[N_BLOCK], r, rc = 1;

    if(o_key != key)
        block_copy( o_key, key );
    /* initial AddRoundKey with the user key */
    copy_and_key( s1, in, o_key );

    /* nine full rounds, stepping the key schedule before each key add */
    for( r = 1 ; r < 10 ; ++r )
#if defined( VERSION_1 )
    {
        mix_sub_columns( s1 );
        update_encrypt_key_128( o_key, &rc );
        add_round_key( s1, o_key );
    }
#else
    {   uint_8t s2[N_BLOCK];
        mix_sub_columns( s2, s1 );
        update_encrypt_key_128( o_key, &rc );
        copy_and_key( s1, s2, o_key );
    }
#endif

    /* final round (no MixColumns) */
    shift_sub_rows( s1 );
    update_encrypt_key_128( o_key, &rc );
    copy_and_key( out, s1, o_key );
}
711
712#endif
713
714#if defined( AES_DEC_128_OTFK )
715
/*  The 'on the fly' decryption key update for 128 bit keys */
717
static void update_decrypt_key_128( uint_8t k[N_BLOCK], uint_8t *rc )
{   uint_8t cc;

    /* undo the word chaining, last word first */
    for( cc = 12; cc > 0; cc -= 4 )
    {
        k[cc + 0] ^= k[cc - 4];
        k[cc + 1] ^= k[cc - 3];
        k[cc + 2] ^= k[cc - 2];
        k[cc + 3] ^= k[cc - 1];
    }
    /* step the round constant backwards (halve in GF(2^8)), then undo
       the rotated SubWord applied to the first word */
    *rc = d2(*rc);
    k[0] ^= s_box(k[13]) ^ *rc;
    k[1] ^= s_box(k[14]);
    k[2] ^= s_box(k[15]);
    k[3] ^= s_box(k[12]);
}
734
735/*  Decrypt a single block of 16 bytes with 'on the fly' 128 bit keying */
736
/* decrypt one block with 'on the fly' 128-bit keying; key must hold
   the FINAL round key of the forward schedule (as left in o_key by
   aes_encrypt_128); o_key is wound back to the user key on return */
void aes_decrypt_128( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                      const unsigned char key[N_BLOCK], unsigned char o_key[N_BLOCK] )
{
    /* rc = 0x6c: d2(0x6c) = 0x36, the last forward round constant */
    uint_8t s1[N_BLOCK], r, rc = 0x6c;
    if(o_key != key)
        block_copy( o_key, key );

    /* undo the final round */
    copy_and_key( s1, in, o_key );
    inv_shift_sub_rows( s1 );

    /* nine middle rounds, winding the key schedule backwards */
    for( r = 10 ; --r ; )
#if defined( VERSION_1 )
    {
        update_decrypt_key_128( o_key, &rc );
        add_round_key( s1, o_key );
        inv_mix_sub_columns( s1 );
    }
#else
    {   uint_8t s2[N_BLOCK];
        update_decrypt_key_128( o_key, &rc );
        copy_and_key( s2, s1, o_key );
        inv_mix_sub_columns( s1, s2 );
    }
#endif
    /* undo the initial AddRoundKey with the original user key */
    update_decrypt_key_128( o_key, &rc );
    copy_and_key( out, s1, o_key );
}
764
765#endif
766
767#if defined( AES_ENC_256_OTFK )
768
/*  The 'on the fly' encryption key update for 256 bit keys */
770
static void update_encrypt_key_256( uint_8t k[2 * N_BLOCK], uint_8t *rc )
{   uint_8t cc;

    /* first word: rotated SubWord of the schedule's last word plus the
       round constant, which is then doubled in GF(2^8) */
    k[0] ^= s_box(k[29]) ^ *rc;
    k[1] ^= s_box(k[30]);
    k[2] ^= s_box(k[31]);
    k[3] ^= s_box(k[28]);
    *rc = f2( *rc );

    /* chain the rest of the first 16-byte half */
    for(cc = 4; cc < 16; cc += 4)
    {
        k[cc + 0] ^= k[cc - 4];
        k[cc + 1] ^= k[cc - 3];
        k[cc + 2] ^= k[cc - 2];
        k[cc + 3] ^= k[cc - 1];
    }

    /* the second half starts with the extra (unrotated) SubWord step
       of the 256-bit schedule */
    k[16] ^= s_box(k[12]);
    k[17] ^= s_box(k[13]);
    k[18] ^= s_box(k[14]);
    k[19] ^= s_box(k[15]);

    /* chain the rest of the second half */
    for( cc = 20; cc < 32; cc += 4 )
    {
        k[cc + 0] ^= k[cc - 4];
        k[cc + 1] ^= k[cc - 3];
        k[cc + 2] ^= k[cc - 2];
        k[cc + 3] ^= k[cc - 1];
    }
}
801
802/*  Encrypt a single block of 16 bytes with 'on the fly' 256 bit keying */
803
/* encrypt one block with a 256-bit key, deriving round keys as they
   are needed; o_key receives a 32-byte working copy of key and holds
   the final schedule state on return.  Odd rounds use the second half
   of the 32-byte key block, even rounds refresh it and use the first
   half. */
void aes_encrypt_256( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                      const unsigned char key[2 * N_BLOCK], unsigned char o_key[2 * N_BLOCK] )
{
    uint_8t s1[N_BLOCK], r, rc = 1;
    if(o_key != key)
    {
        block_copy( o_key, key );
        block_copy( o_key + 16, key + 16 );
    }
    /* initial AddRoundKey with the first half of the user key */
    copy_and_key( s1, in, o_key );

    /* thirteen full rounds */
    for( r = 1 ; r < 14 ; ++r )
#if defined( VERSION_1 )
    {
        mix_sub_columns(s1);
        if( r & 1 )
            add_round_key( s1, o_key + 16 );
        else
        {
            update_encrypt_key_256( o_key, &rc );
            add_round_key( s1, o_key );
        }
    }
#else
    {   uint_8t s2[N_BLOCK];
        mix_sub_columns( s2, s1 );
        if( r & 1 )
            copy_and_key( s1, s2, o_key + 16 );
        else
        {
            update_encrypt_key_256( o_key, &rc );
            copy_and_key( s1, s2, o_key );
        }
    }
#endif

    /* final round (no MixColumns) uses a freshly stepped first half */
    shift_sub_rows( s1 );
    update_encrypt_key_256( o_key, &rc );
    copy_and_key( out, s1, o_key );
}
844
845#endif
846
847#if defined( AES_DEC_256_OTFK )
848
/*  The 'on the fly' decryption key update for 256 bit keys */
850
static void update_decrypt_key_256( uint_8t k[2 * N_BLOCK], uint_8t *rc )
{   uint_8t cc;

    /* undo the chaining in the second 16-byte half, last word first */
    for(cc = 28; cc > 16; cc -= 4)
    {
        k[cc + 0] ^= k[cc - 4];
        k[cc + 1] ^= k[cc - 3];
        k[cc + 2] ^= k[cc - 2];
        k[cc + 3] ^= k[cc - 1];
    }

    /* undo the extra (unrotated) SubWord step between the halves */
    k[16] ^= s_box(k[12]);
    k[17] ^= s_box(k[13]);
    k[18] ^= s_box(k[14]);
    k[19] ^= s_box(k[15]);

    /* undo the chaining in the first half */
    for(cc = 12; cc > 0; cc -= 4)
    {
        k[cc + 0] ^= k[cc - 4];
        k[cc + 1] ^= k[cc - 3];
        k[cc + 2] ^= k[cc - 2];
        k[cc + 3] ^= k[cc - 1];
    }

    /* step the round constant backwards and undo the rotated SubWord
       plus round-constant addition on the first word */
    *rc = d2(*rc);
    k[0] ^= s_box(k[29]) ^ *rc;
    k[1] ^= s_box(k[30]);
    k[2] ^= s_box(k[31]);
    k[3] ^= s_box(k[28]);
}
881
882/*  Decrypt a single block of 16 bytes with 'on the fly'
883    256 bit keying
884*/
/* key must hold the final 32-byte schedule state left by
   aes_encrypt_256; o_key is wound back to the user key on return.
   rc = 0x80: d2(0x80) = 0x40, the last forward round constant of the
   256-bit schedule. */
void aes_decrypt_256( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                      const unsigned char key[2 * N_BLOCK], unsigned char o_key[2 * N_BLOCK] )
{
    uint_8t s1[N_BLOCK], r, rc = 0x80;

    if(o_key != key)
    {
        block_copy( o_key, key );
        block_copy( o_key + 16, key + 16 );
    }

    /* undo the final round */
    copy_and_key( s1, in, o_key );
    inv_shift_sub_rows( s1 );

    /* thirteen middle rounds, mirroring the alternation used during
       encryption: odd rounds step the schedule back and use the second
       half, even rounds use the first half as it stands */
    for( r = 14 ; --r ; )
#if defined( VERSION_1 )
    {
        if( ( r & 1 ) )
        {
            update_decrypt_key_256( o_key, &rc );
            add_round_key( s1, o_key + 16 );
        }
        else
            add_round_key( s1, o_key );
        inv_mix_sub_columns( s1 );
    }
#else
    {   uint_8t s2[N_BLOCK];
        if( ( r & 1 ) )
        {
            update_decrypt_key_256( o_key, &rc );
            copy_and_key( s2, s1, o_key + 16 );
        }
        else
            copy_and_key( s2, s1, o_key );
        inv_mix_sub_columns( s1, s2 );
    }
#endif
    /* undo the initial AddRoundKey with the original user key */
    copy_and_key( out, s1, o_key );
}
925
926#endif
927