1/*
2 ---------------------------------------------------------------------------
3 Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
4
5 LICENSE TERMS
6
7 The redistribution and use of this software (with or without changes)
8 is allowed without the payment of fees or royalties provided that:
9
10  1. source code distributions include the above copyright notice, this
11     list of conditions and the following disclaimer;
12
13  2. binary distributions include the above copyright notice, this list
14     of conditions and the following disclaimer in their documentation;
15
16  3. the name of the copyright holder is not used to endorse products
17     built using this software without specific written permission.
18
19 DISCLAIMER
20
21 This software is provided 'as is' with no explicit or implied warranties
22 in respect of its properties, including, but not limited to, correctness
23 and/or fitness for purpose.
24 ---------------------------------------------------------------------------
25 Issue 09/09/2006
26
27 This is an AES implementation that uses only 8-bit byte operations on the
28 cipher state (there are options to use 32-bit types if available).
29
30 The combination of mix columns and byte substitution used here is based on
31 that developed by Karl Malbrain. His contribution is acknowledged.
32 */
33
34/* define if you have a fast memcpy function on your system */
35#if 1
36#define HAVE_MEMCPY
37#include <string.h>
38#if 0
39#if defined(_MSC_VER)
40#include <intrin.h>
41#pragma intrinsic(memcpy)
42#endif
43#endif
44#endif
45
46#include <stdlib.h>
47
48/* add the target configuration to allow using internal data types and
49 * compilation options */
50#include "bt_target.h"
51
52/* define if you have fast 32-bit types on your system */
53#if 1
54#define HAVE_UINT_32T
55#endif
56
/* define to use precomputed lookup tables (undefine to compute on the fly) */
58#if 1
59#define USE_TABLES
60#endif
61
62/*  On Intel Core 2 duo VERSION_1 is faster */
63
64/* alternative versions (test for performance on your system) */
65#if 1
66#define VERSION_1
67#endif
68
69#include "aes.h"
70
71#if defined(HAVE_UINT_32T)
72typedef uint32_t uint_32t;
73#endif
74
75/* functions for finite field multiplication in the AES Galois field    */
76
/* Field polynomial constants:
 *   WPOLY - x^8 + x^4 + x^3 + x + 1 with the x^8 term included (9 bits)
 *   BPOLY - the same polynomial without the x^8 term (fits in one byte)
 *   DPOLY - value XORed in by d2() when the low bit of its argument is set
 */
#define WPOLY 0x011b
#define BPOLY 0x1b
#define DPOLY 0x008d

/* fN(x) multiplies x by the constant N: the shifted value is XORed with a
 * suitably scaled WPOLY for every bit that was shifted past bit 7.
 * d2(x) shifts right by one and XORs in DPOLY when bit 0 was set.
 * NOTE: these are macros, so the argument is evaluated more than once. */
#define f1(x) (x)
#define f2(x) (((x) << 1) ^ ((((x) >> 7) & 1) * WPOLY))
#define f4(x) \
  (((x) << 2) ^ ((((x) >> 6) & 1) * WPOLY) ^ ((((x) >> 6) & 2) * WPOLY))
#define f8(x)                                                             \
  (((x) << 3) ^ ((((x) >> 5) & 1) * WPOLY) ^ ((((x) >> 5) & 2) * WPOLY) ^ \
   ((((x) >> 5) & 4) * WPOLY))
#define d2(x) (((x) >> 1) ^ ((x)&1 ? DPOLY : 0))

/* multipliers composed from the power-of-two ones (3 = 2^1, 9 = 8^1, ...) */
#define f3(x) (f2(x) ^ (x))
#define f9(x) (f8(x) ^ (x))
#define fb(x) (f8(x) ^ f2(x) ^ (x))
#define fd(x) (f8(x) ^ f4(x) ^ (x))
#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
95
96#if defined(USE_TABLES)
97
/* Initializer list of the 256 forward S-box values in index order.  Each
 * value is wrapped in the caller-supplied macro w(), so the same list can
 * produce the plain S-box or a pre-multiplied variant of it. */
#define sb_data(w)                                                          \
  { /* S Box data values */                                                 \
    w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5), \
        w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab),      \
        w(0x76), w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59),      \
        w(0x47), w(0xf0), w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c),      \
        w(0xa4), w(0x72), w(0xc0), w(0xb7), w(0xfd), w(0x93), w(0x26),      \
        w(0x36), w(0x3f), w(0xf7), w(0xcc), w(0x34), w(0xa5), w(0xe5),      \
        w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15), w(0x04), w(0xc7),      \
        w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a), w(0x07),      \
        w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),      \
        w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a),      \
        w(0xa0), w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3),      \
        w(0x2f), w(0x84), w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20),      \
        w(0xfc), w(0xb1), w(0x5b), w(0x6a), w(0xcb), w(0xbe), w(0x39),      \
        w(0x4a), w(0x4c), w(0x58), w(0xcf), w(0xd0), w(0xef), w(0xaa),      \
        w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85), w(0x45), w(0xf9),      \
        w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8), w(0x51),      \
        w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),      \
        w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3),      \
        w(0xd2), w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97),      \
        w(0x44), w(0x17), w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64),      \
        w(0x5d), w(0x19), w(0x73), w(0x60), w(0x81), w(0x4f), w(0xdc),      \
        w(0x22), w(0x2a), w(0x90), w(0x88), w(0x46), w(0xee), w(0xb8),      \
        w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb), w(0xe0), w(0x32),      \
        w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c), w(0xc2),      \
        w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),      \
        w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e),      \
        w(0xa9), w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a),      \
        w(0xae), w(0x08), w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c),      \
        w(0xa6), w(0xb4), w(0xc6), w(0xe8), w(0xdd), w(0x74), w(0x1f),      \
        w(0x4b), w(0xbd), w(0x8b), w(0x8a), w(0x70), w(0x3e), w(0xb5),      \
        w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e), w(0x61), w(0x35),      \
        w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e), w(0xe1),      \
        w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),      \
        w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28),      \
        w(0xdf), w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6),      \
        w(0x42), w(0x68), w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0),      \
        w(0x54), w(0xbb), w(0x16)                                           \
  }
138
/* Initializer list of the 256 inverse S-box values in index order, each
 * wrapped in the caller-supplied macro w(). */
#define isb_data(w)                                                         \
  { /* inverse S Box data values */                                         \
    w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38), \
        w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7),      \
        w(0xfb), w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f),      \
        w(0xff), w(0x87), w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4),      \
        w(0xde), w(0xe9), w(0xcb), w(0x54), w(0x7b), w(0x94), w(0x32),      \
        w(0xa6), w(0xc2), w(0x23), w(0x3d), w(0xee), w(0x4c), w(0x95),      \
        w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e), w(0x08), w(0x2e),      \
        w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2), w(0x76),      \
        w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25),      \
        w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98),      \
        w(0x16), w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65),      \
        w(0xb6), w(0x92), w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd),      \
        w(0xed), w(0xb9), w(0xda), w(0x5e), w(0x15), w(0x46), w(0x57),      \
        w(0xa7), w(0x8d), w(0x9d), w(0x84), w(0x90), w(0xd8), w(0xab),      \
        w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a), w(0xf7), w(0xe4),      \
        w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06), w(0xd0),      \
        w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02),      \
        w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a),      \
        w(0x6b), w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67),      \
        w(0xdc), w(0xea), w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0),      \
        w(0xb4), w(0xe6), w(0x73), w(0x96), w(0xac), w(0x74), w(0x22),      \
        w(0xe7), w(0xad), w(0x35), w(0x85), w(0xe2), w(0xf9), w(0x37),      \
        w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e), w(0x47), w(0xf1),      \
        w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89), w(0x6f),      \
        w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b),      \
        w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79),      \
        w(0x20), w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd),      \
        w(0x5a), w(0xf4), w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88),      \
        w(0x07), w(0xc7), w(0x31), w(0xb1), w(0x12), w(0x10), w(0x59),      \
        w(0x27), w(0x80), w(0xec), w(0x5f), w(0x60), w(0x51), w(0x7f),      \
        w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d), w(0x2d), w(0xe5),      \
        w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef), w(0xa0),      \
        w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0),      \
        w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99),      \
        w(0x61), w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77),      \
        w(0xd6), w(0x26), w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55),      \
        w(0x21), w(0x0c), w(0x7d)                                           \
  }
179
/* Initializer list of the values 0x00..0xff in order, each wrapped in the
 * caller-supplied macro w().  Applying a multiplication macro as w() yields
 * a 256-entry "multiply by constant" table. */
#define mm_data(w)                                                          \
  { /* basic data for forming finite field tables */                        \
    w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07), \
        w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e),      \
        w(0x0f), w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15),      \
        w(0x16), w(0x17), w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c),      \
        w(0x1d), w(0x1e), w(0x1f), w(0x20), w(0x21), w(0x22), w(0x23),      \
        w(0x24), w(0x25), w(0x26), w(0x27), w(0x28), w(0x29), w(0x2a),      \
        w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f), w(0x30), w(0x31),      \
        w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37), w(0x38),      \
        w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f),      \
        w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46),      \
        w(0x47), w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d),      \
        w(0x4e), w(0x4f), w(0x50), w(0x51), w(0x52), w(0x53), w(0x54),      \
        w(0x55), w(0x56), w(0x57), w(0x58), w(0x59), w(0x5a), w(0x5b),      \
        w(0x5c), w(0x5d), w(0x5e), w(0x5f), w(0x60), w(0x61), w(0x62),      \
        w(0x63), w(0x64), w(0x65), w(0x66), w(0x67), w(0x68), w(0x69),      \
        w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f), w(0x70),      \
        w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77),      \
        w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e),      \
        w(0x7f), w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85),      \
        w(0x86), w(0x87), w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c),      \
        w(0x8d), w(0x8e), w(0x8f), w(0x90), w(0x91), w(0x92), w(0x93),      \
        w(0x94), w(0x95), w(0x96), w(0x97), w(0x98), w(0x99), w(0x9a),      \
        w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f), w(0xa0), w(0xa1),      \
        w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7), w(0xa8),      \
        w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf),      \
        w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6),      \
        w(0xb7), w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd),      \
        w(0xbe), w(0xbf), w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4),      \
        w(0xc5), w(0xc6), w(0xc7), w(0xc8), w(0xc9), w(0xca), w(0xcb),      \
        w(0xcc), w(0xcd), w(0xce), w(0xcf), w(0xd0), w(0xd1), w(0xd2),      \
        w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7), w(0xd8), w(0xd9),      \
        w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf), w(0xe0),      \
        w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7),      \
        w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee),      \
        w(0xef), w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5),      \
        w(0xf6), w(0xf7), w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc),      \
        w(0xfd), w(0xfe), w(0xff)                                           \
  }
220
/* forward and inverse S-boxes (f1 leaves every entry unchanged) */
static const uint_8t sbox[256] = sb_data(f1);
static const uint_8t isbox[256] = isb_data(f1);

/* forward S-box entries pre-multiplied by 2 and by 3 */
static const uint_8t gfm2_sbox[256] = sb_data(f2);
static const uint_8t gfm3_sbox[256] = sb_data(f3);

/* multiplication of every byte value by 0x09, 0x0b, 0x0d and 0x0e */
static const uint_8t gfmul_9[256] = mm_data(f9);
static const uint_8t gfmul_b[256] = mm_data(fb);
static const uint_8t gfmul_d[256] = mm_data(fd);
static const uint_8t gfmul_e[256] = mm_data(fe);

/* Table-backed accessors; x is used directly as the table index, so it must
 * be in the range 0..255. */
#define s_box(x) sbox[(x)]
#define is_box(x) isbox[(x)]
#define gfm2_sb(x) gfm2_sbox[(x)]
#define gfm3_sb(x) gfm3_sbox[(x)]
#define gfm_9(x) gfmul_9[(x)]
#define gfm_b(x) gfmul_b[(x)]
#define gfm_d(x) gfmul_d[(x)]
#define gfm_e(x) gfmul_e[(x)]
240
241#else
242
243/* this is the high bit of x right shifted by 1 */
244/* position. Since the starting polynomial has  */
245/* 9 bits (0x11b), this right shift keeps the   */
246/* values of all top bits within a byte         */
247
248static uint_8t hibit(const uint_8t x) {
249  uint_8t r = (uint_8t)((x >> 1) | (x >> 2));
250
251  r |= (r >> 2);
252  r |= (r >> 4);
253  return (r + 1) >> 1;
254}
255
/* Return the inverse of the finite field element x.
 *
 * Inputs below 2 (that is, 0 and 1) are returned unchanged.  Otherwise the
 * two polynomials p1 (starting as x) and p2 (starting as BPOLY) are divided
 * by each other in turn; n1/n2 hold their top bits in the shifted-by-one
 * hibit() form and v1/v2 accumulate the corresponding multipliers.  The
 * loop ends as soon as one side has no top bit left, and the accumulated
 * value of the other side is returned. */

static uint_8t gf_inv(const uint_8t x) {
  /* n2 starts at 0x80: the shifted-by-one form of the ninth bit that BPOLY
   * itself does not store */
  uint_8t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;

  if (x < 2) return x;

  for (;;) {
    if (n1)
      while (n2 >= n1) /* divide polynomial p2 by p1    */
      {
        n2 /= n1;               /* shift smaller polynomial left */
        p2 ^= (p1 * n2) & 0xff; /* and remove from larger one    */
        v2 ^= (v1 * n2);        /* shift accumulated value and   */
        n2 = hibit(p2);         /* add into result               */
      }
    else
      return v1;

    if (n2) /* repeat with values swapped    */
      while (n1 >= n2) {
        n1 /= n2;
        p1 ^= p2 * n1; /* truncated to 8 bits by the uint_8t store */
        v1 ^= v2 * n1;
        n1 = hibit(p1);
      }
    else
      return v2;
  }
}
286
287/* The forward and inverse affine transformations used in the S-box */
288uint_8t fwd_affine(const uint_8t x) {
289#if defined(HAVE_UINT_32T)
290  uint_32t w = x;
291  w ^= (w << 1) ^ (w << 2) ^ (w << 3) ^ (w << 4);
292  return 0x63 ^ ((w ^ (w >> 8)) & 0xff);
293#else
294  return 0x63 ^ x ^ (x << 1) ^ (x << 2) ^ (x << 3) ^ (x << 4) ^ (x >> 7) ^
295         (x >> 6) ^ (x >> 5) ^ (x >> 4);
296#endif
297}
298
299uint_8t inv_affine(const uint_8t x) {
300#if defined(HAVE_UINT_32T)
301  uint_32t w = x;
302  w = (w << 1) ^ (w << 3) ^ (w << 6);
303  return 0x05 ^ ((w ^ (w >> 8)) & 0xff);
304#else
305  return 0x05 ^ (x << 1) ^ (x << 3) ^ (x << 6) ^ (x >> 7) ^ (x >> 5) ^ (x >> 2);
306#endif
307}
308
/* Table-free versions of the accessors: every value is computed on demand.
 * The S-box is the affine transform of the field inverse, and its inverse
 * applies the two steps in the opposite order.
 * NOTE: gfm2_sb/gfm3_sb and gfm_* expand their argument several times. */
#define s_box(x) fwd_affine(gf_inv(x))
#define is_box(x) gf_inv(inv_affine(x))
#define gfm2_sb(x) f2(s_box(x))
#define gfm3_sb(x) f3(s_box(x))
#define gfm_9(x) f9(x)
#define gfm_b(x) fb(x)
#define gfm_d(x) fd(x)
#define gfm_e(x) fe(x)
317
318#endif
319
/* block_copy(d, s)       - copy one N_BLOCK-byte block from s to d
 * block_copy_nn(d, s, l) - copy l bytes from s to d
 * Both map onto memcpy when HAVE_MEMCPY is defined and onto the local
 * copy_block / copy_block_nn helpers otherwise. */
#if defined(HAVE_MEMCPY)
#define block_copy_nn(d, s, l) memcpy(d, s, l)
#define block_copy(d, s) memcpy(d, s, N_BLOCK)
#else
#define block_copy_nn(d, s, l) copy_block_nn(d, s, l)
#define block_copy(d, s) copy_block(d, s)
#endif
327
328#if !defined(HAVE_MEMCPY)
/* Copy one 16-byte block from s to d.
 *
 * The copy is done through unsigned char pointers so that it is valid for
 * buffers of any alignment and any effective type; reading or writing the
 * buffers through a 32-bit pointer would not be.
 *
 *   d - destination, at least 16 writable bytes
 *   s - source, at least 16 readable bytes (must not partially overlap d)
 */
static void copy_block(void* d, const void* s) {
  unsigned char* dst = (unsigned char*)d;
  const unsigned char* src = (const unsigned char*)s;
  unsigned int i;

  for (i = 0; i < 16; ++i) dst[i] = src[i];
}
354
355static void copy_block_nn(void* d, const void* s, uint_8t nn) {
356  while (nn--) *((uint_8t*)d)++ = *((uint_8t*)s)++;
357}
358#endif
359
/* XOR the 16-byte block at s into the 16-byte block at d (d ^= s).
 *
 * The blocks are accessed byte by byte through unsigned char pointers, so
 * the function is valid for buffers of any alignment and effective type.
 * Callers pass plain byte arrays and pointers at arbitrary offsets into
 * them, which must not be dereferenced as 32-bit words.  The result is the
 * same as a word-wise XOR on any byte order.
 *
 *   d - block updated in place, at least 16 bytes
 *   s - block XORed into d, at least 16 bytes
 */
static void xor_block(void* d, const void* s) {
  unsigned char* dst = (unsigned char*)d;
  const unsigned char* src = (const unsigned char*)s;
  unsigned int i;

  for (i = 0; i < 16; ++i) dst[i] ^= src[i];
}
385
/* Write the XOR of the 16-byte blocks s and k to d (d = s ^ k).
 *
 * The blocks are accessed byte by byte through unsigned char pointers, so
 * the function is valid for buffers of any alignment and effective type
 * (callers pass byte arrays and offsets into the key schedule).  d may be
 * the same block as s or k.
 *
 *   d - destination, at least 16 writable bytes
 *   s - first source block, at least 16 bytes
 *   k - second source block (typically a round key), at least 16 bytes
 */
static void copy_and_key(void* d, const void* s, const void* k) {
  unsigned char* dst = (unsigned char*)d;
  const unsigned char* src = (const unsigned char*)s;
  const unsigned char* key = (const unsigned char*)k;
  unsigned int i;

  for (i = 0; i < 16; ++i) dst[i] = (unsigned char)(src[i] ^ key[i]);
}
414
/* XOR the round key k into the state d in place (thin wrapper around
 * xor_block). */
static void add_round_key(uint_8t d[N_BLOCK], const uint_8t k[N_BLOCK]) {
  xor_block(d, k);
}
418
419static void shift_sub_rows(uint_8t st[N_BLOCK]) {
420  uint_8t tt;
421
422  st[0] = s_box(st[0]);
423  st[4] = s_box(st[4]);
424  st[8] = s_box(st[8]);
425  st[12] = s_box(st[12]);
426
427  tt = st[1];
428  st[1] = s_box(st[5]);
429  st[5] = s_box(st[9]);
430  st[9] = s_box(st[13]);
431  st[13] = s_box(tt);
432
433  tt = st[2];
434  st[2] = s_box(st[10]);
435  st[10] = s_box(tt);
436  tt = st[6];
437  st[6] = s_box(st[14]);
438  st[14] = s_box(tt);
439
440  tt = st[15];
441  st[15] = s_box(st[11]);
442  st[11] = s_box(st[7]);
443  st[7] = s_box(st[3]);
444  st[3] = s_box(tt);
445}
446
447static void inv_shift_sub_rows(uint_8t st[N_BLOCK]) {
448  uint_8t tt;
449
450  st[0] = is_box(st[0]);
451  st[4] = is_box(st[4]);
452  st[8] = is_box(st[8]);
453  st[12] = is_box(st[12]);
454
455  tt = st[13];
456  st[13] = is_box(st[9]);
457  st[9] = is_box(st[5]);
458  st[5] = is_box(st[1]);
459  st[1] = is_box(tt);
460
461  tt = st[2];
462  st[2] = is_box(st[10]);
463  st[10] = is_box(tt);
464  tt = st[6];
465  st[6] = is_box(st[14]);
466  st[14] = is_box(tt);
467
468  tt = st[3];
469  st[3] = is_box(st[7]);
470  st[7] = is_box(st[11]);
471  st[11] = is_box(st[15]);
472  st[15] = is_box(tt);
473}
474
475#if defined(VERSION_1)
476static void mix_sub_columns(uint_8t dt[N_BLOCK]) {
477  uint_8t st[N_BLOCK];
478  block_copy(st, dt);
479#else
480static void mix_sub_columns(uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK]) {
481#endif
482  dt[0] = gfm2_sb(st[0]) ^ gfm3_sb(st[5]) ^ s_box(st[10]) ^ s_box(st[15]);
483  dt[1] = s_box(st[0]) ^ gfm2_sb(st[5]) ^ gfm3_sb(st[10]) ^ s_box(st[15]);
484  dt[2] = s_box(st[0]) ^ s_box(st[5]) ^ gfm2_sb(st[10]) ^ gfm3_sb(st[15]);
485  dt[3] = gfm3_sb(st[0]) ^ s_box(st[5]) ^ s_box(st[10]) ^ gfm2_sb(st[15]);
486
487  dt[4] = gfm2_sb(st[4]) ^ gfm3_sb(st[9]) ^ s_box(st[14]) ^ s_box(st[3]);
488  dt[5] = s_box(st[4]) ^ gfm2_sb(st[9]) ^ gfm3_sb(st[14]) ^ s_box(st[3]);
489  dt[6] = s_box(st[4]) ^ s_box(st[9]) ^ gfm2_sb(st[14]) ^ gfm3_sb(st[3]);
490  dt[7] = gfm3_sb(st[4]) ^ s_box(st[9]) ^ s_box(st[14]) ^ gfm2_sb(st[3]);
491
492  dt[8] = gfm2_sb(st[8]) ^ gfm3_sb(st[13]) ^ s_box(st[2]) ^ s_box(st[7]);
493  dt[9] = s_box(st[8]) ^ gfm2_sb(st[13]) ^ gfm3_sb(st[2]) ^ s_box(st[7]);
494  dt[10] = s_box(st[8]) ^ s_box(st[13]) ^ gfm2_sb(st[2]) ^ gfm3_sb(st[7]);
495  dt[11] = gfm3_sb(st[8]) ^ s_box(st[13]) ^ s_box(st[2]) ^ gfm2_sb(st[7]);
496
497  dt[12] = gfm2_sb(st[12]) ^ gfm3_sb(st[1]) ^ s_box(st[6]) ^ s_box(st[11]);
498  dt[13] = s_box(st[12]) ^ gfm2_sb(st[1]) ^ gfm3_sb(st[6]) ^ s_box(st[11]);
499  dt[14] = s_box(st[12]) ^ s_box(st[1]) ^ gfm2_sb(st[6]) ^ gfm3_sb(st[11]);
500  dt[15] = gfm3_sb(st[12]) ^ s_box(st[1]) ^ s_box(st[6]) ^ gfm2_sb(st[11]);
501}
502
503#if defined(VERSION_1)
504static void inv_mix_sub_columns(uint_8t dt[N_BLOCK]) {
505  uint_8t st[N_BLOCK];
506  block_copy(st, dt);
507#else
508static void inv_mix_sub_columns(uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK]) {
509#endif
510  dt[0] = is_box(gfm_e(st[0]) ^ gfm_b(st[1]) ^ gfm_d(st[2]) ^ gfm_9(st[3]));
511  dt[5] = is_box(gfm_9(st[0]) ^ gfm_e(st[1]) ^ gfm_b(st[2]) ^ gfm_d(st[3]));
512  dt[10] = is_box(gfm_d(st[0]) ^ gfm_9(st[1]) ^ gfm_e(st[2]) ^ gfm_b(st[3]));
513  dt[15] = is_box(gfm_b(st[0]) ^ gfm_d(st[1]) ^ gfm_9(st[2]) ^ gfm_e(st[3]));
514
515  dt[4] = is_box(gfm_e(st[4]) ^ gfm_b(st[5]) ^ gfm_d(st[6]) ^ gfm_9(st[7]));
516  dt[9] = is_box(gfm_9(st[4]) ^ gfm_e(st[5]) ^ gfm_b(st[6]) ^ gfm_d(st[7]));
517  dt[14] = is_box(gfm_d(st[4]) ^ gfm_9(st[5]) ^ gfm_e(st[6]) ^ gfm_b(st[7]));
518  dt[3] = is_box(gfm_b(st[4]) ^ gfm_d(st[5]) ^ gfm_9(st[6]) ^ gfm_e(st[7]));
519
520  dt[8] = is_box(gfm_e(st[8]) ^ gfm_b(st[9]) ^ gfm_d(st[10]) ^ gfm_9(st[11]));
521  dt[13] = is_box(gfm_9(st[8]) ^ gfm_e(st[9]) ^ gfm_b(st[10]) ^ gfm_d(st[11]));
522  dt[2] = is_box(gfm_d(st[8]) ^ gfm_9(st[9]) ^ gfm_e(st[10]) ^ gfm_b(st[11]));
523  dt[7] = is_box(gfm_b(st[8]) ^ gfm_d(st[9]) ^ gfm_9(st[10]) ^ gfm_e(st[11]));
524
525  dt[12] =
526      is_box(gfm_e(st[12]) ^ gfm_b(st[13]) ^ gfm_d(st[14]) ^ gfm_9(st[15]));
527  dt[1] = is_box(gfm_9(st[12]) ^ gfm_e(st[13]) ^ gfm_b(st[14]) ^ gfm_d(st[15]));
528  dt[6] = is_box(gfm_d(st[12]) ^ gfm_9(st[13]) ^ gfm_e(st[14]) ^ gfm_b(st[15]));
529  dt[11] =
530      is_box(gfm_b(st[12]) ^ gfm_d(st[13]) ^ gfm_9(st[14]) ^ gfm_e(st[15]));
531}
532
533#if defined(AES_ENC_PREKEYED) || defined(AES_DEC_PREKEYED)
534
535/*  Set the cipher key for the pre-keyed version */
536/*  NOTE: If the length_type used for the key length is an
537    unsigned 8-bit character, a key length of 256 bits must
538    be entered as a length in bytes (valid inputs are hence
539    128, 192, 16, 24 and 32).
540*/
541
542return_type aes_set_key(const unsigned char key[], length_type keylen,
543                        aes_context ctx[1]) {
544  uint_8t cc, rc, hi;
545
546  switch (keylen) {
547    case 16:
548    case 128: /* length in bits (128 = 8*16) */
549      keylen = 16;
550      break;
551    case 24:
552    case 192: /* length in bits (192 = 8*24) */
553      keylen = 24;
554      break;
555    case 32:
556      /*    case 256:           length in bits (256 = 8*32) */
557      keylen = 32;
558      break;
559    default:
560      ctx->rnd = 0;
561      return (return_type)-1;
562  }
563  block_copy_nn(ctx->ksch, key, keylen);
564  hi = (keylen + 28) << 2;
565  ctx->rnd = (hi >> 4) - 1;
566  for (cc = keylen, rc = 1; cc < hi; cc += 4) {
567    uint_8t tt, t0, t1, t2, t3;
568
569    t0 = ctx->ksch[cc - 4];
570    t1 = ctx->ksch[cc - 3];
571    t2 = ctx->ksch[cc - 2];
572    t3 = ctx->ksch[cc - 1];
573    if (cc % keylen == 0) {
574      tt = t0;
575      t0 = s_box(t1) ^ rc;
576      t1 = s_box(t2);
577      t2 = s_box(t3);
578      t3 = s_box(tt);
579      rc = f2(rc);
580    } else if (keylen > 24 && cc % keylen == 16) {
581      t0 = s_box(t0);
582      t1 = s_box(t1);
583      t2 = s_box(t2);
584      t3 = s_box(t3);
585    }
586    tt = cc - keylen;
587    ctx->ksch[cc + 0] = ctx->ksch[tt + 0] ^ t0;
588    ctx->ksch[cc + 1] = ctx->ksch[tt + 1] ^ t1;
589    ctx->ksch[cc + 2] = ctx->ksch[tt + 2] ^ t2;
590    ctx->ksch[cc + 3] = ctx->ksch[tt + 3] ^ t3;
591  }
592  return 0;
593}
594
595#endif
596
597#if defined(AES_ENC_PREKEYED)
598
599/*  Encrypt a single block of 16 bytes */
600
601return_type aes_encrypt(const unsigned char in[N_BLOCK],
602                        unsigned char out[N_BLOCK], const aes_context ctx[1]) {
603  if (ctx->rnd) {
604    uint_8t s1[N_BLOCK], r;
605    copy_and_key(s1, in, ctx->ksch);
606
607    for (r = 1; r < ctx->rnd; ++r)
608#if defined(VERSION_1)
609    {
610      mix_sub_columns(s1);
611      add_round_key(s1, ctx->ksch + r * N_BLOCK);
612    }
613#else
614    {
615      uint_8t s2[N_BLOCK];
616      mix_sub_columns(s2, s1);
617      copy_and_key(s1, s2, ctx->ksch + r * N_BLOCK);
618    }
619#endif
620    shift_sub_rows(s1);
621    copy_and_key(out, s1, ctx->ksch + r * N_BLOCK);
622  } else
623    return (return_type)-1;
624  return 0;
625}
626
627/* CBC encrypt a number of blocks (input and return an IV) */
628
629return_type aes_cbc_encrypt(const unsigned char* in, unsigned char* out,
630                            int n_block, unsigned char iv[N_BLOCK],
631                            const aes_context ctx[1]) {
632  while (n_block--) {
633    xor_block(iv, in);
634    if (aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS) return EXIT_FAILURE;
635    memcpy(out, iv, N_BLOCK);
636    in += N_BLOCK;
637    out += N_BLOCK;
638  }
639  return EXIT_SUCCESS;
640}
641
642#endif
643
644#if defined(AES_DEC_PREKEYED)
645
646/*  Decrypt a single block of 16 bytes */
647
648return_type aes_decrypt(const unsigned char in[N_BLOCK],
649                        unsigned char out[N_BLOCK], const aes_context ctx[1]) {
650  if (ctx->rnd) {
651    uint_8t s1[N_BLOCK], r;
652    copy_and_key(s1, in, ctx->ksch + ctx->rnd * N_BLOCK);
653    inv_shift_sub_rows(s1);
654
655    for (r = ctx->rnd; --r;)
656#if defined(VERSION_1)
657    {
658      add_round_key(s1, ctx->ksch + r * N_BLOCK);
659      inv_mix_sub_columns(s1);
660    }
661#else
662    {
663      uint_8t s2[N_BLOCK];
664      copy_and_key(s2, s1, ctx->ksch + r * N_BLOCK);
665      inv_mix_sub_columns(s1, s2);
666    }
667#endif
668    copy_and_key(out, s1, ctx->ksch);
669  } else
670    return (return_type)-1;
671  return 0;
672}
673
674/* CBC decrypt a number of blocks (input and return an IV) */
675
676return_type aes_cbc_decrypt(const unsigned char* in, unsigned char* out,
677                            int n_block, unsigned char iv[N_BLOCK],
678                            const aes_context ctx[1]) {
679  while (n_block--) {
680    uint_8t tmp[N_BLOCK];
681
682    memcpy(tmp, in, N_BLOCK);
683    if (aes_decrypt(in, out, ctx) != EXIT_SUCCESS) return EXIT_FAILURE;
684    xor_block(out, iv);
685    memcpy(iv, tmp, N_BLOCK);
686    in += N_BLOCK;
687    out += N_BLOCK;
688  }
689  return EXIT_SUCCESS;
690}
691
692#endif
693
694#if defined(AES_ENC_128_OTFK)
695
696/*  The 'on the fly' encryption key update for for 128 bit keys */
697
698static void update_encrypt_key_128(uint_8t k[N_BLOCK], uint_8t* rc) {
699  uint_8t cc;
700
701  k[0] ^= s_box(k[13]) ^ *rc;
702  k[1] ^= s_box(k[14]);
703  k[2] ^= s_box(k[15]);
704  k[3] ^= s_box(k[12]);
705  *rc = f2(*rc);
706
707  for (cc = 4; cc < 16; cc += 4) {
708    k[cc + 0] ^= k[cc - 4];
709    k[cc + 1] ^= k[cc - 3];
710    k[cc + 2] ^= k[cc - 2];
711    k[cc + 3] ^= k[cc - 1];
712  }
713}
714
715/*  Encrypt a single block of 16 bytes with 'on the fly' 128 bit keying */
716
717void aes_encrypt_128(const unsigned char in[N_BLOCK],
718                     unsigned char out[N_BLOCK],
719                     const unsigned char key[N_BLOCK],
720                     unsigned char o_key[N_BLOCK]) {
721  uint_8t s1[N_BLOCK], r, rc = 1;
722
723  if (o_key != key) block_copy(o_key, key);
724  copy_and_key(s1, in, o_key);
725
726  for (r = 1; r < 10; ++r)
727#if defined(VERSION_1)
728  {
729    mix_sub_columns(s1);
730    update_encrypt_key_128(o_key, &rc);
731    add_round_key(s1, o_key);
732  }
733#else
734  {
735    uint_8t s2[N_BLOCK];
736    mix_sub_columns(s2, s1);
737    update_encrypt_key_128(o_key, &rc);
738    copy_and_key(s1, s2, o_key);
739  }
740#endif
741
742  shift_sub_rows(s1);
743  update_encrypt_key_128(o_key, &rc);
744  copy_and_key(out, s1, o_key);
745}
746
747#endif
748
749#if defined(AES_DEC_128_OTFK)
750
751/*  The 'on the fly' decryption key update for for 128 bit keys */
752
753static void update_decrypt_key_128(uint_8t k[N_BLOCK], uint_8t* rc) {
754  uint_8t cc;
755
756  for (cc = 12; cc > 0; cc -= 4) {
757    k[cc + 0] ^= k[cc - 4];
758    k[cc + 1] ^= k[cc - 3];
759    k[cc + 2] ^= k[cc - 2];
760    k[cc + 3] ^= k[cc - 1];
761  }
762  *rc = d2(*rc);
763  k[0] ^= s_box(k[13]) ^ *rc;
764  k[1] ^= s_box(k[14]);
765  k[2] ^= s_box(k[15]);
766  k[3] ^= s_box(k[12]);
767}
768
769/*  Decrypt a single block of 16 bytes with 'on the fly' 128 bit keying */
770
771void aes_decrypt_128(const unsigned char in[N_BLOCK],
772                     unsigned char out[N_BLOCK],
773                     const unsigned char key[N_BLOCK],
774                     unsigned char o_key[N_BLOCK]) {
775  uint_8t s1[N_BLOCK], r, rc = 0x6c;
776  if (o_key != key) block_copy(o_key, key);
777
778  copy_and_key(s1, in, o_key);
779  inv_shift_sub_rows(s1);
780
781  for (r = 10; --r;)
782#if defined(VERSION_1)
783  {
784    update_decrypt_key_128(o_key, &rc);
785    add_round_key(s1, o_key);
786    inv_mix_sub_columns(s1);
787  }
788#else
789  {
790    uint_8t s2[N_BLOCK];
791    update_decrypt_key_128(o_key, &rc);
792    copy_and_key(s2, s1, o_key);
793    inv_mix_sub_columns(s1, s2);
794  }
795#endif
796  update_decrypt_key_128(o_key, &rc);
797  copy_and_key(out, s1, o_key);
798}
799
800#endif
801
802#if defined(AES_ENC_256_OTFK)
803
804/*  The 'on the fly' encryption key update for for 256 bit keys */
805
806static void update_encrypt_key_256(uint_8t k[2 * N_BLOCK], uint_8t* rc) {
807  uint_8t cc;
808
809  k[0] ^= s_box(k[29]) ^ *rc;
810  k[1] ^= s_box(k[30]);
811  k[2] ^= s_box(k[31]);
812  k[3] ^= s_box(k[28]);
813  *rc = f2(*rc);
814
815  for (cc = 4; cc < 16; cc += 4) {
816    k[cc + 0] ^= k[cc - 4];
817    k[cc + 1] ^= k[cc - 3];
818    k[cc + 2] ^= k[cc - 2];
819    k[cc + 3] ^= k[cc - 1];
820  }
821
822  k[16] ^= s_box(k[12]);
823  k[17] ^= s_box(k[13]);
824  k[18] ^= s_box(k[14]);
825  k[19] ^= s_box(k[15]);
826
827  for (cc = 20; cc < 32; cc += 4) {
828    k[cc + 0] ^= k[cc - 4];
829    k[cc + 1] ^= k[cc - 3];
830    k[cc + 2] ^= k[cc - 2];
831    k[cc + 3] ^= k[cc - 1];
832  }
833}
834
835/*  Encrypt a single block of 16 bytes with 'on the fly' 256 bit keying */
836
837void aes_encrypt_256(const unsigned char in[N_BLOCK],
838                     unsigned char out[N_BLOCK],
839                     const unsigned char key[2 * N_BLOCK],
840                     unsigned char o_key[2 * N_BLOCK]) {
841  uint_8t s1[N_BLOCK], r, rc = 1;
842  if (o_key != key) {
843    block_copy(o_key, key);
844    block_copy(o_key + 16, key + 16);
845  }
846  copy_and_key(s1, in, o_key);
847
848  for (r = 1; r < 14; ++r)
849#if defined(VERSION_1)
850  {
851    mix_sub_columns(s1);
852    if (r & 1)
853      add_round_key(s1, o_key + 16);
854    else {
855      update_encrypt_key_256(o_key, &rc);
856      add_round_key(s1, o_key);
857    }
858  }
859#else
860  {
861    uint_8t s2[N_BLOCK];
862    mix_sub_columns(s2, s1);
863    if (r & 1)
864      copy_and_key(s1, s2, o_key + 16);
865    else {
866      update_encrypt_key_256(o_key, &rc);
867      copy_and_key(s1, s2, o_key);
868    }
869  }
870#endif
871
872  shift_sub_rows(s1);
873  update_encrypt_key_256(o_key, &rc);
874  copy_and_key(out, s1, o_key);
875}
876
877#endif
878
879#if defined(AES_DEC_256_OTFK)
880
881/*  The 'on the fly' encryption key update for for 256 bit keys */
882
883static void update_decrypt_key_256(uint_8t k[2 * N_BLOCK], uint_8t* rc) {
884  uint_8t cc;
885
886  for (cc = 28; cc > 16; cc -= 4) {
887    k[cc + 0] ^= k[cc - 4];
888    k[cc + 1] ^= k[cc - 3];
889    k[cc + 2] ^= k[cc - 2];
890    k[cc + 3] ^= k[cc - 1];
891  }
892
893  k[16] ^= s_box(k[12]);
894  k[17] ^= s_box(k[13]);
895  k[18] ^= s_box(k[14]);
896  k[19] ^= s_box(k[15]);
897
898  for (cc = 12; cc > 0; cc -= 4) {
899    k[cc + 0] ^= k[cc - 4];
900    k[cc + 1] ^= k[cc - 3];
901    k[cc + 2] ^= k[cc - 2];
902    k[cc + 3] ^= k[cc - 1];
903  }
904
905  *rc = d2(*rc);
906  k[0] ^= s_box(k[29]) ^ *rc;
907  k[1] ^= s_box(k[30]);
908  k[2] ^= s_box(k[31]);
909  k[3] ^= s_box(k[28]);
910}
911
912/*  Decrypt a single block of 16 bytes with 'on the fly'
913    256 bit keying
914*/
915void aes_decrypt_256(const unsigned char in[N_BLOCK],
916                     unsigned char out[N_BLOCK],
917                     const unsigned char key[2 * N_BLOCK],
918                     unsigned char o_key[2 * N_BLOCK]) {
919  uint_8t s1[N_BLOCK], r, rc = 0x80;
920
921  if (o_key != key) {
922    block_copy(o_key, key);
923    block_copy(o_key + 16, key + 16);
924  }
925
926  copy_and_key(s1, in, o_key);
927  inv_shift_sub_rows(s1);
928
929  for (r = 14; --r;)
930#if defined(VERSION_1)
931  {
932    if ((r & 1)) {
933      update_decrypt_key_256(o_key, &rc);
934      add_round_key(s1, o_key + 16);
935    } else
936      add_round_key(s1, o_key);
937    inv_mix_sub_columns(s1);
938  }
939#else
940  {
941    uint_8t s2[N_BLOCK];
942    if ((r & 1)) {
943      update_decrypt_key_256(o_key, &rc);
944      copy_and_key(s2, s1, o_key + 16);
945    } else
946      copy_and_key(s2, s1, o_key);
947    inv_mix_sub_columns(s1, s2);
948  }
949#endif
950  copy_and_key(out, s1, o_key);
951}
952
953#endif
954