1package org.bouncycastle.crypto.engines; 2 3import org.bouncycastle.crypto.BlockCipher; 4import org.bouncycastle.crypto.CipherParameters; 5import org.bouncycastle.crypto.DataLengthException; 6import org.bouncycastle.crypto.params.KeyParameter; 7 8/** 9 * an implementation of the AES (Rijndael), from FIPS-197. 10 * <p> 11 * For further details see: <a href="http://csrc.nist.gov/encryption/aes/">http://csrc.nist.gov/encryption/aes/</a>. 12 * 13 * This implementation is based on optimizations from Dr. Brian Gladman's paper and C code at 14 * <a href="http://fp.gladman.plus.com/cryptography_technology/rijndael/">http://fp.gladman.plus.com/cryptography_technology/rijndael/</a> 15 * 16 * There are three levels of tradeoff of speed vs memory 17 * Because java has no preprocessor, they are written as three separate classes from which to choose 18 * 19 * The fastest uses 8Kbytes of static tables to precompute round calculations, 4 256 word tables for encryption 20 * and 4 for decryption. 21 * 22 * The middle performance version uses only one 256 word table for each, for a total of 2Kbytes, 23 * adding 12 rotate operations per round to compute the values contained in the other tables from 24 * the contents of the first 25 * 26 * The slowest version uses no static tables at all and computes the values 27 * in each round. 28 * <p> 29 * This file contains the slowest performance version with no static tables 30 * for round precomputation, but it has the smallest foot print. 31 * 32 */ 33public class AESLightEngine 34 implements BlockCipher 35{ 36 // The S box 37 private static final byte[] S = { 38 (byte)99, (byte)124, (byte)119, (byte)123, (byte)242, (byte)107, (byte)111, (byte)197, 39 (byte)48, (byte)1, (byte)103, (byte)43, (byte)254, (byte)215, (byte)171, (byte)118, 40 (byte)202, (byte)130, (byte)201, (byte)125, (byte)250, (byte)89, (byte)71, (byte)240, 41 (byte)173, (byte)212, (byte)162, (byte)175, (byte)156, (byte)164, (byte)114, (byte)192, 42 (byte)183, (byte)253, (byte)147, (byte)38, (byte)54, (byte)63, (byte)247, (byte)204, 43 (byte)52, (byte)165, (byte)229, (byte)241, (byte)113, (byte)216, (byte)49, (byte)21, 44 (byte)4, (byte)199, (byte)35, (byte)195, (byte)24, (byte)150, (byte)5, (byte)154, 45 (byte)7, (byte)18, (byte)128, (byte)226, (byte)235, (byte)39, (byte)178, (byte)117, 46 (byte)9, (byte)131, (byte)44, (byte)26, (byte)27, (byte)110, (byte)90, (byte)160, 47 (byte)82, (byte)59, (byte)214, (byte)179, (byte)41, (byte)227, (byte)47, (byte)132, 48 (byte)83, (byte)209, (byte)0, (byte)237, (byte)32, (byte)252, (byte)177, (byte)91, 49 (byte)106, (byte)203, (byte)190, (byte)57, (byte)74, (byte)76, (byte)88, (byte)207, 50 (byte)208, (byte)239, (byte)170, (byte)251, (byte)67, (byte)77, (byte)51, (byte)133, 51 (byte)69, (byte)249, (byte)2, (byte)127, (byte)80, (byte)60, (byte)159, (byte)168, 52 (byte)81, (byte)163, (byte)64, (byte)143, (byte)146, (byte)157, (byte)56, (byte)245, 53 (byte)188, (byte)182, (byte)218, (byte)33, (byte)16, (byte)255, (byte)243, (byte)210, 54 (byte)205, (byte)12, (byte)19, (byte)236, (byte)95, (byte)151, (byte)68, (byte)23, 55 (byte)196, (byte)167, (byte)126, (byte)61, (byte)100, (byte)93, (byte)25, (byte)115, 56 (byte)96, (byte)129, (byte)79, (byte)220, (byte)34, (byte)42, (byte)144, (byte)136, 57 (byte)70, (byte)238, (byte)184, (byte)20, (byte)222, (byte)94, (byte)11, (byte)219, 58 (byte)224, (byte)50, (byte)58, (byte)10, (byte)73, (byte)6, (byte)36, (byte)92, 59 (byte)194, (byte)211, (byte)172, (byte)98, (byte)145, (byte)149, (byte)228, (byte)121, 60 (byte)231, (byte)200, (byte)55, (byte)109, (byte)141, (byte)213, (byte)78, (byte)169, 61 (byte)108, (byte)86, (byte)244, (byte)234, (byte)101, (byte)122, (byte)174, (byte)8, 62 (byte)186, (byte)120, (byte)37, (byte)46, (byte)28, (byte)166, (byte)180, (byte)198, 63 (byte)232, (byte)221, (byte)116, (byte)31, (byte)75, (byte)189, (byte)139, (byte)138, 64 (byte)112, (byte)62, (byte)181, (byte)102, (byte)72, (byte)3, (byte)246, (byte)14, 65 (byte)97, (byte)53, (byte)87, (byte)185, (byte)134, (byte)193, (byte)29, (byte)158, 66 (byte)225, (byte)248, (byte)152, (byte)17, (byte)105, (byte)217, (byte)142, (byte)148, 67 (byte)155, (byte)30, (byte)135, (byte)233, (byte)206, (byte)85, (byte)40, (byte)223, 68 (byte)140, (byte)161, (byte)137, (byte)13, (byte)191, (byte)230, (byte)66, (byte)104, 69 (byte)65, (byte)153, (byte)45, (byte)15, (byte)176, (byte)84, (byte)187, (byte)22, 70 }; 71 72 // The inverse S-box 73 private static final byte[] Si = { 74 (byte)82, (byte)9, (byte)106, (byte)213, (byte)48, (byte)54, (byte)165, (byte)56, 75 (byte)191, (byte)64, (byte)163, (byte)158, (byte)129, (byte)243, (byte)215, (byte)251, 76 (byte)124, (byte)227, (byte)57, (byte)130, (byte)155, (byte)47, (byte)255, (byte)135, 77 (byte)52, (byte)142, (byte)67, (byte)68, (byte)196, (byte)222, (byte)233, (byte)203, 78 (byte)84, (byte)123, (byte)148, (byte)50, (byte)166, (byte)194, (byte)35, (byte)61, 79 (byte)238, (byte)76, (byte)149, (byte)11, (byte)66, (byte)250, (byte)195, (byte)78, 80 (byte)8, (byte)46, (byte)161, (byte)102, (byte)40, (byte)217, (byte)36, (byte)178, 81 (byte)118, (byte)91, (byte)162, (byte)73, (byte)109, (byte)139, (byte)209, (byte)37, 82 (byte)114, (byte)248, (byte)246, (byte)100, (byte)134, (byte)104, (byte)152, (byte)22, 83 (byte)212, (byte)164, (byte)92, (byte)204, (byte)93, (byte)101, (byte)182, (byte)146, 84 (byte)108, (byte)112, (byte)72, (byte)80, (byte)253, (byte)237, (byte)185, (byte)218, 85 (byte)94, (byte)21, (byte)70, (byte)87, (byte)167, (byte)141, (byte)157, (byte)132, 86 (byte)144, (byte)216, (byte)171, (byte)0, (byte)140, (byte)188, (byte)211, (byte)10, 87 (byte)247, (byte)228, (byte)88, (byte)5, (byte)184, (byte)179, (byte)69, (byte)6, 88 (byte)208, (byte)44, (byte)30, (byte)143, (byte)202, (byte)63, (byte)15, (byte)2, 89 (byte)193, (byte)175, (byte)189, (byte)3, (byte)1, (byte)19, (byte)138, (byte)107, 90 (byte)58, (byte)145, (byte)17, (byte)65, (byte)79, (byte)103, (byte)220, (byte)234, 91 (byte)151, (byte)242, (byte)207, (byte)206, (byte)240, (byte)180, (byte)230, (byte)115, 92 (byte)150, (byte)172, (byte)116, (byte)34, (byte)231, (byte)173, (byte)53, (byte)133, 93 (byte)226, (byte)249, (byte)55, (byte)232, (byte)28, (byte)117, (byte)223, (byte)110, 94 (byte)71, (byte)241, (byte)26, (byte)113, (byte)29, (byte)41, (byte)197, (byte)137, 95 (byte)111, (byte)183, (byte)98, (byte)14, (byte)170, (byte)24, (byte)190, (byte)27, 96 (byte)252, (byte)86, (byte)62, (byte)75, (byte)198, (byte)210, (byte)121, (byte)32, 97 (byte)154, (byte)219, (byte)192, (byte)254, (byte)120, (byte)205, (byte)90, (byte)244, 98 (byte)31, (byte)221, (byte)168, (byte)51, (byte)136, (byte)7, (byte)199, (byte)49, 99 (byte)177, (byte)18, (byte)16, (byte)89, (byte)39, (byte)128, (byte)236, (byte)95, 100 (byte)96, (byte)81, (byte)127, (byte)169, (byte)25, (byte)181, (byte)74, (byte)13, 101 (byte)45, (byte)229, (byte)122, (byte)159, (byte)147, (byte)201, (byte)156, (byte)239, 102 (byte)160, (byte)224, (byte)59, (byte)77, (byte)174, (byte)42, (byte)245, (byte)176, 103 (byte)200, (byte)235, (byte)187, (byte)60, (byte)131, (byte)83, (byte)153, (byte)97, 104 (byte)23, (byte)43, (byte)4, (byte)126, (byte)186, (byte)119, (byte)214, (byte)38, 105 (byte)225, (byte)105, (byte)20, (byte)99, (byte)85, (byte)33, (byte)12, (byte)125, 106 }; 107 108 // vector used in calculating key schedule (powers of x in GF(256)) 109 private static final int[] rcon = { 110 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 111 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91 }; 112 113 private int shift( 114 int r, 115 int shift) 116 { 117 return (r >>> shift) | (r << -shift); 118 } 119 120 /* multiply four bytes in GF(2^8) by 'x' {02} in parallel */ 121 122 private static final int m1 = 0x80808080; 123 private static final int m2 = 0x7f7f7f7f; 124 private static final int m3 = 0x0000001b; 125 126 private int FFmulX(int x) 127 { 128 return (((x & m2) << 1) ^ (((x & m1) >>> 7) * m3)); 129 } 130 131 /* 132 The following defines provide alternative definitions of FFmulX that might 133 give improved performance if a fast 32-bit multiply is not available. 134 135 private int FFmulX(int x) { int u = x & m1; u |= (u >> 1); return ((x & m2) << 1) ^ ((u >>> 3) | (u >>> 6)); } 136 private static final int m4 = 0x1b1b1b1b; 137 private int FFmulX(int x) { int u = x & m1; return ((x & m2) << 1) ^ ((u - (u >>> 7)) & m4); } 138 139 */ 140 141 private int mcol(int x) 142 { 143 int f2 = FFmulX(x); 144 return f2 ^ shift(x ^ f2, 8) ^ shift(x, 16) ^ shift(x, 24); 145 } 146 147 private int inv_mcol(int x) 148 { 149 int f2 = FFmulX(x); 150 int f4 = FFmulX(f2); 151 int f8 = FFmulX(f4); 152 int f9 = x ^ f8; 153 154 return f2 ^ f4 ^ f8 ^ shift(f2 ^ f9, 8) ^ shift(f4 ^ f9, 16) ^ shift(f9, 24); 155 } 156 157 158 private int subWord(int x) 159 { 160 return (S[x&255]&255 | ((S[(x>>8)&255]&255)<<8) | ((S[(x>>16)&255]&255)<<16) | S[(x>>24)&255]<<24); 161 } 162 163 /** 164 * Calculate the necessary round keys 165 * The number of calculations depends on key size and block size 166 * AES specified a fixed block size of 128 bits and key sizes 128/192/256 bits 167 * This code is written assuming those are the only possible values 168 */ 169 private int[][] generateWorkingKey( 170 byte[] key, 171 boolean forEncryption) 172 { 173 int KC = key.length / 4; // key length in words 174 int t; 175 176 if (((KC != 4) && (KC != 6) && (KC != 8)) || ((KC * 4) != key.length)) 177 { 178 throw new IllegalArgumentException("Key length not 128/192/256 bits."); 179 } 180 181 ROUNDS = KC + 6; // This is not always true for the generalized Rijndael that allows larger block sizes 182 int[][] W = new int[ROUNDS+1][4]; // 4 words in a block 183 184 // 185 // copy the key into the round key array 186 // 187 188 t = 0; 189 int i = 0; 190 while (i < key.length) 191 { 192 W[t >> 2][t & 3] = (key[i]&0xff) | ((key[i+1]&0xff) << 8) | ((key[i+2]&0xff) << 16) | (key[i+3] << 24); 193 i+=4; 194 t++; 195 } 196 197 // 198 // while not enough round key material calculated 199 // calculate new values 200 // 201 int k = (ROUNDS + 1) << 2; 202 for (i = KC; (i < k); i++) 203 { 204 int temp = W[(i-1)>>2][(i-1)&3]; 205 if ((i % KC) == 0) 206 { 207 temp = subWord(shift(temp, 8)) ^ rcon[(i / KC)-1]; 208 } 209 else if ((KC > 6) && ((i % KC) == 4)) 210 { 211 temp = subWord(temp); 212 } 213 214 W[i>>2][i&3] = W[(i - KC)>>2][(i-KC)&3] ^ temp; 215 } 216 217 if (!forEncryption) 218 { 219 for (int j = 1; j < ROUNDS; j++) 220 { 221 for (i = 0; i < 4; i++) 222 { 223 W[j][i] = inv_mcol(W[j][i]); 224 } 225 } 226 } 227 228 return W; 229 } 230 231 private int ROUNDS; 232 private int[][] WorkingKey = null; 233 private int C0, C1, C2, C3; 234 private boolean forEncryption; 235 236 private static final int BLOCK_SIZE = 16; 237 238 /** 239 * default constructor - 128 bit block size. 240 */ 241 public AESLightEngine() 242 { 243 } 244 245 /** 246 * initialise an AES cipher. 247 * 248 * @param forEncryption whether or not we are for encryption. 249 * @param params the parameters required to set up the cipher. 250 * @exception IllegalArgumentException if the params argument is 251 * inappropriate. 252 */ 253 public void init( 254 boolean forEncryption, 255 CipherParameters params) 256 { 257 if (params instanceof KeyParameter) 258 { 259 WorkingKey = generateWorkingKey(((KeyParameter)params).getKey(), forEncryption); 260 this.forEncryption = forEncryption; 261 return; 262 } 263 264 throw new IllegalArgumentException("invalid parameter passed to AES init - " + params.getClass().getName()); 265 } 266 267 public String getAlgorithmName() 268 { 269 return "AES"; 270 } 271 272 public int getBlockSize() 273 { 274 return BLOCK_SIZE; 275 } 276 277 public int processBlock( 278 byte[] in, 279 int inOff, 280 byte[] out, 281 int outOff) 282 { 283 if (WorkingKey == null) 284 { 285 throw new IllegalStateException("AES engine not initialised"); 286 } 287 288 if ((inOff + (32 / 2)) > in.length) 289 { 290 throw new DataLengthException("input buffer too short"); 291 } 292 293 if ((outOff + (32 / 2)) > out.length) 294 { 295 throw new DataLengthException("output buffer too short"); 296 } 297 298 if (forEncryption) 299 { 300 unpackBlock(in, inOff); 301 encryptBlock(WorkingKey); 302 packBlock(out, outOff); 303 } 304 else 305 { 306 unpackBlock(in, inOff); 307 decryptBlock(WorkingKey); 308 packBlock(out, outOff); 309 } 310 311 return BLOCK_SIZE; 312 } 313 314 public void reset() 315 { 316 } 317 318 private final void unpackBlock( 319 byte[] bytes, 320 int off) 321 { 322 int index = off; 323 324 C0 = (bytes[index++] & 0xff); 325 C0 |= (bytes[index++] & 0xff) << 8; 326 C0 |= (bytes[index++] & 0xff) << 16; 327 C0 |= bytes[index++] << 24; 328 329 C1 = (bytes[index++] & 0xff); 330 C1 |= (bytes[index++] & 0xff) << 8; 331 C1 |= (bytes[index++] & 0xff) << 16; 332 C1 |= bytes[index++] << 24; 333 334 C2 = (bytes[index++] & 0xff); 335 C2 |= (bytes[index++] & 0xff) << 8; 336 C2 |= (bytes[index++] & 0xff) << 16; 337 C2 |= bytes[index++] << 24; 338 339 C3 = (bytes[index++] & 0xff); 340 C3 |= (bytes[index++] & 0xff) << 8; 341 C3 |= (bytes[index++] & 0xff) << 16; 342 C3 |= bytes[index++] << 24; 343 } 344 345 private final void packBlock( 346 byte[] bytes, 347 int off) 348 { 349 int index = off; 350 351 bytes[index++] = (byte)C0; 352 bytes[index++] = (byte)(C0 >> 8); 353 bytes[index++] = (byte)(C0 >> 16); 354 bytes[index++] = (byte)(C0 >> 24); 355 356 bytes[index++] = (byte)C1; 357 bytes[index++] = (byte)(C1 >> 8); 358 bytes[index++] = (byte)(C1 >> 16); 359 bytes[index++] = (byte)(C1 >> 24); 360 361 bytes[index++] = (byte)C2; 362 bytes[index++] = (byte)(C2 >> 8); 363 bytes[index++] = (byte)(C2 >> 16); 364 bytes[index++] = (byte)(C2 >> 24); 365 366 bytes[index++] = (byte)C3; 367 bytes[index++] = (byte)(C3 >> 8); 368 bytes[index++] = (byte)(C3 >> 16); 369 bytes[index++] = (byte)(C3 >> 24); 370 } 371 372 private void encryptBlock(int[][] KW) 373 { 374 int r, r0, r1, r2, r3; 375 376 C0 ^= KW[0][0]; 377 C1 ^= KW[0][1]; 378 C2 ^= KW[0][2]; 379 C3 ^= KW[0][3]; 380 381 for (r = 1; r < ROUNDS - 1;) 382 { 383 r0 = mcol((S[C0&255]&255) ^ ((S[(C1>>8)&255]&255)<<8) ^ ((S[(C2>>16)&255]&255)<<16) ^ (S[(C3>>24)&255]<<24)) ^ KW[r][0]; 384 r1 = mcol((S[C1&255]&255) ^ ((S[(C2>>8)&255]&255)<<8) ^ ((S[(C3>>16)&255]&255)<<16) ^ (S[(C0>>24)&255]<<24)) ^ KW[r][1]; 385 r2 = mcol((S[C2&255]&255) ^ ((S[(C3>>8)&255]&255)<<8) ^ ((S[(C0>>16)&255]&255)<<16) ^ (S[(C1>>24)&255]<<24)) ^ KW[r][2]; 386 r3 = mcol((S[C3&255]&255) ^ ((S[(C0>>8)&255]&255)<<8) ^ ((S[(C1>>16)&255]&255)<<16) ^ (S[(C2>>24)&255]<<24)) ^ KW[r++][3]; 387 C0 = mcol((S[r0&255]&255) ^ ((S[(r1>>8)&255]&255)<<8) ^ ((S[(r2>>16)&255]&255)<<16) ^ (S[(r3>>24)&255]<<24)) ^ KW[r][0]; 388 C1 = mcol((S[r1&255]&255) ^ ((S[(r2>>8)&255]&255)<<8) ^ ((S[(r3>>16)&255]&255)<<16) ^ (S[(r0>>24)&255]<<24)) ^ KW[r][1]; 389 C2 = mcol((S[r2&255]&255) ^ ((S[(r3>>8)&255]&255)<<8) ^ ((S[(r0>>16)&255]&255)<<16) ^ (S[(r1>>24)&255]<<24)) ^ KW[r][2]; 390 C3 = mcol((S[r3&255]&255) ^ ((S[(r0>>8)&255]&255)<<8) ^ ((S[(r1>>16)&255]&255)<<16) ^ (S[(r2>>24)&255]<<24)) ^ KW[r++][3]; 391 } 392 393 r0 = mcol((S[C0&255]&255) ^ ((S[(C1>>8)&255]&255)<<8) ^ ((S[(C2>>16)&255]&255)<<16) ^ (S[(C3>>24)&255]<<24)) ^ KW[r][0]; 394 r1 = mcol((S[C1&255]&255) ^ ((S[(C2>>8)&255]&255)<<8) ^ ((S[(C3>>16)&255]&255)<<16) ^ (S[(C0>>24)&255]<<24)) ^ KW[r][1]; 395 r2 = mcol((S[C2&255]&255) ^ ((S[(C3>>8)&255]&255)<<8) ^ ((S[(C0>>16)&255]&255)<<16) ^ (S[(C1>>24)&255]<<24)) ^ KW[r][2]; 396 r3 = mcol((S[C3&255]&255) ^ ((S[(C0>>8)&255]&255)<<8) ^ ((S[(C1>>16)&255]&255)<<16) ^ (S[(C2>>24)&255]<<24)) ^ KW[r++][3]; 397 398 // the final round is a simple function of S 399 400 C0 = (S[r0&255]&255) ^ ((S[(r1>>8)&255]&255)<<8) ^ ((S[(r2>>16)&255]&255)<<16) ^ (S[(r3>>24)&255]<<24) ^ KW[r][0]; 401 C1 = (S[r1&255]&255) ^ ((S[(r2>>8)&255]&255)<<8) ^ ((S[(r3>>16)&255]&255)<<16) ^ (S[(r0>>24)&255]<<24) ^ KW[r][1]; 402 C2 = (S[r2&255]&255) ^ ((S[(r3>>8)&255]&255)<<8) ^ ((S[(r0>>16)&255]&255)<<16) ^ (S[(r1>>24)&255]<<24) ^ KW[r][2]; 403 C3 = (S[r3&255]&255) ^ ((S[(r0>>8)&255]&255)<<8) ^ ((S[(r1>>16)&255]&255)<<16) ^ (S[(r2>>24)&255]<<24) ^ KW[r][3]; 404 405 } 406 407 private final void decryptBlock(int[][] KW) 408 { 409 int r, r0, r1, r2, r3; 410 411 C0 ^= KW[ROUNDS][0]; 412 C1 ^= KW[ROUNDS][1]; 413 C2 ^= KW[ROUNDS][2]; 414 C3 ^= KW[ROUNDS][3]; 415 416 for (r = ROUNDS-1; r>1;) 417 { 418 r0 = inv_mcol((Si[C0&255]&255) ^ ((Si[(C3>>8)&255]&255)<<8) ^ ((Si[(C2>>16)&255]&255)<<16) ^ (Si[(C1>>24)&255]<<24)) ^ KW[r][0]; 419 r1 = inv_mcol((Si[C1&255]&255) ^ ((Si[(C0>>8)&255]&255)<<8) ^ ((Si[(C3>>16)&255]&255)<<16) ^ (Si[(C2>>24)&255]<<24)) ^ KW[r][1]; 420 r2 = inv_mcol((Si[C2&255]&255) ^ ((Si[(C1>>8)&255]&255)<<8) ^ ((Si[(C0>>16)&255]&255)<<16) ^ (Si[(C3>>24)&255]<<24)) ^ KW[r][2]; 421 r3 = inv_mcol((Si[C3&255]&255) ^ ((Si[(C2>>8)&255]&255)<<8) ^ ((Si[(C1>>16)&255]&255)<<16) ^ (Si[(C0>>24)&255]<<24)) ^ KW[r--][3]; 422 C0 = inv_mcol((Si[r0&255]&255) ^ ((Si[(r3>>8)&255]&255)<<8) ^ ((Si[(r2>>16)&255]&255)<<16) ^ (Si[(r1>>24)&255]<<24)) ^ KW[r][0]; 423 C1 = inv_mcol((Si[r1&255]&255) ^ ((Si[(r0>>8)&255]&255)<<8) ^ ((Si[(r3>>16)&255]&255)<<16) ^ (Si[(r2>>24)&255]<<24)) ^ KW[r][1]; 424 C2 = inv_mcol((Si[r2&255]&255) ^ ((Si[(r1>>8)&255]&255)<<8) ^ ((Si[(r0>>16)&255]&255)<<16) ^ (Si[(r3>>24)&255]<<24)) ^ KW[r][2]; 425 C3 = inv_mcol((Si[r3&255]&255) ^ ((Si[(r2>>8)&255]&255)<<8) ^ ((Si[(r1>>16)&255]&255)<<16) ^ (Si[(r0>>24)&255]<<24)) ^ KW[r--][3]; 426 } 427 428 r0 = inv_mcol((Si[C0&255]&255) ^ ((Si[(C3>>8)&255]&255)<<8) ^ ((Si[(C2>>16)&255]&255)<<16) ^ (Si[(C1>>24)&255]<<24)) ^ KW[r][0]; 429 r1 = inv_mcol((Si[C1&255]&255) ^ ((Si[(C0>>8)&255]&255)<<8) ^ ((Si[(C3>>16)&255]&255)<<16) ^ (Si[(C2>>24)&255]<<24)) ^ KW[r][1]; 430 r2 = inv_mcol((Si[C2&255]&255) ^ ((Si[(C1>>8)&255]&255)<<8) ^ ((Si[(C0>>16)&255]&255)<<16) ^ (Si[(C3>>24)&255]<<24)) ^ KW[r][2]; 431 r3 = inv_mcol((Si[C3&255]&255) ^ ((Si[(C2>>8)&255]&255)<<8) ^ ((Si[(C1>>16)&255]&255)<<16) ^ (Si[(C0>>24)&255]<<24)) ^ KW[r--][3]; 432 433 // the final round's table is a simple function of Si 434 435 C0 = (Si[r0&255]&255) ^ ((Si[(r3>>8)&255]&255)<<8) ^ ((Si[(r2>>16)&255]&255)<<16) ^ (Si[(r1>>24)&255]<<24) ^ KW[0][0]; 436 C1 = (Si[r1&255]&255) ^ ((Si[(r0>>8)&255]&255)<<8) ^ ((Si[(r3>>16)&255]&255)<<16) ^ (Si[(r2>>24)&255]<<24) ^ KW[0][1]; 437 C2 = (Si[r2&255]&255) ^ ((Si[(r1>>8)&255]&255)<<8) ^ ((Si[(r0>>16)&255]&255)<<16) ^ (Si[(r3>>24)&255]<<24) ^ KW[0][2]; 438 C3 = (Si[r3&255]&255) ^ ((Si[(r2>>8)&255]&255)<<8) ^ ((Si[(r1>>16)&255]&255)<<16) ^ (Si[(r0>>24)&255]<<24) ^ KW[0][3]; 439 } 440} 441