/* ====================================================================
 * Copyright (c) 2008 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ==================================================================== */

#include <openssl/base.h>

#include <assert.h>
#include <string.h>

#include <openssl/mem.h>
#include <openssl/cpu.h>

#include "internal.h"
#include "../../internal.h"

#if !defined(OPENSSL_NO_ASM) &&                          \
    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) ||  \
     defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) || \
     defined(OPENSSL_PPC64LE))
#define GHASH_ASM
#endif

#define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))
#define REDUCE1BIT(V)                                                 \
  do {                                                                \
    if (sizeof(size_t) == 8) {                                        \
      uint64_t T = UINT64_C(0xe100000000000000) & (0 - ((V).lo & 1)); \
      (V).lo = ((V).hi << 63) | ((V).lo >> 1);                        \
      (V).hi = ((V).hi >> 1) ^ T;                                     \
    } else {                                                          \
      uint32_t T = 0xe1000000U & (0 - (uint32_t)((V).lo & 1));        \
      (V).lo = ((V).hi << 63) | ((V).lo >> 1);                        \
      (V).hi = ((V).hi >> 1) ^ ((uint64_t)T << 32);                   \
    }                                                                 \
  } while (0)

// kSizeTWithoutLower4Bits is a mask that can be used to zero the lower four
// bits of a |size_t|.
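// For example, |len & kSizeTWithoutLower4Bits| rounds |len| down to a
// multiple of 16 (70 becomes 64); the bulk GHASH paths below use this to
// split whole 16-byte blocks from the partial tail.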
static const size_t kSizeTWithoutLower4Bits = (size_t) -16;

static void gcm_init_4bit(u128 Htable[16], uint64_t H[2]) {
  u128 V;

  Htable[0].hi = 0;
  Htable[0].lo = 0;
  V.hi = H[0];
  V.lo = H[1];

  Htable[8] = V;
  REDUCE1BIT(V);
  Htable[4] = V;
  REDUCE1BIT(V);
  Htable[2] = V;
  REDUCE1BIT(V);
  Htable[1] = V;
  Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
  V = Htable[4];
  Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
  Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
  Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
  V = Htable[8];
  Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
  Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
  Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
  Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
  Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
  Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
  Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;

#if defined(GHASH_ASM) && defined(OPENSSL_ARM)
  for (int j = 0; j < 16; ++j) {
    V = Htable[j];
    Htable[j].hi = V.lo;
    Htable[j].lo = V.hi;
  }
#endif
}

#if !defined(GHASH_ASM) || defined(OPENSSL_AARCH64) || defined(OPENSSL_PPC64LE)
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)};

static void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) {
  u128 Z;
  int cnt = 15;
  size_t rem, nlo, nhi;

  nlo = ((const uint8_t *)Xi)[15];
  nhi = nlo >> 4;
  nlo &= 0xf;

  Z.hi = Htable[nlo].hi;
  Z.lo = Htable[nlo].lo;

  while (1) {
    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nhi].hi;
    Z.lo ^= Htable[nhi].lo;

    if (--cnt < 0) {
      break;
    }

    nlo = ((const uint8_t *)Xi)[cnt];
    nhi = nlo >> 4;
    nlo &= 0xf;

    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nlo].hi;
    Z.lo ^= Htable[nlo].lo;
  }

  Xi[0] = CRYPTO_bswap8(Z.hi);
  Xi[1] = CRYPTO_bswap8(Z.lo);
}

// Streamed variant of |gcm_gmult_4bit|; see CRYPTO_gcm128_[en|de]crypt for
// details. Compiler-generated code does not seem to give any performance
// improvement, at least not on x86[_64]. It is kept here mostly as a
// reference and a placeholder for possible future non-trivial optimizations.
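// In effect, each iteration folds one 16-byte input block into |Xi| and
// multiplies the result by H via the 4-bit |Htable|, i.e. over blocks
// B1..Bn it computes Xi = (...((Xi ^ B1)*H ^ B2)*H ... ^ Bn)*H.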
static void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16],
                           const uint8_t *inp, size_t len) {
  u128 Z;
  int cnt;
  size_t rem, nlo, nhi;

  do {
    cnt = 15;
    nlo = ((const uint8_t *)Xi)[15];
    nlo ^= inp[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nhi].hi;
      Z.lo ^= Htable[nhi].lo;

      if (--cnt < 0) {
        break;
      }

      nlo = ((const uint8_t *)Xi)[cnt];
      nlo ^= inp[cnt];
      nhi = nlo >> 4;
      nlo &= 0xf;

      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nlo].hi;
      Z.lo ^= Htable[nlo].lo;
    }

    Xi[0] = CRYPTO_bswap8(Z.hi);
    Xi[1] = CRYPTO_bswap8(Z.lo);
  } while (inp += 16, len -= 16);
}
#else  // GHASH_ASM
void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#endif

#define GCM_MUL(ctx, Xi) gcm_gmult_4bit((ctx)->Xi.u, (ctx)->Htable)
#if defined(GHASH_ASM)
#define GHASH(ctx, in, len) gcm_ghash_4bit((ctx)->Xi.u, (ctx)->Htable, in, len)
// GHASH_CHUNK is a "stride parameter" intended to mitigate cache-trashing
// effects. The idea is to hash data while it is still in the L1 cache after
// the encryption pass.
#define GHASH_CHUNK (3 * 1024)
#endif


#if defined(GHASH_ASM)

#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
#define GCM_FUNCREF_4BIT
void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                     size_t len);

#if defined(OPENSSL_X86_64)
#define GHASH_ASM_X86_64
void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *in,
                   size_t len);
#define AESNI_GCM
size_t aesni_gcm_encrypt(const uint8_t *in, uint8_t *out, size_t len,
                         const void *key, uint8_t ivec[16], uint64_t *Xi);
size_t aesni_gcm_decrypt(const uint8_t *in, uint8_t *out, size_t len,
                         const void *key, uint8_t ivec[16], uint64_t *Xi);
#endif

#if defined(OPENSSL_X86)
#define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16],
                        const uint8_t *inp, size_t len);
#endif

#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#include <openssl/arm_arch.h>
#if __ARM_ARCH__ >= 7
#define GHASH_ASM_ARM
#define GCM_FUNCREF_4BIT

static int pmull_capable(void) {
  return CRYPTO_is_ARMv8_PMULL_capable();
}

void gcm_init_v8(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_v8(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                  size_t len);

#if defined(OPENSSL_ARM)
// 32-bit ARM also has support for doing GCM with NEON instructions.
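// CRYPTO_ghash_init below prefers the PMULL-based gcm_*_v8 routines when the
// CPU supports them and only falls back to these NEON routines (and then to
// the generic 4-bit table) otherwise.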
static int neon_capable(void) {
  return CRYPTO_is_NEON_capable();
}

void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#else
// AArch64 only has the ARMv8 versions of functions.
static int neon_capable(void) {
  return 0;
}
static void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]) {
  abort();
}
static void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]) {
  abort();
}
static void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16],
                           const uint8_t *inp, size_t len) {
  abort();
}
#endif

#endif
#elif defined(OPENSSL_PPC64LE)
#define GHASH_ASM_PPC64LE
#define GCM_FUNCREF_4BIT
void gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_p8(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                  size_t len);
#endif
#endif

#ifdef GCM_FUNCREF_4BIT
#undef GCM_MUL
#define GCM_MUL(ctx, Xi) (*gcm_gmult_p)((ctx)->Xi.u, (ctx)->Htable)
#ifdef GHASH
#undef GHASH
#define GHASH(ctx, in, len) (*gcm_ghash_p)((ctx)->Xi.u, (ctx)->Htable, in, len)
#endif
#endif

void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
                       u128 *out_key, u128 out_table[16], int *out_is_avx,
                       const uint8_t *gcm_key) {
  *out_is_avx = 0;

  union {
    uint64_t u[2];
    uint8_t c[16];
  } H;

  OPENSSL_memcpy(H.c, gcm_key, 16);

  // H is stored in host byte order
  H.u[0] = CRYPTO_bswap8(H.u[0]);
  H.u[1] = CRYPTO_bswap8(H.u[1]);

  OPENSSL_memcpy(out_key, H.c, 16);

#if defined(GHASH_ASM_X86_64)
  if (crypto_gcm_clmul_enabled()) {
    if (((OPENSSL_ia32cap_get()[1] >> 22) & 0x41) == 0x41) {  // AVX+MOVBE
      gcm_init_avx(out_table, H.u);
      *out_mult = gcm_gmult_avx;
      *out_hash = gcm_ghash_avx;
      *out_is_avx = 1;
      return;
    }
    gcm_init_clmul(out_table, H.u);
    *out_mult = gcm_gmult_clmul;
    *out_hash = gcm_ghash_clmul;
    return;
  }
#elif defined(GHASH_ASM_X86)
  if (crypto_gcm_clmul_enabled()) {
    gcm_init_clmul(out_table, H.u);
    *out_mult = gcm_gmult_clmul;
    *out_hash = gcm_ghash_clmul;
    return;
  }
#elif defined(GHASH_ASM_ARM)
  if (pmull_capable()) {
    gcm_init_v8(out_table, H.u);
    *out_mult = gcm_gmult_v8;
    *out_hash = gcm_ghash_v8;
    return;
  }

  if (neon_capable()) {
    gcm_init_neon(out_table, H.u);
    *out_mult = gcm_gmult_neon;
    *out_hash = gcm_ghash_neon;
    return;
  }
#elif defined(GHASH_ASM_PPC64LE)
  if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
    gcm_init_p8(out_table, H.u);
    *out_mult = gcm_gmult_p8;
    *out_hash = gcm_ghash_p8;
    return;
  }
#endif

  gcm_init_4bit(out_table, H.u);
#if defined(GHASH_ASM_X86)
  *out_mult = gcm_gmult_4bit_mmx;
  *out_hash = gcm_ghash_4bit_mmx;
#else
  *out_mult = gcm_gmult_4bit;
  *out_hash = gcm_ghash_4bit;
#endif
}

void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, const void *aes_key,
                        block128_f block, int is_aesni_encrypt) {
  OPENSSL_memset(ctx, 0, sizeof(*ctx));
  ctx->block = block;

  uint8_t gcm_key[16];
  OPENSSL_memset(gcm_key, 0, sizeof(gcm_key));
  (*block)(gcm_key, gcm_key, aes_key);

  int is_avx;
  CRYPTO_ghash_init(&ctx->gmult, &ctx->ghash, &ctx->H, ctx->Htable, &is_avx,
                    gcm_key);

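  // The stitched AES-NI/AVX bulk routines (|aesni_gcm_encrypt| and
  // |aesni_gcm_decrypt|) assume both an AES-NI key schedule and the AVX GHASH
  // representation, so only enable them when both were selected.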
  ctx->use_aesni_gcm_crypt = (is_avx && is_aesni_encrypt) ? 1 : 0;
}

void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const void *key,
                         const uint8_t *iv, size_t len) {
  unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  ctx->Yi.u[0] = 0;
  ctx->Yi.u[1] = 0;
  ctx->Xi.u[0] = 0;
  ctx->Xi.u[1] = 0;
  ctx->len.u[0] = 0;  // AAD length
  ctx->len.u[1] = 0;  // message length
  ctx->ares = 0;
  ctx->mres = 0;

  if (len == 12) {
    OPENSSL_memcpy(ctx->Yi.c, iv, 12);
    ctx->Yi.c[15] = 1;
    ctr = 1;
  } else {
    uint64_t len0 = len;

    while (len >= 16) {
      for (size_t i = 0; i < 16; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
      iv += 16;
      len -= 16;
    }
    if (len) {
      for (size_t i = 0; i < len; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
    }
    len0 <<= 3;
    ctx->Yi.u[1] ^= CRYPTO_bswap8(len0);

    GCM_MUL(ctx, Yi);
    ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
  }

  (*ctx->block)(ctx->Yi.c, ctx->EK0.c, key);
  ++ctr;
  ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
}

int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) {
  unsigned int n;
  uint64_t alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  if (ctx->len.u[1]) {
    return 0;
  }

  alen += len;
  if (alen > (UINT64_C(1) << 61) || (sizeof(len) == 8 && alen < len)) {
    return 0;
  }
  ctx->len.u[0] = alen;

  n = ctx->ares;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(aad++);
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->ares = n;
      return 1;
    }
  }

  // Process a whole number of blocks.
#ifdef GHASH
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    GHASH(ctx, aad, len_blocks);
    aad += len_blocks;
    len -= len_blocks;
  }
#else
  while (len >= 16) {
    for (size_t i = 0; i < 16; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
    GCM_MUL(ctx, Xi);
    aad += 16;
    len -= 16;
  }
#endif

  // Process the remainder.
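  // A trailing partial block is XORed into |Xi| here; |ctx->ares| records how
  // many bytes are pending so the multiplication by H is deferred until the
  // block is completed or the first encrypt/decrypt call.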
  if (len != 0) {
    n = (unsigned int)len;
    for (size_t i = 0; i < len; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
  }

  ctx->ares = n;
  return 1;
}

int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const void *key,
                          const uint8_t *in, uint8_t *out, size_t len) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    // First call to encrypt finalizes GHASH(AAD)
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  ctr = CRYPTO_bswap4(ctx->Yi.d[3]);

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
  if (STRICT_ALIGNMENT &&
      ((uintptr_t)in | (uintptr_t)out) % sizeof(size_t) != 0) {
    for (size_t i = 0; i < len; ++i) {
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      }
      ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }
#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    while (j) {
      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16; i += sizeof(size_t)) {
        store_word_le(out + i,
                      load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    while (len >= 16) {
      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16; i += sizeof(size_t)) {
        store_word_le(out + i,
                      load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
      }
      out += 16;
      in += 16;
      len -= 16;
    }
    GHASH(ctx, out - len_blocks, len_blocks);
  }
#else
  while (len >= 16) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    for (size_t i = 0; i < 16; i += sizeof(size_t)) {
      size_t tmp = load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)];
      store_word_le(out + i, tmp);
      ctx->Xi.t[i / sizeof(size_t)] ^= tmp;
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const void *key,
                          const unsigned char *in, unsigned char *out,
                          size_t len) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
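  // When GCM_FUNCREF_4BIT is defined, GCM_MUL and GHASH below expand to calls
  // through these local copies of the context's function pointers.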
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    // First call to decrypt finalizes GHASH(AAD)
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  ctr = CRYPTO_bswap4(ctx->Yi.d[3]);

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
  if (STRICT_ALIGNMENT &&
      ((uintptr_t)in | (uintptr_t)out) % sizeof(size_t) != 0) {
    for (size_t i = 0; i < len; ++i) {
      uint8_t c;
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      }
      c = in[i];
      out[i] = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }
#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    GHASH(ctx, in, GHASH_CHUNK);
    while (j) {
      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16; i += sizeof(size_t)) {
        store_word_le(out + i,
                      load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    GHASH(ctx, in, len_blocks);
    while (len >= 16) {
      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
      for (size_t i = 0; i < 16; i += sizeof(size_t)) {
        store_word_le(out + i,
                      load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
      }
      out += 16;
      in += 16;
      len -= 16;
    }
  }
#else
  while (len >= 16) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    for (size_t i = 0; i < 16; i += sizeof(size_t)) {
      size_t c = load_word_le(in + i);
      store_word_le(out + i, c ^ ctx->EKi.t[i / sizeof(size_t)]);
      ctx->Xi.t[i / sizeof(size_t)] ^= c;
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const void *key,
                                const uint8_t *in, uint8_t *out, size_t len,
                                ctr128_f stream) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    // First call to encrypt finalizes GHASH(AAD)
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  n = ctx->mres;
  if (n) {
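    // Finish the partial keystream block left over from a previous call
    // before handing whole blocks to |stream|.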
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(AESNI_GCM)
  if (ctx->use_aesni_gcm_crypt) {
    // |aesni_gcm_encrypt| may not process all the input given to it. It may
    // not process *any* of its input if it is deemed too small.
    size_t bulk = aesni_gcm_encrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
    in += bulk;
    out += bulk;
    len -= bulk;
  }
#endif

  ctr = CRYPTO_bswap4(ctx->Yi.d[3]);

#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    GHASH(ctx, out, GHASH_CHUNK);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  size_t i = len & kSizeTWithoutLower4Bits;
  if (i != 0) {
    size_t j = i / 16;

    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    in += i;
    len -= i;
#if defined(GHASH)
    GHASH(ctx, out, i);
    out += i;
#else
    while (j--) {
      for (i = 0; i < 16; ++i) {
        ctx->Xi.c[i] ^= out[i];
      }
      GCM_MUL(ctx, Xi);
      out += 16;
    }
#endif
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const void *key,
                                const uint8_t *in, uint8_t *out, size_t len,
                                ctr128_f stream) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    // First call to decrypt finalizes GHASH(AAD)
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(AESNI_GCM)
  if (ctx->use_aesni_gcm_crypt) {
    // |aesni_gcm_decrypt| may not process all the input given to it. It may
    // not process *any* of its input if it is deemed too small.
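    // It returns the number of bytes it did process; the remainder is handled
    // by the generic code below.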
    size_t bulk = aesni_gcm_decrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
    in += bulk;
    out += bulk;
    len -= bulk;
  }
#endif

  ctr = CRYPTO_bswap4(ctx->Yi.d[3]);

#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    GHASH(ctx, in, GHASH_CHUNK);
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  size_t i = len & kSizeTWithoutLower4Bits;
  if (i != 0) {
    size_t j = i / 16;

#if defined(GHASH)
    GHASH(ctx, in, i);
#else
    while (j--) {
      size_t k;
      for (k = 0; k < 16; ++k) {
        ctx->Xi.c[k] ^= in[k];
      }
      GCM_MUL(ctx, Xi);
      in += 16;
    }
    j = i / 16;
    in -= i;
#endif
    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    out += i;
    in += i;
    len -= i;
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) {
  uint64_t alen = ctx->len.u[0] << 3;
  uint64_t clen = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  if (ctx->mres || ctx->ares) {
    GCM_MUL(ctx, Xi);
  }

  alen = CRYPTO_bswap8(alen);
  clen = CRYPTO_bswap8(clen);

  ctx->Xi.u[0] ^= alen;
  ctx->Xi.u[1] ^= clen;
  GCM_MUL(ctx, Xi);

  ctx->Xi.u[0] ^= ctx->EK0.u[0];
  ctx->Xi.u[1] ^= ctx->EK0.u[1];

  if (tag && len <= sizeof(ctx->Xi)) {
    return CRYPTO_memcmp(ctx->Xi.c, tag, len) == 0;
  } else {
    return 0;
  }
}

void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) {
  CRYPTO_gcm128_finish(ctx, NULL, 0);
  OPENSSL_memcpy(tag, ctx->Xi.c,
                 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}

#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
int crypto_gcm_clmul_enabled(void) {
#ifdef GHASH_ASM
  const uint32_t *ia32cap = OPENSSL_ia32cap_get();
  return (ia32cap[0] & (1 << 24)) &&  // check FXSR bit
         (ia32cap[1] & (1 << 1));     // check PCLMULQDQ bit
#else
  return 0;
#endif
}
#endif
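
// The sketch below shows, informally, how the pieces above fit together for a
// one-shot AES-128-GCM seal. It is only an illustration and is not compiled
// into the module; the use of AES_set_encrypt_key/AES_encrypt from
// <openssl/aes.h>, the fixed 12-byte IV, and the helper's name and signature
// are assumptions made for the example, not part of this file's API.
#if 0
#include <openssl/aes.h>

static int example_aes_128_gcm_seal(uint8_t *out, uint8_t tag[16],
                                    const uint8_t key[16],
                                    const uint8_t iv[12], const uint8_t *aad,
                                    size_t aad_len, const uint8_t *in,
                                    size_t in_len) {
  AES_KEY ks;
  if (AES_set_encrypt_key(key, 128, &ks) != 0) {
    return 0;
  }

  GCM128_CONTEXT gcm;
  // The generic block function is used here, so the stitched AES-NI/AVX path
  // stays disabled (|is_aesni_encrypt| is zero).
  CRYPTO_gcm128_init(&gcm, &ks, (block128_f)AES_encrypt,
                     /*is_aesni_encrypt=*/0);
  CRYPTO_gcm128_setiv(&gcm, &ks, iv, 12);

  // AAD must be supplied before any plaintext.
  if (!CRYPTO_gcm128_aad(&gcm, aad, aad_len) ||
      !CRYPTO_gcm128_encrypt(&gcm, &ks, in, out, in_len)) {
    return 0;
  }

  CRYPTO_gcm128_tag(&gcm, tag, 16);
  return 1;
}
#endif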