/* ====================================================================
 * Copyright (c) 2008 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ==================================================================== */
4895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
4995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include <openssl/modes.h>
5095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
5195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include <assert.h>
5295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
5395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include <openssl/mem.h>
5495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include <openssl/cpu.h>
5595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
5695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include "internal.h"
5798ad22ec7ae21bdf5d5509049504b3632bfdbf93Adam Langley#include "../internal.h"
5895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
5995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
/* GHASH_ASM is defined when assembly has not been disabled through
 * OPENSSL_NO_ASM and the target is x86, x86-64 or ARM. */
#if !defined(OPENSSL_NO_ASM)
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || defined(OPENSSL_ARM)
#define GHASH_ASM
#endif
#endif
6495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
#if defined(BSWAP4) && STRICT_ALIGNMENT == 1
/* Redefine GETU32/PUTU32 as one 32-bit memory access combined with BSWAP4.
 * The note that accompanied these definitions reads "redefine, because
 * alignment is ensured": both macros cast |p| to a uint32_t pointer and
 * dereference it directly.
 * NOTE(review): confirm that every caller passes a suitably aligned pointer.
 *
 * PUTU32 is wrapped in parentheses so that it always expands to a single
 * expression, whatever operators surround the call site. */
#undef GETU32
#define GETU32(p) BSWAP4(*(const uint32_t *)(p))
#undef PUTU32
#define PUTU32(p, v) (*(uint32_t *)(p) = BSWAP4(v))
#endif
7295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
/* PACK moves the 16-bit constant |s| into the top 16 bits of a size_t. */
#define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))

/* REDUCE1BIT shifts the 128-bit value |V| (64-bit members |hi| and |lo|) right
 * by one bit. When the bit shifted out of |lo| was set, the constant 0xe1 is
 * XORed into the most significant byte of |hi|. The two branches compute the
 * same result; they differ only in building the mask with 64-bit or 32-bit
 * arithmetic depending on the width of size_t.
 *
 * |V| is evaluated several times, so it must be free of side effects. It is
 * parenthesized at every use so that any lvalue expression (for example
 * |*ptr|) may be passed. */
#define REDUCE1BIT(V)                                                    \
  do {                                                                   \
    if (sizeof(size_t) == 8) {                                           \
      uint64_t T = OPENSSL_U64(0xe100000000000000) & (0 - ((V).lo & 1)); \
      (V).lo = ((V).hi << 63) | ((V).lo >> 1);                           \
      (V).hi = ((V).hi >> 1) ^ T;                                        \
    } else {                                                             \
      uint32_t T = 0xe1000000U & (0 - (uint32_t)((V).lo & 1));           \
      (V).lo = ((V).hi << 63) | ((V).lo >> 1);                           \
      (V).hi = ((V).hi >> 1) ^ ((uint64_t)T << 32);                      \
    }                                                                    \
  } while (0)
8695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
8795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
8895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleystatic void gcm_init_4bit(u128 Htable[16], uint64_t H[2]) {
8995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  u128 V;
9095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
9195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  Htable[0].hi = 0;
9295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  Htable[0].lo = 0;
9395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  V.hi = H[0];
9495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  V.lo = H[1];
9595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
9695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  Htable[8] = V;
9795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  REDUCE1BIT(V);
9895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  Htable[4] = V;
9995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  REDUCE1BIT(V);
10095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  Htable[2] = V;
10195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  REDUCE1BIT(V);
10295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  Htable[1] = V;
10395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
10495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  V = Htable[4];
10595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
10695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
10795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
10895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  V = Htable[8];
10995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
11095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
11195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
11295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
11395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
11495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
11595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
11695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
1173f4e13c7dab89994c7c104c3309ea1ed2f26dfd1Adam Langley#if defined(GHASH_ASM) && defined(OPENSSL_ARM)
11895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  /* ARM assembler expects specific dword order in Htable. */
11995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  {
12095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    int j;
12195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    const union {
12295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      long one;
12395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      char little;
12495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } is_endian = {1};
12595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
12695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (is_endian.little) {
12795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      for (j = 0; j < 16; ++j) {
12895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        V = Htable[j];
12995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        Htable[j].hi = V.lo;
13095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        Htable[j].lo = V.hi;
13195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      }
13295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } else {
13395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      for (j = 0; j < 16; ++j) {
13495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        V = Htable[j];
13595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        Htable[j].hi = V.lo << 32 | V.lo >> 32;
13695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        Htable[j].lo = V.hi << 32 | V.hi >> 32;
13795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      }
13895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
13995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
14095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
14195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
14295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
14395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#if !defined(GHASH_ASM)
14495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleystatic const size_t rem_4bit[16] = {
14595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
14695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
14795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
14895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)};
14995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
15095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleystatic void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) {
15195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  u128 Z;
15295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  int cnt = 15;
15395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  size_t rem, nlo, nhi;
15495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  const union {
15595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    long one;
15695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    char little;
15795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  } is_endian = {1};
15895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
15995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  nlo = ((const uint8_t *)Xi)[15];
16095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  nhi = nlo >> 4;
16195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  nlo &= 0xf;
16295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
16395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  Z.hi = Htable[nlo].hi;
16495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  Z.lo = Htable[nlo].lo;
16595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
16695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  while (1) {
16795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    rem = (size_t)Z.lo & 0xf;
16895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
16995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    Z.hi = (Z.hi >> 4);
17095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (sizeof(size_t) == 8) {
17195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      Z.hi ^= rem_4bit[rem];
17295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } else {
17395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
17495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
17595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
17695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    Z.hi ^= Htable[nhi].hi;
17795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    Z.lo ^= Htable[nhi].lo;
17895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
17995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (--cnt < 0) {
18095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      break;
18195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
18295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
18395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    nlo = ((const uint8_t *)Xi)[cnt];
18495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    nhi = nlo >> 4;
18595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    nlo &= 0xf;
18695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
18795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    rem = (size_t)Z.lo & 0xf;
18895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
18995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    Z.hi = (Z.hi >> 4);
19095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (sizeof(size_t) == 8) {
19195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      Z.hi ^= rem_4bit[rem];
19295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } else {
19395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
19495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
19595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
19695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    Z.hi ^= Htable[nlo].hi;
19795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    Z.lo ^= Htable[nlo].lo;
19895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
19995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
20095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (is_endian.little) {
20195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#ifdef BSWAP8
20295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    Xi[0] = BSWAP8(Z.hi);
20395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    Xi[1] = BSWAP8(Z.lo);
20495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#else
20595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    uint8_t *p = (uint8_t *)Xi;
20695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    uint32_t v;
20795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    v = (uint32_t)(Z.hi >> 32);
20895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    PUTU32(p, v);
20995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    v = (uint32_t)(Z.hi);
21095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    PUTU32(p + 4, v);
21195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    v = (uint32_t)(Z.lo >> 32);
21295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    PUTU32(p + 8, v);
21395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    v = (uint32_t)(Z.lo);
21495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    PUTU32(p + 12, v);
21595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
21695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  } else {
21795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    Xi[0] = Z.hi;
21895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    Xi[1] = Z.lo;
21995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
22095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
22195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
22295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley/* Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
22395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley * details... Compiler-generated code doesn't seem to give any
22495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley * performance improvement, at least not on x86[_64]. It's here
22595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley * mostly as reference and a placeholder for possible future
22695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley * non-trivial optimization[s]... */
22795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleystatic void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
22895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley                           size_t len) {
22995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  u128 Z;
23095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  int cnt;
23195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  size_t rem, nlo, nhi;
23295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  const union {
23395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    long one;
23495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    char little;
23595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  } is_endian = {1};
23695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
23795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  do {
23895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    cnt = 15;
23995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    nlo = ((const uint8_t *)Xi)[15];
24095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    nlo ^= inp[15];
24195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    nhi = nlo >> 4;
24295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    nlo &= 0xf;
24395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
24495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    Z.hi = Htable[nlo].hi;
24595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    Z.lo = Htable[nlo].lo;
24695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
24795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    while (1) {
24895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      rem = (size_t)Z.lo & 0xf;
24995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
25095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      Z.hi = (Z.hi >> 4);
25195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      if (sizeof(size_t) == 8) {
25295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        Z.hi ^= rem_4bit[rem];
25395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      } else {
25495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
25595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      }
25695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
25795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      Z.hi ^= Htable[nhi].hi;
25895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      Z.lo ^= Htable[nhi].lo;
25995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
26095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      if (--cnt < 0) {
26195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        break;
26295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      }
26395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
26495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      nlo = ((const uint8_t *)Xi)[cnt];
26595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      nlo ^= inp[cnt];
26695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      nhi = nlo >> 4;
26795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      nlo &= 0xf;
26895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
26995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      rem = (size_t)Z.lo & 0xf;
27095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
27195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      Z.hi = (Z.hi >> 4);
27295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      if (sizeof(size_t) == 8) {
27395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        Z.hi ^= rem_4bit[rem];
27495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      } else {
27595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
27695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      }
27795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
27895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      Z.hi ^= Htable[nlo].hi;
27995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      Z.lo ^= Htable[nlo].lo;
28095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
28195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
28295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (is_endian.little) {
28395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#ifdef BSWAP8
28495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      Xi[0] = BSWAP8(Z.hi);
28595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      Xi[1] = BSWAP8(Z.lo);
28695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#else
28795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      uint8_t *p = (uint8_t *)Xi;
28895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      uint32_t v;
28995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      v = (uint32_t)(Z.hi >> 32);
29095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      PUTU32(p, v);
29195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      v = (uint32_t)(Z.hi);
29295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      PUTU32(p + 4, v);
29395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      v = (uint32_t)(Z.lo >> 32);
29495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      PUTU32(p + 8, v);
29595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      v = (uint32_t)(Z.lo);
29695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      PUTU32(p + 12, v);
29795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
29895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } else {
29995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      Xi[0] = Z.hi;
30095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      Xi[1] = Z.lo;
30195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
30295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  } while (inp += 16, len -= 16);
30395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
30495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#else /* GHASH_ASM */
30595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleyvoid gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
30695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleyvoid gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
30795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley                    size_t len);
30895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
30995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
/* GCM_MUL calls gcm_gmult_4bit on the |Xi| member and the |Htable| of |ctx|.
 * The second macro argument is substituted for the member name, so call
 * sites must spell it |Xi|. |ctx| is parenthesized, matching GHASH below, so
 * that any pointer expression can be passed. */
#define GCM_MUL(ctx, Xi) gcm_gmult_4bit((ctx)->Xi.u, (ctx)->Htable)
#if defined(GHASH_ASM)
/* GHASH calls gcm_ghash_4bit on |len| bytes at |in|. */
#define GHASH(ctx, in, len) gcm_ghash_4bit((ctx)->Xi.u, (ctx)->Htable, in, len)
/* GHASH_CHUNK is a "stride parameter" intended to mitigate the cache
 * thrashing effect. In other words, the idea is to hash data while it's
 * still in L1 cache after the encryption pass... */
#define GHASH_CHUNK (3 * 1024)
#endif
31895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
31995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
#if defined(GHASH_ASM)
/* Declarations for the GHASH routines that are provided outside this file
 * when GHASH_ASM is set. Both the x86/x86-64 and the ARMv7+ configurations
 * define GCM_FUNCREF_4BIT. */
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
#define GHASH_ASM_X86_OR_64
#define GCM_FUNCREF_4BIT

void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16],
                     const uint8_t *inp, size_t len);

#if defined(OPENSSL_X86)
/* 32-bit x86: the avx names are aliases for the clmul routines. */
#define gcm_init_avx gcm_init_clmul
#define gcm_gmult_avx gcm_gmult_clmul
#define gcm_ghash_avx gcm_ghash_clmul

/* 32-bit x86 additionally declares MMX and plain x86 4-bit variants. */
#define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16],
                        const uint8_t *inp, size_t len);

void gcm_gmult_4bit_x86(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(uint64_t Xi[2], const u128 Htable[16],
                        const uint8_t *inp, size_t len);
#else
/* x86-64: the avx routines are separate symbols. */
void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16],
                   const uint8_t *inp, size_t len);
#endif /* OPENSSL_X86 */

#elif defined(OPENSSL_ARM)
#include "../arm_arch.h"
#if __ARM_ARCH__ >= 7
#define GHASH_ASM_ARM
#define GCM_FUNCREF_4BIT
void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16],
                    const uint8_t *inp, size_t len);
#endif /* __ARM_ARCH__ >= 7 */
#endif /* architecture */
#endif /* GHASH_ASM */
36195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
#ifdef GCM_FUNCREF_4BIT
/* When GCM_FUNCREF_4BIT is set, GCM_MUL (and GHASH, if it was defined) are
 * rerouted through the function pointers |gcm_gmult_p| and |gcm_ghash_p|,
 * which must be in scope wherever the macros are expanded. |ctx| is
 * parenthesized so that any pointer expression can be passed. */
#undef GCM_MUL
#define GCM_MUL(ctx, Xi) (*gcm_gmult_p)((ctx)->Xi.u, (ctx)->Htable)
#ifdef GHASH
#undef GHASH
#define GHASH(ctx, in, len) (*gcm_ghash_p)((ctx)->Xi.u, (ctx)->Htable, in, len)
#endif
#endif
37095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
37195c29f3cd1f6c08c6c0927868683392eea727ccAdam LangleyGCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block) {
37295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  GCM128_CONTEXT *ret;
37395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
37495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT));
37595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (ret != NULL) {
37695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    CRYPTO_gcm128_init(ret, key, block);
37795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
37895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
37995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  return ret;
38095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
38195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
38295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleyvoid CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block) {
38395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  const union {
38495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    long one;
38595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    char little;
38695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  } is_endian = {1};
38795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
38895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  memset(ctx, 0, sizeof(*ctx));
38995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->block = block;
39095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->key = key;
39195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
39295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  (*block)(ctx->H.c, ctx->H.c, key);
39395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
39495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (is_endian.little) {
39595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley/* H is stored in host byte order */
39695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#ifdef BSWAP8
39795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
39895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
39995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#else
40095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    uint8_t *p = ctx->H.c;
40195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    uint64_t hi, lo;
40295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    hi = (uint64_t)GETU32(p) << 32 | GETU32(p + 4);
40395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    lo = (uint64_t)GETU32(p + 8) << 32 | GETU32(p + 12);
40495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctx->H.u[0] = hi;
40595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctx->H.u[1] = lo;
40695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
40795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
40895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
40995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#if defined(GHASH_ASM_X86_OR_64)
4105213df4e9ed9ca130c40f142893cb91f2e18eee1David Benjamin  if (crypto_gcm_clmul_enabled()) {
41195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
41295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      gcm_init_avx(ctx->Htable, ctx->H.u);
41395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->gmult = gcm_gmult_avx;
41495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->ghash = gcm_ghash_avx;
41595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } else {
41695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      gcm_init_clmul(ctx->Htable, ctx->H.u);
41795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->gmult = gcm_gmult_clmul;
41895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->ghash = gcm_ghash_clmul;
41995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
42095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    return;
42195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
42295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  gcm_init_4bit(ctx->Htable, ctx->H.u);
42395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#if defined(GHASH_ASM_X86) /* x86 only */
42495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
42595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctx->gmult = gcm_gmult_4bit_mmx;
42695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctx->ghash = gcm_ghash_4bit_mmx;
42795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  } else {
42895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctx->gmult = gcm_gmult_4bit_x86;
42995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctx->ghash = gcm_ghash_4bit_x86;
43095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
43195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#else
43295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->gmult = gcm_gmult_4bit;
43395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->ghash = gcm_ghash_4bit;
43495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
43595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#elif defined(GHASH_ASM_ARM)
43695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (CRYPTO_is_NEON_capable()) {
4376a57f9219519a2fb52c45ff6706e36de4735aee0Adam Langley    gcm_init_neon(ctx->Htable,ctx->H.u);
43895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctx->gmult = gcm_gmult_neon;
43995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctx->ghash = gcm_ghash_neon;
44095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  } else {
44195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    gcm_init_4bit(ctx->Htable, ctx->H.u);
44295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctx->gmult = gcm_gmult_4bit;
44395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctx->ghash = gcm_ghash_4bit;
44495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
44595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#else
44695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->gmult = gcm_gmult_4bit;
44795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->ghash = gcm_ghash_4bit;
44895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  gcm_init_4bit(ctx->Htable, ctx->H.u);
44995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
45095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
45195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
45295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleyvoid CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const uint8_t *iv, size_t len) {
45395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  const union {
45495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    long one;
45595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    char little;
45695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  } is_endian = {1};
45795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  unsigned int ctr;
45895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#ifdef GCM_FUNCREF_4BIT
45995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
46095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
46195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
46295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->Yi.u[0] = 0;
46395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->Yi.u[1] = 0;
46495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->Xi.u[0] = 0;
46595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->Xi.u[1] = 0;
46695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->len.u[0] = 0; /* AAD length */
46795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->len.u[1] = 0; /* message length */
46895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->ares = 0;
46995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->mres = 0;
47095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
47195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (len == 12) {
47295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    memcpy(ctx->Yi.c, iv, 12);
47395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctx->Yi.c[15] = 1;
47495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctr = 1;
47595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  } else {
47695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    size_t i;
47795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    uint64_t len0 = len;
47895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
47995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    while (len >= 16) {
48095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      for (i = 0; i < 16; ++i) {
48195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        ctx->Yi.c[i] ^= iv[i];
48295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      }
48395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      GCM_MUL(ctx, Yi);
48495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      iv += 16;
48595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      len -= 16;
48695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
48795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (len) {
48895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      for (i = 0; i < len; ++i) {
48995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        ctx->Yi.c[i] ^= iv[i];
49095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      }
49195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      GCM_MUL(ctx, Yi);
49295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
49395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    len0 <<= 3;
49495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (is_endian.little) {
49595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#ifdef BSWAP8
49695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Yi.u[1] ^= BSWAP8(len0);
49795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#else
49895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Yi.c[8] ^= (uint8_t)(len0 >> 56);
49995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Yi.c[9] ^= (uint8_t)(len0 >> 48);
50095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Yi.c[10] ^= (uint8_t)(len0 >> 40);
50195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Yi.c[11] ^= (uint8_t)(len0 >> 32);
50295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Yi.c[12] ^= (uint8_t)(len0 >> 24);
50395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Yi.c[13] ^= (uint8_t)(len0 >> 16);
50495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Yi.c[14] ^= (uint8_t)(len0 >> 8);
50595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Yi.c[15] ^= (uint8_t)(len0);
50695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
50795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } else {
50895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Yi.u[1] ^= len0;
50995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
51095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
51195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    GCM_MUL(ctx, Yi);
51295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
51395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (is_endian.little) {
51495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctr = GETU32(ctx->Yi.c + 12);
51595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } else {
51695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctr = ctx->Yi.d[3];
51795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
51895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
51995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
52095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  (*ctx->block)(ctx->Yi.c, ctx->EK0.c, ctx->key);
52195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ++ctr;
52295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (is_endian.little) {
52395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    PUTU32(ctx->Yi.c + 12, ctr);
52495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  } else {
52595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctx->Yi.d[3] = ctr;
52695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
52795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
52895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
52995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleyint CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) {
53095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  size_t i;
53195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  unsigned int n;
53295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  uint64_t alen = ctx->len.u[0];
53395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#ifdef GCM_FUNCREF_4BIT
53495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
53595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#ifdef GHASH
53695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
53795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley                      size_t len) = ctx->ghash;
53895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
53995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
54095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
54195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (ctx->len.u[1]) {
54295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    return 0;
54395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
54495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
54595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  alen += len;
54698ad22ec7ae21bdf5d5509049504b3632bfdbf93Adam Langley  if (alen > (OPENSSL_U64(1) << 61) || (sizeof(len) == 8 && alen < len)) {
54795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    return 0;
54895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
54995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->len.u[0] = alen;
55095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
55195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  n = ctx->ares;
55295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (n) {
55395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    while (n && len) {
55495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Xi.c[n] ^= *(aad++);
55595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      --len;
55695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      n = (n + 1) % 16;
55795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
55895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (n == 0) {
55995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      GCM_MUL(ctx, Xi);
56095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } else {
56195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->ares = n;
56295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      return 1;
56395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
56495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
56595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
56695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#ifdef GHASH
56795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if ((i = (len & (size_t) - 16))) {
56895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    GHASH(ctx, aad, i);
56995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    aad += i;
57095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    len -= i;
57195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
57295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#else
57395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  while (len >= 16) {
57495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    for (i = 0; i < 16; ++i) {
57595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Xi.c[i] ^= aad[i];
57695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
57795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    GCM_MUL(ctx, Xi);
57895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    aad += 16;
57995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    len -= 16;
58095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
58195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
58295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (len) {
58395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    n = (unsigned int)len;
58495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    for (i = 0; i < len; ++i)
58595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Xi.c[i] ^= aad[i];
58695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
58795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
58895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->ares = n;
58995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  return 1;
59095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
59195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
59295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleyint CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const unsigned char *in,
59395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley                          unsigned char *out, size_t len) {
59495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  const union {
59595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    long one;
59695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    char little;
59795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  } is_endian = {1};
59895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  unsigned int n, ctr;
59995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  size_t i;
60095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  uint64_t mlen = ctx->len.u[1];
60195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  block128_f block = ctx->block;
60295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  void *key = ctx->key;
60395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#ifdef GCM_FUNCREF_4BIT
60495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
60595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#ifdef GHASH
60695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
60795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley                      size_t len) = ctx->ghash;
60895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
60995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
61095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
61195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  mlen += len;
61298ad22ec7ae21bdf5d5509049504b3632bfdbf93Adam Langley  if (mlen > ((OPENSSL_U64(1) << 36) - 32) ||
61398ad22ec7ae21bdf5d5509049504b3632bfdbf93Adam Langley      (sizeof(len) == 8 && mlen < len)) {
61495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    return 0;
61595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
61695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->len.u[1] = mlen;
61795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
61895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (ctx->ares) {
61995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    /* First call to encrypt finalizes GHASH(AAD) */
62095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    GCM_MUL(ctx, Xi);
62195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctx->ares = 0;
62295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
62395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
62495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (is_endian.little) {
62595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctr = GETU32(ctx->Yi.c + 12);
62695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  } else {
62795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctr = ctx->Yi.d[3];
62895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
62995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
63095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  n = ctx->mres;
63195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (n) {
63295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    while (n && len) {
63395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
63495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      --len;
63595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      n = (n + 1) % 16;
63695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
63795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (n == 0) {
63895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      GCM_MUL(ctx, Xi);
63995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } else {
64095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->mres = n;
64195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      return 1;
64295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
64395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
64495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
64595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    for (i = 0; i < len; ++i) {
64695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      if (n == 0) {
64795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        (*block)(ctx->Yi.c, ctx->EKi.c, key);
64895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        ++ctr;
64995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        if (is_endian.little) {
65095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley          PUTU32(ctx->Yi.c + 12, ctr);
65195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        } else {
65295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley          ctx->Yi.d[3] = ctr;
65395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        }
65495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      }
65595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
65695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      n = (n + 1) % 16;
65795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      if (n == 0) {
65895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        GCM_MUL(ctx, Xi);
65995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      }
66095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
66195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
66295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctx->mres = n;
66395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    return 1;
66495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
66595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#if defined(GHASH) && defined(GHASH_CHUNK)
66695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  while (len >= GHASH_CHUNK) {
66795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    size_t j = GHASH_CHUNK;
66895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
66995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    while (j) {
67095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      size_t *out_t = (size_t *)out;
67195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      const size_t *in_t = (const size_t *)in;
67295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
67395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      (*block)(ctx->Yi.c, ctx->EKi.c, key);
67495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ++ctr;
67595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      if (is_endian.little) {
67695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        PUTU32(ctx->Yi.c + 12, ctr);
67795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      } else {
67895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        ctx->Yi.d[3] = ctr;
67995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      }
68095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      for (i = 0; i < 16 / sizeof(size_t); ++i) {
68195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
68295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      }
68395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      out += 16;
68495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      in += 16;
68595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      j -= 16;
68695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
68795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
68895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    len -= GHASH_CHUNK;
68995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
69095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if ((i = (len & (size_t) - 16))) {
69195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    size_t j = i;
69295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
69395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    while (len >= 16) {
69495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      size_t *out_t = (size_t *)out;
69595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      const size_t *in_t = (const size_t *)in;
69695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
69795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      (*block)(ctx->Yi.c, ctx->EKi.c, key);
69895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ++ctr;
69995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      if (is_endian.little) {
70095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        PUTU32(ctx->Yi.c + 12, ctr);
70195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      } else {
70295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        ctx->Yi.d[3] = ctr;
70395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      }
70495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      for (i = 0; i < 16 / sizeof(size_t); ++i) {
70595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
70695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      }
70795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      out += 16;
70895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      in += 16;
70995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      len -= 16;
71095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
71195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    GHASH(ctx, out - j, j);
71295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
71395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#else
71495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  while (len >= 16) {
71595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    size_t *out_t = (size_t *)out;
71695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    const size_t *in_t = (const size_t *)in;
71795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
71895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    (*block)(ctx->Yi.c, ctx->EKi.c, key);
71995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ++ctr;
72095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (is_endian.little) {
72195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      PUTU32(ctx->Yi.c + 12, ctr);
72295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } else {
72395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Yi.d[3] = ctr;
72495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
72595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    for (i = 0; i < 16 / sizeof(size_t); ++i) {
72695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
72795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
72895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    GCM_MUL(ctx, Xi);
72995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    out += 16;
73095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    in += 16;
73195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    len -= 16;
73295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
73395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
73495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (len) {
73595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    (*block)(ctx->Yi.c, ctx->EKi.c, key);
73695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ++ctr;
73795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (is_endian.little) {
73895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      PUTU32(ctx->Yi.c + 12, ctr);
73995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } else {
74095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Yi.d[3] = ctr;
74195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
74295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    while (len--) {
74395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
74495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ++n;
74595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
74695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
74795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
74895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->mres = n;
74995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  return 1;
75095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
75195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
75295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleyint CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const unsigned char *in,
75395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley                          unsigned char *out, size_t len) {
75495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  const union {
75595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    long one;
75695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    char little;
75795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  } is_endian = {1};
75895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  unsigned int n, ctr;
75995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  size_t i;
76095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  uint64_t mlen = ctx->len.u[1];
76195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  block128_f block = ctx->block;
76295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  void *key = ctx->key;
76395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#ifdef GCM_FUNCREF_4BIT
76495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
76595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#ifdef GHASH
76695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
76795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley                      size_t len) = ctx->ghash;
76895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
76995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
77095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
77195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  mlen += len;
77298ad22ec7ae21bdf5d5509049504b3632bfdbf93Adam Langley  if (mlen > ((OPENSSL_U64(1) << 36) - 32) ||
77398ad22ec7ae21bdf5d5509049504b3632bfdbf93Adam Langley      (sizeof(len) == 8 && mlen < len)) {
77495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    return 0;
77595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
77695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->len.u[1] = mlen;
77795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
77895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (ctx->ares) {
77995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    /* First call to decrypt finalizes GHASH(AAD) */
78095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    GCM_MUL(ctx, Xi);
78195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctx->ares = 0;
78295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
78395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
78495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (is_endian.little) {
78595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctr = GETU32(ctx->Yi.c + 12);
78695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  } else {
78795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctr = ctx->Yi.d[3];
78895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
78995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
79095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  n = ctx->mres;
79195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (n) {
79295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    while (n && len) {
79395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      uint8_t c = *(in++);
79495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      *(out++) = c ^ ctx->EKi.c[n];
79595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Xi.c[n] ^= c;
79695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      --len;
79795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      n = (n + 1) % 16;
79895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
79995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (n == 0) {
80095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      GCM_MUL(ctx, Xi);
80195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } else {
80295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->mres = n;
80395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      return 1;
80495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
80595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
80695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
80795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    for (i = 0; i < len; ++i) {
80895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      uint8_t c;
80995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      if (n == 0) {
81095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        (*block)(ctx->Yi.c, ctx->EKi.c, key);
81195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        ++ctr;
81295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        if (is_endian.little) {
81395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley          PUTU32(ctx->Yi.c + 12, ctr);
81495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        } else {
81595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley          ctx->Yi.d[3] = ctr;
81695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        }
81795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      }
81895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      c = in[i];
81995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      out[i] = c ^ ctx->EKi.c[n];
82095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Xi.c[n] ^= c;
82195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      n = (n + 1) % 16;
82295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      if (n == 0) {
82395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        GCM_MUL(ctx, Xi);
82495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      }
82595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
82695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
82795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctx->mres = n;
82895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    return 1;
82995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
83095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#if defined(GHASH) && defined(GHASH_CHUNK)
83195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  while (len >= GHASH_CHUNK) {
83295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    size_t j = GHASH_CHUNK;
83395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
83495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    GHASH(ctx, in, GHASH_CHUNK);
83595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    while (j) {
83695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      size_t *out_t = (size_t *)out;
83795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      const size_t *in_t = (const size_t *)in;
83895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
83995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      (*block)(ctx->Yi.c, ctx->EKi.c, key);
84095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ++ctr;
84195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      if (is_endian.little) {
84295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        PUTU32(ctx->Yi.c + 12, ctr);
84395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      } else {
84495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        ctx->Yi.d[3] = ctr;
84595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      }
84695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      for (i = 0; i < 16 / sizeof(size_t); ++i) {
84795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
84895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      }
84995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      out += 16;
85095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      in += 16;
85195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      j -= 16;
85295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
85395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    len -= GHASH_CHUNK;
85495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
85595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if ((i = (len & (size_t) - 16))) {
85695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    GHASH(ctx, in, i);
85795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    while (len >= 16) {
85895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      size_t *out_t = (size_t *)out;
85995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      const size_t *in_t = (const size_t *)in;
86095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
86195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      (*block)(ctx->Yi.c, ctx->EKi.c, key);
86295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ++ctr;
86395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      if (is_endian.little) {
86495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        PUTU32(ctx->Yi.c + 12, ctr);
86595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      } else {
86695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        ctx->Yi.d[3] = ctr;
86795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      }
86895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      for (i = 0; i < 16 / sizeof(size_t); ++i) {
86995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
87095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      }
87195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      out += 16;
87295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      in += 16;
87395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      len -= 16;
87495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
87595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
87695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#else
87795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  while (len >= 16) {
87895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    size_t *out_t = (size_t *)out;
87995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    const size_t *in_t = (const size_t *)in;
88095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
88195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    (*block)(ctx->Yi.c, ctx->EKi.c, key);
88295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ++ctr;
88395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (is_endian.little) {
88495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      PUTU32(ctx->Yi.c + 12, ctr);
88595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } else {
88695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Yi.d[3] = ctr;
88795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
88895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    for (i = 0; i < 16 / sizeof(size_t); ++i) {
88995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      size_t c = in_t[i];
89095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      out_t[i] = c ^ ctx->EKi.t[i];
89195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Xi.t[i] ^= c;
89295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
89395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    GCM_MUL(ctx, Xi);
89495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    out += 16;
89595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    in += 16;
89695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    len -= 16;
89795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
89895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
89995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (len) {
90095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    (*block)(ctx->Yi.c, ctx->EKi.c, key);
90195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ++ctr;
90295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (is_endian.little) {
90395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      PUTU32(ctx->Yi.c + 12, ctr);
90495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } else {
90595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Yi.d[3] = ctr;
90695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
90795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    while (len--) {
90895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      uint8_t c = in[n];
90995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Xi.c[n] ^= c;
91095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      out[n] = c ^ ctx->EKi.c[n];
91195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ++n;
91295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
91395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
91495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
91595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->mres = n;
91695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  return 1;
91795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
91895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
91995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleyint CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const uint8_t *in,
92095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley                                uint8_t *out, size_t len, ctr128_f stream) {
92195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  const union {
92295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    long one;
92395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    char little;
92495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  } is_endian = {1};
92595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  unsigned int n, ctr;
92695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  size_t i;
92795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  uint64_t mlen = ctx->len.u[1];
92895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  void *key = ctx->key;
92995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#ifdef GCM_FUNCREF_4BIT
93095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
93195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#ifdef GHASH
93295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
93395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley                      size_t len) = ctx->ghash;
93495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
93595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
93695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
93795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  mlen += len;
93898ad22ec7ae21bdf5d5509049504b3632bfdbf93Adam Langley  if (mlen > ((OPENSSL_U64(1) << 36) - 32) ||
93998ad22ec7ae21bdf5d5509049504b3632bfdbf93Adam Langley      (sizeof(len) == 8 && mlen < len)) {
94095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    return 0;
94195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
94295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->len.u[1] = mlen;
94395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
94495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (ctx->ares) {
94595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    /* First call to encrypt finalizes GHASH(AAD) */
94695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    GCM_MUL(ctx, Xi);
94795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctx->ares = 0;
94895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
94995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
95095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (is_endian.little) {
95195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctr = GETU32(ctx->Yi.c + 12);
95295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  } else {
95395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctr = ctx->Yi.d[3];
95495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
95595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
95695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  n = ctx->mres;
95795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (n) {
95895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    while (n && len) {
95995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
96095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      --len;
96195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      n = (n + 1) % 16;
96295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
96395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (n == 0) {
96495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      GCM_MUL(ctx, Xi);
96595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } else {
96695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->mres = n;
96795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      return 1;
96895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
96995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
97095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#if defined(GHASH)
97195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  while (len >= GHASH_CHUNK) {
97295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
97395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctr += GHASH_CHUNK / 16;
97495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (is_endian.little) {
97595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      PUTU32(ctx->Yi.c + 12, ctr);
97695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } else {
97795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Yi.d[3] = ctr;
97895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
97995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    GHASH(ctx, out, GHASH_CHUNK);
98095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    out += GHASH_CHUNK;
98195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    in += GHASH_CHUNK;
98295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    len -= GHASH_CHUNK;
98395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
98495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
98595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if ((i = (len & (size_t) - 16))) {
98695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    size_t j = i / 16;
98795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
98895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    (*stream)(in, out, j, key, ctx->Yi.c);
98995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctr += (unsigned int)j;
99095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (is_endian.little) {
99195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      PUTU32(ctx->Yi.c + 12, ctr);
99295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } else {
99395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Yi.d[3] = ctr;
99495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
99595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    in += i;
99695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    len -= i;
99795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#if defined(GHASH)
99895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    GHASH(ctx, out, i);
99995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    out += i;
100095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#else
100195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    while (j--) {
100295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      for (i = 0; i < 16; ++i) {
100395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        ctx->Xi.c[i] ^= out[i];
100495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      }
100595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      GCM_MUL(ctx, Xi);
100695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      out += 16;
100795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
100895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
100995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
101095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (len) {
101195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
101295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ++ctr;
101395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (is_endian.little) {
101495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      PUTU32(ctx->Yi.c + 12, ctr);
101595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } else {
101695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Yi.d[3] = ctr;
101795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
101895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    while (len--) {
101995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
102095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ++n;
102195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
102295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
102395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
102495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->mres = n;
102595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  return 1;
102695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
102795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
102895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleyint CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const uint8_t *in,
102995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley                                uint8_t *out, size_t len,
103095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley                                ctr128_f stream) {
103195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  const union {
103295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    long one;
103395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    char little;
103495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  } is_endian = {1};
103595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  unsigned int n, ctr;
103695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  size_t i;
103795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  uint64_t mlen = ctx->len.u[1];
103895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  void *key = ctx->key;
103995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#ifdef GCM_FUNCREF_4BIT
104095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
104195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#ifdef GHASH
104295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
104395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley                      size_t len) = ctx->ghash;
104495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
104595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
104695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
104795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  mlen += len;
104898ad22ec7ae21bdf5d5509049504b3632bfdbf93Adam Langley  if (mlen > ((OPENSSL_U64(1) << 36) - 32) ||
104998ad22ec7ae21bdf5d5509049504b3632bfdbf93Adam Langley      (sizeof(len) == 8 && mlen < len)) {
105095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    return 0;
105195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
105295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->len.u[1] = mlen;
105395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
105495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (ctx->ares) {
105595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    /* First call to decrypt finalizes GHASH(AAD) */
105695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    GCM_MUL(ctx, Xi);
105795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctx->ares = 0;
105895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
105995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
106095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (is_endian.little) {
106195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctr = GETU32(ctx->Yi.c + 12);
106295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  } else {
106395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctr = ctx->Yi.d[3];
106495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
106595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
106695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  n = ctx->mres;
106795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (n) {
106895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    while (n && len) {
106995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      uint8_t c = *(in++);
107095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      *(out++) = c ^ ctx->EKi.c[n];
107195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Xi.c[n] ^= c;
107295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      --len;
107395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      n = (n + 1) % 16;
107495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
107595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (n == 0) {
107695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      GCM_MUL(ctx, Xi);
107795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } else {
107895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->mres = n;
107995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      return 1;
108095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
108195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
108295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#if defined(GHASH)
108395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  while (len >= GHASH_CHUNK) {
108495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    GHASH(ctx, in, GHASH_CHUNK);
108595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
108695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctr += GHASH_CHUNK / 16;
108795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (is_endian.little)
108895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      PUTU32(ctx->Yi.c + 12, ctr);
108995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    else
109095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Yi.d[3] = ctr;
109195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    out += GHASH_CHUNK;
109295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    in += GHASH_CHUNK;
109395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    len -= GHASH_CHUNK;
109495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
109595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
109695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if ((i = (len & (size_t) - 16))) {
109795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    size_t j = i / 16;
109895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
109995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#if defined(GHASH)
110095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    GHASH(ctx, in, i);
110195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#else
110295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    while (j--) {
110395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      size_t k;
110495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      for (k = 0; k < 16; ++k)
110595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        ctx->Xi.c[k] ^= in[k];
110695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      GCM_MUL(ctx, Xi);
110795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      in += 16;
110895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
110995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    j = i / 16;
111095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    in -= i;
111195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
111295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    (*stream)(in, out, j, key, ctx->Yi.c);
111395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctr += (unsigned int)j;
111495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (is_endian.little)
111595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      PUTU32(ctx->Yi.c + 12, ctr);
111695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    else
111795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Yi.d[3] = ctr;
111895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    out += i;
111995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    in += i;
112095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    len -= i;
112195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
112295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (len) {
112395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
112495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ++ctr;
112595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (is_endian.little)
112695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      PUTU32(ctx->Yi.c + 12, ctr);
112795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    else
112895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Yi.d[3] = ctr;
112995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    while (len--) {
113095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      uint8_t c = in[n];
113195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ctx->Xi.c[n] ^= c;
113295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      out[n] = c ^ ctx->EKi.c[n];
113395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley      ++n;
113495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
113595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
113695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
113795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->mres = n;
113895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  return 1;
113995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
114095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
114195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleyint CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) {
114295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  const union {
114395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    long one;
114495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    char little;
114595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  } is_endian = {1};
114695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  uint64_t alen = ctx->len.u[0] << 3;
114795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  uint64_t clen = ctx->len.u[1] << 3;
114895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#ifdef GCM_FUNCREF_4BIT
114995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
115095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
115195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
115295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (ctx->mres || ctx->ares) {
115395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    GCM_MUL(ctx, Xi);
115495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
115595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
115695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (is_endian.little) {
115795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#ifdef BSWAP8
115895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    alen = BSWAP8(alen);
115995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    clen = BSWAP8(clen);
116095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#else
116195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    uint8_t *p = ctx->len.c;
116295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
116395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctx->len.u[0] = alen;
116495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    ctx->len.u[1] = clen;
116595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
116695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    alen = (uint64_t)GETU32(p) << 32 | GETU32(p + 4);
116795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    clen = (uint64_t)GETU32(p + 8) << 32 | GETU32(p + 12);
116895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#endif
116995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
117095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
117195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->Xi.u[0] ^= alen;
117295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->Xi.u[1] ^= clen;
117395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  GCM_MUL(ctx, Xi);
117495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
117595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->Xi.u[0] ^= ctx->EK0.u[0];
117695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  ctx->Xi.u[1] ^= ctx->EK0.u[1];
117795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
117895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (tag && len <= sizeof(ctx->Xi)) {
117995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    return CRYPTO_memcmp(ctx->Xi.c, tag, len) == 0;
118095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  } else {
118195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    return 0;
118295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
118395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
118495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
118595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleyvoid CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) {
118695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  CRYPTO_gcm128_finish(ctx, NULL, 0);
118795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  memcpy(tag, ctx->Xi.c, len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
118895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
118995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
119095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleyvoid CRYPTO_gcm128_release(GCM128_CONTEXT *ctx) {
119195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  if (ctx) {
119295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    OPENSSL_cleanse(ctx, sizeof(*ctx));
119395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    OPENSSL_free(ctx);
119495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley  }
119595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
11965213df4e9ed9ca130c40f142893cb91f2e18eee1David Benjamin
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
/* crypto_gcm_clmul_enabled returns one if both the FXSR and PCLMULQDQ
 * capability bits are set in |OPENSSL_ia32cap_P| and the file was built with
 * GHASH_ASM, and zero otherwise. Without GHASH_ASM it always returns zero. */
int crypto_gcm_clmul_enabled(void) {
#ifdef GHASH_ASM
  /* Bit 24 of word 0 is the FXSR bit; bit 1 of word 1 is the PCLMULQDQ bit. */
  const int has_fxsr = (OPENSSL_ia32cap_P[0] & (1 << 24)) != 0;
  const int has_pclmulqdq = (OPENSSL_ia32cap_P[1] & (1 << 1)) != 0;

  return has_fxsr && has_pclmulqdq;
#else
  return 0;
#endif
}
#endif
1207