umac.c revision d059297112922cabb0c674840589be8db821fd9a
1/* $OpenBSD: umac.c,v 1.11 2014/07/22 07:13:42 guenther Exp $ */
2/* -----------------------------------------------------------------------
3 *
4 * umac.c -- C Implementation UMAC Message Authentication
5 *
6 * Version 0.93b of rfc4418.txt -- 2006 July 18
7 *
8 * For a full description of UMAC message authentication see the UMAC
9 * world-wide-web page at http://www.cs.ucdavis.edu/~rogaway/umac
10 * Please report bugs and suggestions to the UMAC webpage.
11 *
12 * Copyright (c) 1999-2006 Ted Krovetz
13 *
14 * Permission to use, copy, modify, and distribute this software and
15 * its documentation for any purpose and with or without fee, is hereby
16 * granted provided that the above copyright notice appears in all copies
17 * and in supporting documentation, and that the name of the copyright
18 * holder not be used in advertising or publicity pertaining to
19 * distribution of the software without specific, written prior permission.
20 *
21 * Comments should be directed to Ted Krovetz (tdk@acm.org)
22 *
23 * ---------------------------------------------------------------------- */
24
25 /* ////////////////////// IMPORTANT NOTES /////////////////////////////////
26  *
27  * 1) This version does not work properly on messages larger than 16MB
28  *
29  * 2) If you set the switch to use SSE2, then all data must be 16-byte
30  *    aligned
31  *
32  * 3) When calling the function umac(), it is assumed that msg is in
33  * a writable buffer of length divisible by 32 bytes. The message itself
34  * does not have to fill the entire buffer, but bytes beyond msg may be
35  * zeroed.
36  *
37  * 4) Three free AES implementations are supported by this implementation of
38  * UMAC. Paulo Barreto's version is in the public domain and can be found
39  * at http://www.esat.kuleuven.ac.be/~rijmen/rijndael/ (search for
40  * "Barreto"). The only two files needed are rijndael-alg-fst.c and
41  * rijndael-alg-fst.h. Brian Gladman's version is distributed with the GNU
42 * Public License at http://fp.gladman.plus.com/AES/index.htm. It
43 * includes a fast IA-32 assembly version. The OpenSSL crypto library is
44 * the third.
45  *
46  * 5) With FORCE_C_ONLY flags set to 0, incorrect results are sometimes
47  * produced under gcc with optimizations set -O3 or higher. Dunno why.
48  *
49  /////////////////////////////////////////////////////////////////////// */
50
51/* In OpenSSH, this file is compiled twice, with different #defines set on the
52 * command line. Since we don't want to stretch the Android build system, in
53 * Android this file is duplicated as umac.c and umac128.c. The latter contains
54 * the #defines (that were set in OpenSSH's Makefile) at the top of the
55 * file. */
56
57/* ---------------------------------------------------------------------- */
58/* --- User Switches ---------------------------------------------------- */
59/* ---------------------------------------------------------------------- */
60
/* Tag length in bytes. A value supplied on the compiler command line wins;
 * otherwise the 8-byte tag is used. */
#if !defined(UMAC_OUTPUT_LEN)
# define UMAC_OUTPUT_LEN 8	/* Allowable: 4, 8, 12, 16 */
#endif

/* Refuse to build for a tag length this file has no code path for. */
#if !(UMAC_OUTPUT_LEN == 4 || UMAC_OUTPUT_LEN == 8 || \
      UMAC_OUTPUT_LEN == 12 || UMAC_OUTPUT_LEN == 16)
# error UMAC_OUTPUT_LEN must be defined to 4, 8, 12 or 16
#endif
69
70/* #define FORCE_C_ONLY        1  ANSI C and 64-bit integers req'd        */
71/* #define AES_IMPLEMENTAION   1  1 = OpenSSL, 2 = Barreto, 3 = Gladman   */
72/* #define SSE2                0  Is SSE2 available?                      */
73/* #define RUN_TESTS           0  Run basic correctness/speed tests       */
74/* #define UMAC_AE_SUPPORT     0  Enable authenticated encryption         */
75
76/* ---------------------------------------------------------------------- */
77/* -- Global Includes --------------------------------------------------- */
78/* ---------------------------------------------------------------------- */
79
80#include "includes.h"
81#include <sys/types.h>
82#include <string.h>
83#include <stdio.h>
84#include <stdlib.h>
85#include <stddef.h>
86
87#include "xmalloc.h"
88#include "umac.h"
89#include "misc.h"
90
91/* ---------------------------------------------------------------------- */
92/* --- Primitive Data Types ---                                           */
93/* ---------------------------------------------------------------------- */
94
95/* The following assumptions may need change on your system */
96typedef u_int8_t	UINT8;  /* 1 byte   */
97typedef u_int16_t	UINT16; /* 2 byte   */
98typedef u_int32_t	UINT32; /* 4 byte   */
99typedef u_int64_t	UINT64; /* 8 bytes  */
100typedef unsigned int	UWORD;  /* Register */
101
102/* ---------------------------------------------------------------------- */
103/* --- Constants -------------------------------------------------------- */
104/* ---------------------------------------------------------------------- */
105
/* Number of bytes of external key that UMAC takes. */
#define UMAC_KEY_LEN 16

/*
 * Message "words" are read from memory in an endian-specific manner, so
 * the rest of this file needs __LITTLE_ENDIAN__ to be 1 on a little-endian
 * host and 0 otherwise. It is derived here from BYTE_ORDER.
 */
#if BYTE_ORDER != LITTLE_ENDIAN
#define __LITTLE_ENDIAN__ 0
#else
#define __LITTLE_ENDIAN__ 1
#endif
117
118/* ---------------------------------------------------------------------- */
119/* ---------------------------------------------------------------------- */
120/* ----- Architecture Specific ------------------------------------------ */
121/* ---------------------------------------------------------------------- */
122/* ---------------------------------------------------------------------- */
123
124
125/* ---------------------------------------------------------------------- */
126/* ---------------------------------------------------------------------- */
127/* ----- Primitive Routines --------------------------------------------- */
128/* ---------------------------------------------------------------------- */
129/* ---------------------------------------------------------------------- */
130
131
132/* ---------------------------------------------------------------------- */
133/* --- 32-bit by 32-bit to 64-bit Multiplication ------------------------ */
134/* ---------------------------------------------------------------------- */
135
/* Truncate each operand to 32 bits, widen it to 64 bits, then multiply, so
 * the complete 64-bit product of two 32-bit values is produced. */
#define MUL64(a,b) \
    ((UINT64)(((UINT64)((UINT32)(a))) * ((UINT64)((UINT32)(b)))))
137
138/* ---------------------------------------------------------------------- */
139/* --- Endian Conversion --- Forcing assembly on some platforms           */
140/* ---------------------------------------------------------------------- */
141
/* Fixed-byte-order accessors. NOTE(review): get_u32()/put_u32() are
 * presumably the big-endian helpers from misc.h and the _le variants the
 * little-endian ones -- confirm against misc.h. */
#define LOAD_UINT32_LITTLE(p)		(get_u32_le(p))
#define STORE_UINT32_BIG(p,v)		put_u32(p, v)

/* The "REVERSED" accessors pick the helper of the opposite byte order to
 * the one __LITTLE_ENDIAN__ reports for the host. */
#if !(__LITTLE_ENDIAN__)
#define LOAD_UINT32_REVERSED(p)		get_u32_le(p)
#define STORE_UINT32_REVERSED(p,v)	put_u32_le(p,v)
#else
#define LOAD_UINT32_REVERSED(p)		get_u32(p)
#define STORE_UINT32_REVERSED(p,v)	put_u32(p,v)
#endif
152
153/* ---------------------------------------------------------------------- */
154/* ---------------------------------------------------------------------- */
155/* ----- Begin KDF & PDF Section ---------------------------------------- */
156/* ---------------------------------------------------------------------- */
157/* ---------------------------------------------------------------------- */
158
159/* UMAC uses AES with 16 byte block and key lengths */
160#define AES_BLOCK_LEN  16
161
162/* OpenSSL's AES */
163#ifdef WITH_OPENSSL
164#include "openbsd-compat/openssl-compat.h"
165#ifndef USE_BUILTIN_RIJNDAEL
166# include <openssl/aes.h>
167#endif
168typedef AES_KEY aes_int_key[1];
169#define aes_encryption(in,out,int_key)                  \
170  AES_encrypt((u_char *)(in),(u_char *)(out),(AES_KEY *)int_key)
171#define aes_key_setup(key,int_key)                      \
172  AES_set_encrypt_key((const u_char *)(key),UMAC_KEY_LEN*8,int_key)
173#else
174#include "rijndael.h"
175#define AES_ROUNDS ((UMAC_KEY_LEN / 4) + 6)
176typedef UINT8 aes_int_key[AES_ROUNDS+1][4][4];	/* AES internal */
177#define aes_encryption(in,out,int_key) \
178  rijndaelEncrypt((u32 *)(int_key), AES_ROUNDS, (u8 *)(in), (u8 *)(out))
179#define aes_key_setup(key,int_key) \
180  rijndaelKeySetupEnc((u32 *)(int_key), (const unsigned char *)(key), \
181  UMAC_KEY_LEN*8)
182#endif
183
184/* The user-supplied UMAC key is stretched using AES in a counter
185 * mode to supply all random bits needed by UMAC. The kdf function takes
186 * an AES internal key representation 'key' and writes a stream of
187 * 'nbytes' bytes to the memory pointed at by 'bufp'. Each distinct
188 * 'ndx' causes a distinct byte stream.
189 */
190static void kdf(void *bufp, aes_int_key key, UINT8 ndx, int nbytes)
191{
192    UINT8 in_buf[AES_BLOCK_LEN] = {0};
193    UINT8 out_buf[AES_BLOCK_LEN];
194    UINT8 *dst_buf = (UINT8 *)bufp;
195    int i;
196
197    /* Setup the initial value */
198    in_buf[AES_BLOCK_LEN-9] = ndx;
199    in_buf[AES_BLOCK_LEN-1] = i = 1;
200
201    while (nbytes >= AES_BLOCK_LEN) {
202        aes_encryption(in_buf, out_buf, key);
203        memcpy(dst_buf,out_buf,AES_BLOCK_LEN);
204        in_buf[AES_BLOCK_LEN-1] = ++i;
205        nbytes -= AES_BLOCK_LEN;
206        dst_buf += AES_BLOCK_LEN;
207    }
208    if (nbytes) {
209        aes_encryption(in_buf, out_buf, key);
210        memcpy(dst_buf,out_buf,nbytes);
211    }
212}
213
214/* The final UHASH result is XOR'd with the output of a pseudorandom
215 * function. Here, we use AES to generate random output and
216 * xor the appropriate bytes depending on the last bits of nonce.
217 * This scheme is optimized for sequential, increasing big-endian nonces.
218 */
219
220typedef struct {
221    UINT8 cache[AES_BLOCK_LEN];  /* Previous AES output is saved      */
222    UINT8 nonce[AES_BLOCK_LEN];  /* The AES input making above cache  */
223    aes_int_key prf_key;         /* Expanded AES key for PDF          */
224} pdf_ctx;
225
226static void pdf_init(pdf_ctx *pc, aes_int_key prf_key)
227{
228    UINT8 buf[UMAC_KEY_LEN];
229
230    kdf(buf, prf_key, 0, UMAC_KEY_LEN);
231    aes_key_setup(buf, pc->prf_key);
232
233    /* Initialize pdf and cache */
234    memset(pc->nonce, 0, sizeof(pc->nonce));
235    aes_encryption(pc->nonce, pc->cache, pc->prf_key);
236}
237
238static void pdf_gen_xor(pdf_ctx *pc, const UINT8 nonce[8], UINT8 buf[8])
239{
240    /* 'ndx' indicates that we'll be using the 0th or 1st eight bytes
241     * of the AES output. If last time around we returned the ndx-1st
242     * element, then we may have the result in the cache already.
243     */
244
245#if (UMAC_OUTPUT_LEN == 4)
246#define LOW_BIT_MASK 3
247#elif (UMAC_OUTPUT_LEN == 8)
248#define LOW_BIT_MASK 1
249#elif (UMAC_OUTPUT_LEN > 8)
250#define LOW_BIT_MASK 0
251#endif
252    union {
253        UINT8 tmp_nonce_lo[4];
254        UINT32 align;
255    } t;
256#if LOW_BIT_MASK != 0
257    int ndx = nonce[7] & LOW_BIT_MASK;
258#endif
259    *(UINT32 *)t.tmp_nonce_lo = ((const UINT32 *)nonce)[1];
260    t.tmp_nonce_lo[3] &= ~LOW_BIT_MASK; /* zero last bit */
261
262    if ( (((UINT32 *)t.tmp_nonce_lo)[0] != ((UINT32 *)pc->nonce)[1]) ||
263         (((const UINT32 *)nonce)[0] != ((UINT32 *)pc->nonce)[0]) )
264    {
265        ((UINT32 *)pc->nonce)[0] = ((const UINT32 *)nonce)[0];
266        ((UINT32 *)pc->nonce)[1] = ((UINT32 *)t.tmp_nonce_lo)[0];
267        aes_encryption(pc->nonce, pc->cache, pc->prf_key);
268    }
269
270#if (UMAC_OUTPUT_LEN == 4)
271    *((UINT32 *)buf) ^= ((UINT32 *)pc->cache)[ndx];
272#elif (UMAC_OUTPUT_LEN == 8)
273    *((UINT64 *)buf) ^= ((UINT64 *)pc->cache)[ndx];
274#elif (UMAC_OUTPUT_LEN == 12)
275    ((UINT64 *)buf)[0] ^= ((UINT64 *)pc->cache)[0];
276    ((UINT32 *)buf)[2] ^= ((UINT32 *)pc->cache)[2];
277#elif (UMAC_OUTPUT_LEN == 16)
278    ((UINT64 *)buf)[0] ^= ((UINT64 *)pc->cache)[0];
279    ((UINT64 *)buf)[1] ^= ((UINT64 *)pc->cache)[1];
280#endif
281}
282
283/* ---------------------------------------------------------------------- */
284/* ---------------------------------------------------------------------- */
285/* ----- Begin NH Hash Section ------------------------------------------ */
286/* ---------------------------------------------------------------------- */
287/* ---------------------------------------------------------------------- */
288
289/* The NH-based hash functions used in UMAC are described in the UMAC paper
290 * and specification, both of which can be found at the UMAC website.
291 * The interface to this implementation has two
292 * versions, one expects the entire message being hashed to be passed
293 * in a single buffer and returns the hash result immediately. The second
294 * allows the message to be passed in a sequence of buffers. In the
295 * multiple-buffer interface, the client calls the routine nh_update() as
296 * many times as necessary. When there is no more data to be fed to the
297 * hash, the client calls nh_final() which calculates the hash output.
298 * Before beginning another hash calculation the nh_reset() routine
299 * must be called. The single-buffer routine, nh(), is equivalent to
300 * the sequence of calls nh_update() and nh_final(); however it is
301 * optimized and should be preferred whenever the multiple-buffer interface
302 * is not necessary. When using either interface, it is the client's
303 * responsibility to pass no more than L1_KEY_LEN bytes per hash result.
304 *
305 * The routine nh_init() initializes the nh_ctx data structure and
306 * must be called once, before any other NH routine.
307 */
308
309 /* The "nh_aux" routines do the actual NH hashing work. They
310  * expect buffers to be multiples of L1_PAD_BOUNDARY. These routines
311  * produce output for all STREAMS NH iterations in one call,
312  * allowing the parallel implementation of the streams.
313  */
314
/* Number of times hash is applied: one stream per 4 bytes of tag */
#define STREAMS (UMAC_OUTPUT_LEN / 4)
/* Internal key bytes */
#define L1_KEY_LEN 1024
/* Toeplitz key shift between streams */
#define L1_KEY_SHIFT 16
/* Pad message to a multiple of this boundary */
#define L1_PAD_BOUNDARY 32
/* Keep buffers aligned to this */
#define ALLOC_BOUNDARY 16
/* nh_aux_hb buffer multiple; also the size of nh_ctx.data */
#define HASH_BUF_BYTES 64
321
322typedef struct {
323    UINT8  nh_key [L1_KEY_LEN + L1_KEY_SHIFT * (STREAMS - 1)]; /* NH Key */
324    UINT8  data   [HASH_BUF_BYTES];    /* Incoming data buffer           */
325    int next_data_empty;    /* Bookeeping variable for data buffer.       */
326    int bytes_hashed;        /* Bytes (out of L1_KEY_LEN) incorperated.   */
327    UINT64 state[STREAMS];               /* on-line state     */
328} nh_ctx;
329
330
331#if (UMAC_OUTPUT_LEN == 4)
332
333static void nh_aux(void *kp, const void *dp, void *hp, UINT32 dlen)
334/* NH hashing primitive. Previous (partial) hash result is loaded and
335* then stored via hp pointer. The length of the data pointed at by "dp",
336* "dlen", is guaranteed to be divisible by L1_PAD_BOUNDARY (32).  Key
337* is expected to be endian compensated in memory at key setup.
338*/
339{
340    UINT64 h;
341    UWORD c = dlen / 32;
342    UINT32 *k = (UINT32 *)kp;
343    const UINT32 *d = (const UINT32 *)dp;
344    UINT32 d0,d1,d2,d3,d4,d5,d6,d7;
345    UINT32 k0,k1,k2,k3,k4,k5,k6,k7;
346
347    h = *((UINT64 *)hp);
348    do {
349        d0 = LOAD_UINT32_LITTLE(d+0); d1 = LOAD_UINT32_LITTLE(d+1);
350        d2 = LOAD_UINT32_LITTLE(d+2); d3 = LOAD_UINT32_LITTLE(d+3);
351        d4 = LOAD_UINT32_LITTLE(d+4); d5 = LOAD_UINT32_LITTLE(d+5);
352        d6 = LOAD_UINT32_LITTLE(d+6); d7 = LOAD_UINT32_LITTLE(d+7);
353        k0 = *(k+0); k1 = *(k+1); k2 = *(k+2); k3 = *(k+3);
354        k4 = *(k+4); k5 = *(k+5); k6 = *(k+6); k7 = *(k+7);
355        h += MUL64((k0 + d0), (k4 + d4));
356        h += MUL64((k1 + d1), (k5 + d5));
357        h += MUL64((k2 + d2), (k6 + d6));
358        h += MUL64((k3 + d3), (k7 + d7));
359
360        d += 8;
361        k += 8;
362    } while (--c);
363  *((UINT64 *)hp) = h;
364}
365
366#elif (UMAC_OUTPUT_LEN == 8)
367
368static void nh_aux(void *kp, const void *dp, void *hp, UINT32 dlen)
369/* Same as previous nh_aux, but two streams are handled in one pass,
370 * reading and writing 16 bytes of hash-state per call.
371 */
372{
373  UINT64 h1,h2;
374  UWORD c = dlen / 32;
375  UINT32 *k = (UINT32 *)kp;
376  const UINT32 *d = (const UINT32 *)dp;
377  UINT32 d0,d1,d2,d3,d4,d5,d6,d7;
378  UINT32 k0,k1,k2,k3,k4,k5,k6,k7,
379        k8,k9,k10,k11;
380
381  h1 = *((UINT64 *)hp);
382  h2 = *((UINT64 *)hp + 1);
383  k0 = *(k+0); k1 = *(k+1); k2 = *(k+2); k3 = *(k+3);
384  do {
385    d0 = LOAD_UINT32_LITTLE(d+0); d1 = LOAD_UINT32_LITTLE(d+1);
386    d2 = LOAD_UINT32_LITTLE(d+2); d3 = LOAD_UINT32_LITTLE(d+3);
387    d4 = LOAD_UINT32_LITTLE(d+4); d5 = LOAD_UINT32_LITTLE(d+5);
388    d6 = LOAD_UINT32_LITTLE(d+6); d7 = LOAD_UINT32_LITTLE(d+7);
389    k4 = *(k+4); k5 = *(k+5); k6 = *(k+6); k7 = *(k+7);
390    k8 = *(k+8); k9 = *(k+9); k10 = *(k+10); k11 = *(k+11);
391
392    h1 += MUL64((k0 + d0), (k4 + d4));
393    h2 += MUL64((k4 + d0), (k8 + d4));
394
395    h1 += MUL64((k1 + d1), (k5 + d5));
396    h2 += MUL64((k5 + d1), (k9 + d5));
397
398    h1 += MUL64((k2 + d2), (k6 + d6));
399    h2 += MUL64((k6 + d2), (k10 + d6));
400
401    h1 += MUL64((k3 + d3), (k7 + d7));
402    h2 += MUL64((k7 + d3), (k11 + d7));
403
404    k0 = k8; k1 = k9; k2 = k10; k3 = k11;
405
406    d += 8;
407    k += 8;
408  } while (--c);
409  ((UINT64 *)hp)[0] = h1;
410  ((UINT64 *)hp)[1] = h2;
411}
412
413#elif (UMAC_OUTPUT_LEN == 12)
414
415static void nh_aux(void *kp, const void *dp, void *hp, UINT32 dlen)
416/* Same as previous nh_aux, but two streams are handled in one pass,
417 * reading and writing 24 bytes of hash-state per call.
418*/
419{
420    UINT64 h1,h2,h3;
421    UWORD c = dlen / 32;
422    UINT32 *k = (UINT32 *)kp;
423    const UINT32 *d = (const UINT32 *)dp;
424    UINT32 d0,d1,d2,d3,d4,d5,d6,d7;
425    UINT32 k0,k1,k2,k3,k4,k5,k6,k7,
426        k8,k9,k10,k11,k12,k13,k14,k15;
427
428    h1 = *((UINT64 *)hp);
429    h2 = *((UINT64 *)hp + 1);
430    h3 = *((UINT64 *)hp + 2);
431    k0 = *(k+0); k1 = *(k+1); k2 = *(k+2); k3 = *(k+3);
432    k4 = *(k+4); k5 = *(k+5); k6 = *(k+6); k7 = *(k+7);
433    do {
434        d0 = LOAD_UINT32_LITTLE(d+0); d1 = LOAD_UINT32_LITTLE(d+1);
435        d2 = LOAD_UINT32_LITTLE(d+2); d3 = LOAD_UINT32_LITTLE(d+3);
436        d4 = LOAD_UINT32_LITTLE(d+4); d5 = LOAD_UINT32_LITTLE(d+5);
437        d6 = LOAD_UINT32_LITTLE(d+6); d7 = LOAD_UINT32_LITTLE(d+7);
438        k8 = *(k+8); k9 = *(k+9); k10 = *(k+10); k11 = *(k+11);
439        k12 = *(k+12); k13 = *(k+13); k14 = *(k+14); k15 = *(k+15);
440
441        h1 += MUL64((k0 + d0), (k4 + d4));
442        h2 += MUL64((k4 + d0), (k8 + d4));
443        h3 += MUL64((k8 + d0), (k12 + d4));
444
445        h1 += MUL64((k1 + d1), (k5 + d5));
446        h2 += MUL64((k5 + d1), (k9 + d5));
447        h3 += MUL64((k9 + d1), (k13 + d5));
448
449        h1 += MUL64((k2 + d2), (k6 + d6));
450        h2 += MUL64((k6 + d2), (k10 + d6));
451        h3 += MUL64((k10 + d2), (k14 + d6));
452
453        h1 += MUL64((k3 + d3), (k7 + d7));
454        h2 += MUL64((k7 + d3), (k11 + d7));
455        h3 += MUL64((k11 + d3), (k15 + d7));
456
457        k0 = k8; k1 = k9; k2 = k10; k3 = k11;
458        k4 = k12; k5 = k13; k6 = k14; k7 = k15;
459
460        d += 8;
461        k += 8;
462    } while (--c);
463    ((UINT64 *)hp)[0] = h1;
464    ((UINT64 *)hp)[1] = h2;
465    ((UINT64 *)hp)[2] = h3;
466}
467
468#elif (UMAC_OUTPUT_LEN == 16)
469
470static void nh_aux(void *kp, const void *dp, void *hp, UINT32 dlen)
471/* Same as previous nh_aux, but two streams are handled in one pass,
472 * reading and writing 24 bytes of hash-state per call.
473*/
474{
475    UINT64 h1,h2,h3,h4;
476    UWORD c = dlen / 32;
477    UINT32 *k = (UINT32 *)kp;
478    const UINT32 *d = (const UINT32 *)dp;
479    UINT32 d0,d1,d2,d3,d4,d5,d6,d7;
480    UINT32 k0,k1,k2,k3,k4,k5,k6,k7,
481        k8,k9,k10,k11,k12,k13,k14,k15,
482        k16,k17,k18,k19;
483
484    h1 = *((UINT64 *)hp);
485    h2 = *((UINT64 *)hp + 1);
486    h3 = *((UINT64 *)hp + 2);
487    h4 = *((UINT64 *)hp + 3);
488    k0 = *(k+0); k1 = *(k+1); k2 = *(k+2); k3 = *(k+3);
489    k4 = *(k+4); k5 = *(k+5); k6 = *(k+6); k7 = *(k+7);
490    do {
491        d0 = LOAD_UINT32_LITTLE(d+0); d1 = LOAD_UINT32_LITTLE(d+1);
492        d2 = LOAD_UINT32_LITTLE(d+2); d3 = LOAD_UINT32_LITTLE(d+3);
493        d4 = LOAD_UINT32_LITTLE(d+4); d5 = LOAD_UINT32_LITTLE(d+5);
494        d6 = LOAD_UINT32_LITTLE(d+6); d7 = LOAD_UINT32_LITTLE(d+7);
495        k8 = *(k+8); k9 = *(k+9); k10 = *(k+10); k11 = *(k+11);
496        k12 = *(k+12); k13 = *(k+13); k14 = *(k+14); k15 = *(k+15);
497        k16 = *(k+16); k17 = *(k+17); k18 = *(k+18); k19 = *(k+19);
498
499        h1 += MUL64((k0 + d0), (k4 + d4));
500        h2 += MUL64((k4 + d0), (k8 + d4));
501        h3 += MUL64((k8 + d0), (k12 + d4));
502        h4 += MUL64((k12 + d0), (k16 + d4));
503
504        h1 += MUL64((k1 + d1), (k5 + d5));
505        h2 += MUL64((k5 + d1), (k9 + d5));
506        h3 += MUL64((k9 + d1), (k13 + d5));
507        h4 += MUL64((k13 + d1), (k17 + d5));
508
509        h1 += MUL64((k2 + d2), (k6 + d6));
510        h2 += MUL64((k6 + d2), (k10 + d6));
511        h3 += MUL64((k10 + d2), (k14 + d6));
512        h4 += MUL64((k14 + d2), (k18 + d6));
513
514        h1 += MUL64((k3 + d3), (k7 + d7));
515        h2 += MUL64((k7 + d3), (k11 + d7));
516        h3 += MUL64((k11 + d3), (k15 + d7));
517        h4 += MUL64((k15 + d3), (k19 + d7));
518
519        k0 = k8; k1 = k9; k2 = k10; k3 = k11;
520        k4 = k12; k5 = k13; k6 = k14; k7 = k15;
521        k8 = k16; k9 = k17; k10 = k18; k11 = k19;
522
523        d += 8;
524        k += 8;
525    } while (--c);
526    ((UINT64 *)hp)[0] = h1;
527    ((UINT64 *)hp)[1] = h2;
528    ((UINT64 *)hp)[2] = h3;
529    ((UINT64 *)hp)[3] = h4;
530}
531
532/* ---------------------------------------------------------------------- */
533#endif  /* UMAC_OUTPUT_LENGTH */
534/* ---------------------------------------------------------------------- */
535
536
537/* ---------------------------------------------------------------------- */
538
539static void nh_transform(nh_ctx *hc, const UINT8 *buf, UINT32 nbytes)
540/* This function is a wrapper for the primitive NH hash functions. It takes
541 * as argument "hc" the current hash context and a buffer which must be a
542 * multiple of L1_PAD_BOUNDARY. The key passed to nh_aux is offset
543 * appropriately according to how much message has been hashed already.
544 */
545{
546    UINT8 *key;
547
548    key = hc->nh_key + hc->bytes_hashed;
549    nh_aux(key, buf, hc->state, nbytes);
550}
551
552/* ---------------------------------------------------------------------- */
553
#if !(__LITTLE_ENDIAN__)
#define endian_convert_if_le(x,y,z) do{}while(0)  /* Do nothing */
#else
static void endian_convert(void *buf, UWORD bpw, UINT32 num_bytes)
/* Keys are endian converted on little-endian computers to compensate for */
/* the lack of big-endian memory reads during hashing. "bpw" is the word  */
/* size in bytes (4 or 8; anything else leaves the buffer untouched) and  */
/* "num_bytes" the buffer length. At least one word is always converted.  */
{
    UINT32 *word = (UINT32 *)buf;
    UWORD remaining = num_bytes / bpw;

    switch (bpw) {
    case 4:
        do {
            *word = LOAD_UINT32_REVERSED(word);
            word++;
        } while (--remaining);
        break;
    case 8:
        /* Reverse each half and swap the two halves */
        do {
            UINT32 upper = LOAD_UINT32_REVERSED(word + 1);

            word[1] = LOAD_UINT32_REVERSED(word);
            word[0] = upper;
            word += 2;
        } while (--remaining);
        break;
    default:
        break;
    }
}
#define endian_convert_if_le(x,y,z) endian_convert((x),(y),(z))
#endif
581
582/* ---------------------------------------------------------------------- */
583
584static void nh_reset(nh_ctx *hc)
585/* Reset nh_ctx to ready for hashing of new data */
586{
587    hc->bytes_hashed = 0;
588    hc->next_data_empty = 0;
589    hc->state[0] = 0;
590#if (UMAC_OUTPUT_LEN >= 8)
591    hc->state[1] = 0;
592#endif
593#if (UMAC_OUTPUT_LEN >= 12)
594    hc->state[2] = 0;
595#endif
596#if (UMAC_OUTPUT_LEN == 16)
597    hc->state[3] = 0;
598#endif
599
600}
601
602/* ---------------------------------------------------------------------- */
603
604static void nh_init(nh_ctx *hc, aes_int_key prf_key)
605/* Generate nh_key, endian convert and reset to be ready for hashing.   */
606{
607    kdf(hc->nh_key, prf_key, 1, sizeof(hc->nh_key));
608    endian_convert_if_le(hc->nh_key, 4, sizeof(hc->nh_key));
609    nh_reset(hc);
610}
611
612/* ---------------------------------------------------------------------- */
613
614static void nh_update(nh_ctx *hc, const UINT8 *buf, UINT32 nbytes)
615/* Incorporate nbytes of data into a nh_ctx, buffer whatever is not an    */
616/* even multiple of HASH_BUF_BYTES.                                       */
617{
618    UINT32 i,j;
619
620    j = hc->next_data_empty;
621    if ((j + nbytes) >= HASH_BUF_BYTES) {
622        if (j) {
623            i = HASH_BUF_BYTES - j;
624            memcpy(hc->data+j, buf, i);
625            nh_transform(hc,hc->data,HASH_BUF_BYTES);
626            nbytes -= i;
627            buf += i;
628            hc->bytes_hashed += HASH_BUF_BYTES;
629        }
630        if (nbytes >= HASH_BUF_BYTES) {
631            i = nbytes & ~(HASH_BUF_BYTES - 1);
632            nh_transform(hc, buf, i);
633            nbytes -= i;
634            buf += i;
635            hc->bytes_hashed += i;
636        }
637        j = 0;
638    }
639    memcpy(hc->data + j, buf, nbytes);
640    hc->next_data_empty = j + nbytes;
641}
642
643/* ---------------------------------------------------------------------- */
644
645static void zero_pad(UINT8 *p, int nbytes)
646{
647/* Write "nbytes" of zeroes, beginning at "p" */
648    if (nbytes >= (int)sizeof(UWORD)) {
649        while ((ptrdiff_t)p % sizeof(UWORD)) {
650            *p = 0;
651            nbytes--;
652            p++;
653        }
654        while (nbytes >= (int)sizeof(UWORD)) {
655            *(UWORD *)p = 0;
656            nbytes -= sizeof(UWORD);
657            p += sizeof(UWORD);
658        }
659    }
660    while (nbytes) {
661        *p = 0;
662        nbytes--;
663        p++;
664    }
665}
666
667/* ---------------------------------------------------------------------- */
668
669static void nh_final(nh_ctx *hc, UINT8 *result)
670/* After passing some number of data buffers to nh_update() for integration
671 * into an NH context, nh_final is called to produce a hash result. If any
672 * bytes are in the buffer hc->data, incorporate them into the
673 * NH context. Finally, add into the NH accumulation "state" the total number
674 * of bits hashed. The resulting numbers are written to the buffer "result".
675 * If nh_update was never called, L1_PAD_BOUNDARY zeroes are incorporated.
676 */
677{
678    int nh_len, nbits;
679
680    if (hc->next_data_empty != 0) {
681        nh_len = ((hc->next_data_empty + (L1_PAD_BOUNDARY - 1)) &
682                                                ~(L1_PAD_BOUNDARY - 1));
683        zero_pad(hc->data + hc->next_data_empty,
684                                          nh_len - hc->next_data_empty);
685        nh_transform(hc, hc->data, nh_len);
686        hc->bytes_hashed += hc->next_data_empty;
687    } else if (hc->bytes_hashed == 0) {
688    	nh_len = L1_PAD_BOUNDARY;
689        zero_pad(hc->data, L1_PAD_BOUNDARY);
690        nh_transform(hc, hc->data, nh_len);
691    }
692
693    nbits = (hc->bytes_hashed << 3);
694    ((UINT64 *)result)[0] = ((UINT64 *)hc->state)[0] + nbits;
695#if (UMAC_OUTPUT_LEN >= 8)
696    ((UINT64 *)result)[1] = ((UINT64 *)hc->state)[1] + nbits;
697#endif
698#if (UMAC_OUTPUT_LEN >= 12)
699    ((UINT64 *)result)[2] = ((UINT64 *)hc->state)[2] + nbits;
700#endif
701#if (UMAC_OUTPUT_LEN == 16)
702    ((UINT64 *)result)[3] = ((UINT64 *)hc->state)[3] + nbits;
703#endif
704    nh_reset(hc);
705}
706
707/* ---------------------------------------------------------------------- */
708
709static void nh(nh_ctx *hc, const UINT8 *buf, UINT32 padded_len,
710               UINT32 unpadded_len, UINT8 *result)
711/* All-in-one nh_update() and nh_final() equivalent.
712 * Assumes that padded_len is divisible by L1_PAD_BOUNDARY and result is
713 * well aligned
714 */
715{
716    UINT32 nbits;
717
718    /* Initialize the hash state */
719    nbits = (unpadded_len << 3);
720
721    ((UINT64 *)result)[0] = nbits;
722#if (UMAC_OUTPUT_LEN >= 8)
723    ((UINT64 *)result)[1] = nbits;
724#endif
725#if (UMAC_OUTPUT_LEN >= 12)
726    ((UINT64 *)result)[2] = nbits;
727#endif
728#if (UMAC_OUTPUT_LEN == 16)
729    ((UINT64 *)result)[3] = nbits;
730#endif
731
732    nh_aux(hc->nh_key, buf, result, padded_len);
733}
734
735/* ---------------------------------------------------------------------- */
736/* ---------------------------------------------------------------------- */
737/* ----- Begin UHASH Section -------------------------------------------- */
738/* ---------------------------------------------------------------------- */
739/* ---------------------------------------------------------------------- */
740
741/* UHASH is a multi-layered algorithm. Data presented to UHASH is first
742 * hashed by NH. The NH output is then hashed by a polynomial-hash layer
743 * unless the initial data to be hashed is short. After the polynomial-
744 * layer, an inner-product hash is used to produce the final UHASH output.
745 *
746 * UHASH provides two interfaces, one all-at-once and another where data
747 * buffers are presented sequentially. In the sequential interface, the
748 * UHASH client calls the routine uhash_update() as many times as necessary.
749 * When there is no more data to be fed to UHASH, the client calls
750 * uhash_final() which
751 * calculates the UHASH output. Before beginning another UHASH calculation
752 * the uhash_reset() routine must be called. The all-at-once UHASH routine,
753 * uhash(), is equivalent to the sequence of calls uhash_update() and
754 * uhash_final(); however it is optimized and should be
755 * used whenever the sequential interface is not necessary.
756 *
757 * The routine uhash_init() initializes the uhash_ctx data structure and
758 * must be called once, before any other UHASH routine.
759 */
760
761/* ---------------------------------------------------------------------- */
762/* ----- Constants and uhash_ctx ---------------------------------------- */
763/* ---------------------------------------------------------------------- */
764
765/* ---------------------------------------------------------------------- */
766/* ----- Poly hash and Inner-Product hash Constants --------------------- */
767/* ---------------------------------------------------------------------- */
768
/* Primes and masks */

/* The prime 2^36 - 5 */
#define p36    ((UINT64)0x0000000FFFFFFFFBull)
/* The prime 2^64 - 59 */
#define p64    ((UINT64)0xFFFFFFFFFFFFFFC5ull)
/* Mask selecting the low 36 of 64 bits */
#define m36    ((UINT64)0x0000000FFFFFFFFFull)
773
774
775/* ---------------------------------------------------------------------- */
776
777typedef struct uhash_ctx {
778    nh_ctx hash;                          /* Hash context for L1 NH hash  */
779    UINT64 poly_key_8[STREAMS];           /* p64 poly keys                */
780    UINT64 poly_accum[STREAMS];           /* poly hash result             */
781    UINT64 ip_keys[STREAMS*4];            /* Inner-product keys           */
782    UINT32 ip_trans[STREAMS];             /* Inner-product translation    */
783    UINT32 msg_len;                       /* Total length of data passed  */
784                                          /* to uhash */
785} uhash_ctx;
786typedef struct uhash_ctx *uhash_ctx_t;
787
788/* ---------------------------------------------------------------------- */
789
790
791/* The polynomial hashes use Horner's rule to evaluate a polynomial one
792 * word at a time. As described in the specification, poly32 and poly64
793 * require keys from special domains. The following implementations exploit
794 * the special domains to avoid overflow. The results are not guaranteed to
795 * be within Z_p32 and Z_p64, but the Inner-Product hash implementation
796 * patches any errant values.
797 */
798
799static UINT64 poly64(UINT64 cur, UINT64 key, UINT64 data)
800{
801    UINT32 key_hi = (UINT32)(key >> 32),
802           key_lo = (UINT32)key,
803           cur_hi = (UINT32)(cur >> 32),
804           cur_lo = (UINT32)cur,
805           x_lo,
806           x_hi;
807    UINT64 X,T,res;
808
809    X =  MUL64(key_hi, cur_lo) + MUL64(cur_hi, key_lo);
810    x_lo = (UINT32)X;
811    x_hi = (UINT32)(X >> 32);
812
813    res = (MUL64(key_hi, cur_hi) + x_hi) * 59 + MUL64(key_lo, cur_lo);
814
815    T = ((UINT64)x_lo << 32);
816    res += T;
817    if (res < T)
818        res += 59;
819
820    res += data;
821    if (res < data)
822        res += 59;
823
824    return res;
825}
826
827
828/* Although UMAC is specified to use a ramped polynomial hash scheme, this
829 * implementation does not handle all ramp levels. Because we don't handle
830 * the ramp up to p128 modulus in this implementation, we are limited to
831 * 2^14 poly_hash() invocations per stream (for a total capacity of 2^24
832 * bytes input to UMAC per tag, ie. 16MB).
833 */
834static void poly_hash(uhash_ctx_t hc, UINT32 data_in[])
835{
836    int i;
837    UINT64 *data=(UINT64*)data_in;
838
839    for (i = 0; i < STREAMS; i++) {
840        if ((UINT32)(data[i] >> 32) == 0xfffffffful) {
841            hc->poly_accum[i] = poly64(hc->poly_accum[i],
842                                       hc->poly_key_8[i], p64 - 1);
843            hc->poly_accum[i] = poly64(hc->poly_accum[i],
844                                       hc->poly_key_8[i], (data[i] - 59));
845        } else {
846            hc->poly_accum[i] = poly64(hc->poly_accum[i],
847                                       hc->poly_key_8[i], data[i]);
848        }
849    }
850}
851
852
853/* ---------------------------------------------------------------------- */
854
855
/* The final step in UHASH is an inner-product hash. The poly hash
 * produces a result not necessarily WORD_LEN bytes long. The inner-
 * product hash breaks the polyhash output into 16-bit chunks and
 * multiplies each with a 36 bit key.
 */
861
862static UINT64 ip_aux(UINT64 t, UINT64 *ipkp, UINT64 data)
863{
864    t = t + ipkp[0] * (UINT64)(UINT16)(data >> 48);
865    t = t + ipkp[1] * (UINT64)(UINT16)(data >> 32);
866    t = t + ipkp[2] * (UINT64)(UINT16)(data >> 16);
867    t = t + ipkp[3] * (UINT64)(UINT16)(data);
868
869    return t;
870}
871
872static UINT32 ip_reduce_p36(UINT64 t)
873{
874/* Divisionless modular reduction */
875    UINT64 ret;
876
877    ret = (t & m36) + 5 * (t >> 36);
878    if (ret >= p36)
879        ret -= p36;
880
881    /* return least significant 32 bits */
882    return (UINT32)(ret);
883}
884
885
886/* If the data being hashed by UHASH is no longer than L1_KEY_LEN, then
887 * the polyhash stage is skipped and ip_short is applied directly to the
888 * NH output.
889 */
890static void ip_short(uhash_ctx_t ahc, UINT8 *nh_res, u_char *res)
891{
892    UINT64 t;
893    UINT64 *nhp = (UINT64 *)nh_res;
894
895    t  = ip_aux(0,ahc->ip_keys, nhp[0]);
896    STORE_UINT32_BIG((UINT32 *)res+0, ip_reduce_p36(t) ^ ahc->ip_trans[0]);
897#if (UMAC_OUTPUT_LEN >= 8)
898    t  = ip_aux(0,ahc->ip_keys+4, nhp[1]);
899    STORE_UINT32_BIG((UINT32 *)res+1, ip_reduce_p36(t) ^ ahc->ip_trans[1]);
900#endif
901#if (UMAC_OUTPUT_LEN >= 12)
902    t  = ip_aux(0,ahc->ip_keys+8, nhp[2]);
903    STORE_UINT32_BIG((UINT32 *)res+2, ip_reduce_p36(t) ^ ahc->ip_trans[2]);
904#endif
905#if (UMAC_OUTPUT_LEN == 16)
906    t  = ip_aux(0,ahc->ip_keys+12, nhp[3]);
907    STORE_UINT32_BIG((UINT32 *)res+3, ip_reduce_p36(t) ^ ahc->ip_trans[3]);
908#endif
909}
910
911/* If the data being hashed by UHASH is longer than L1_KEY_LEN, then
912 * the polyhash stage is not skipped and ip_long is applied to the
913 * polyhash output.
914 */
915static void ip_long(uhash_ctx_t ahc, u_char *res)
916{
917    int i;
918    UINT64 t;
919
920    for (i = 0; i < STREAMS; i++) {
921        /* fix polyhash output not in Z_p64 */
922        if (ahc->poly_accum[i] >= p64)
923            ahc->poly_accum[i] -= p64;
924        t  = ip_aux(0,ahc->ip_keys+(i*4), ahc->poly_accum[i]);
925        STORE_UINT32_BIG((UINT32 *)res+i,
926                         ip_reduce_p36(t) ^ ahc->ip_trans[i]);
927    }
928}
929
930
931/* ---------------------------------------------------------------------- */
932
933/* ---------------------------------------------------------------------- */
934
935/* Reset uhash context for next hash session */
936static int uhash_reset(uhash_ctx_t pc)
937{
938    nh_reset(&pc->hash);
939    pc->msg_len = 0;
940    pc->poly_accum[0] = 1;
941#if (UMAC_OUTPUT_LEN >= 8)
942    pc->poly_accum[1] = 1;
943#endif
944#if (UMAC_OUTPUT_LEN >= 12)
945    pc->poly_accum[2] = 1;
946#endif
947#if (UMAC_OUTPUT_LEN == 16)
948    pc->poly_accum[3] = 1;
949#endif
950    return 1;
951}
952
953/* ---------------------------------------------------------------------- */
954
955/* Given a pointer to the internal key needed by kdf() and a uhash context,
956 * initialize the NH context and generate keys needed for poly and inner-
957 * product hashing. All keys are endian adjusted in memory so that native
958 * loads cause correct keys to be in registers during calculation.
959 */
960static void uhash_init(uhash_ctx_t ahc, aes_int_key prf_key)
961{
962    int i;
963    UINT8 buf[(8*STREAMS+4)*sizeof(UINT64)];
964
965    /* Zero the entire uhash context */
966    memset(ahc, 0, sizeof(uhash_ctx));
967
968    /* Initialize the L1 hash */
969    nh_init(&ahc->hash, prf_key);
970
971    /* Setup L2 hash variables */
972    kdf(buf, prf_key, 2, sizeof(buf));    /* Fill buffer with index 1 key */
973    for (i = 0; i < STREAMS; i++) {
974        /* Fill keys from the buffer, skipping bytes in the buffer not
975         * used by this implementation. Endian reverse the keys if on a
976         * little-endian computer.
977         */
978        memcpy(ahc->poly_key_8+i, buf+24*i, 8);
979        endian_convert_if_le(ahc->poly_key_8+i, 8, 8);
980        /* Mask the 64-bit keys to their special domain */
981        ahc->poly_key_8[i] &= ((UINT64)0x01ffffffu << 32) + 0x01ffffffu;
982        ahc->poly_accum[i] = 1;  /* Our polyhash prepends a non-zero word */
983    }
984
985    /* Setup L3-1 hash variables */
986    kdf(buf, prf_key, 3, sizeof(buf)); /* Fill buffer with index 2 key */
987    for (i = 0; i < STREAMS; i++)
988          memcpy(ahc->ip_keys+4*i, buf+(8*i+4)*sizeof(UINT64),
989                                                 4*sizeof(UINT64));
990    endian_convert_if_le(ahc->ip_keys, sizeof(UINT64),
991                                                  sizeof(ahc->ip_keys));
992    for (i = 0; i < STREAMS*4; i++)
993        ahc->ip_keys[i] %= p36;  /* Bring into Z_p36 */
994
995    /* Setup L3-2 hash variables    */
996    /* Fill buffer with index 4 key */
997    kdf(ahc->ip_trans, prf_key, 4, STREAMS * sizeof(UINT32));
998    endian_convert_if_le(ahc->ip_trans, sizeof(UINT32),
999                         STREAMS * sizeof(UINT32));
1000}
1001
1002/* ---------------------------------------------------------------------- */
1003
#if 0
/* Allocate a uhash context, move it up to an ALLOC_BOUNDARY boundary and
 * initialise it from key. The number of bytes skipped is recorded in the
 * byte just below the returned pointer so that uhash_free() can recover
 * the address malloc() returned. Returns a null pointer if the
 * allocation fails.
 */
static uhash_ctx_t uhash_alloc(u_char key[])
{
    aes_int_key prf_key;
    u_char pad;
    uhash_ctx_t ctx;

    ctx = (uhash_ctx_t)malloc(sizeof(uhash_ctx)+ALLOC_BOUNDARY);
    if (!ctx)
        return (ctx);

    if (ALLOC_BOUNDARY) {
        pad = ALLOC_BOUNDARY - ((ptrdiff_t)ctx & (ALLOC_BOUNDARY -1));
        ctx = (uhash_ctx_t)((u_char *)ctx + pad);
        ((u_char *)ctx)[-1] = pad;
    }

    aes_key_setup(key,prf_key);
    uhash_init(ctx, prf_key);
    return (ctx);
}
#endif
1026
1027/* ---------------------------------------------------------------------- */
1028
#if 0
/* Release a context obtained from uhash_alloc(). When ALLOC_BOUNDARY is
 * non-zero, the byte just below ctx holds the distance back to the
 * address that was originally allocated. A null ctx is ignored.
 * Always returns 1.
 */
static int uhash_free(uhash_ctx_t ctx)
{
    if (!ctx)
        return (1);

    if (ALLOC_BOUNDARY)
        ctx = (uhash_ctx_t)((u_char *)ctx - ((u_char *)ctx)[-1]);
    free(ctx);
    return (1);
}
#endif
1045/* ---------------------------------------------------------------------- */
1046
1047static int uhash_update(uhash_ctx_t ctx, const u_char *input, long len)
1048/* Given len bytes of data, we parse it into L1_KEY_LEN chunks and
1049 * hash each one with NH, calling the polyhash on each NH output.
1050 */
1051{
1052    UWORD bytes_hashed, bytes_remaining;
1053    UINT64 result_buf[STREAMS];
1054    UINT8 *nh_result = (UINT8 *)&result_buf;
1055
1056    if (ctx->msg_len + len <= L1_KEY_LEN) {
1057        nh_update(&ctx->hash, (const UINT8 *)input, len);
1058        ctx->msg_len += len;
1059    } else {
1060
1061         bytes_hashed = ctx->msg_len % L1_KEY_LEN;
1062         if (ctx->msg_len == L1_KEY_LEN)
1063             bytes_hashed = L1_KEY_LEN;
1064
1065         if (bytes_hashed + len >= L1_KEY_LEN) {
1066
1067             /* If some bytes have been passed to the hash function      */
1068             /* then we want to pass at most (L1_KEY_LEN - bytes_hashed) */
1069             /* bytes to complete the current nh_block.                  */
1070             if (bytes_hashed) {
1071                 bytes_remaining = (L1_KEY_LEN - bytes_hashed);
1072                 nh_update(&ctx->hash, (const UINT8 *)input, bytes_remaining);
1073                 nh_final(&ctx->hash, nh_result);
1074                 ctx->msg_len += bytes_remaining;
1075                 poly_hash(ctx,(UINT32 *)nh_result);
1076                 len -= bytes_remaining;
1077                 input += bytes_remaining;
1078             }
1079
1080             /* Hash directly from input stream if enough bytes */
1081             while (len >= L1_KEY_LEN) {
1082                 nh(&ctx->hash, (const UINT8 *)input, L1_KEY_LEN,
1083                                   L1_KEY_LEN, nh_result);
1084                 ctx->msg_len += L1_KEY_LEN;
1085                 len -= L1_KEY_LEN;
1086                 input += L1_KEY_LEN;
1087                 poly_hash(ctx,(UINT32 *)nh_result);
1088             }
1089         }
1090
1091         /* pass remaining < L1_KEY_LEN bytes of input data to NH */
1092         if (len) {
1093             nh_update(&ctx->hash, (const UINT8 *)input, len);
1094             ctx->msg_len += len;
1095         }
1096     }
1097
1098    return (1);
1099}
1100
1101/* ---------------------------------------------------------------------- */
1102
1103static int uhash_final(uhash_ctx_t ctx, u_char *res)
1104/* Incorporate any pending data, pad, and generate tag */
1105{
1106    UINT64 result_buf[STREAMS];
1107    UINT8 *nh_result = (UINT8 *)&result_buf;
1108
1109    if (ctx->msg_len > L1_KEY_LEN) {
1110        if (ctx->msg_len % L1_KEY_LEN) {
1111            nh_final(&ctx->hash, nh_result);
1112            poly_hash(ctx,(UINT32 *)nh_result);
1113        }
1114        ip_long(ctx, res);
1115    } else {
1116        nh_final(&ctx->hash, nh_result);
1117        ip_short(ctx,nh_result, res);
1118    }
1119    uhash_reset(ctx);
1120    return (1);
1121}
1122
1123/* ---------------------------------------------------------------------- */
1124
#if 0
/* All-at-once UHASH of the len bytes at msg, written to res.
 *
 * Assumes that msg is in a writable buffer of length divisible by
 * L1_PAD_BOUNDARY. Bytes beyond msg[len] may be zeroed. The context is
 * reset before returning. Always returns 1.
 */
static int uhash(uhash_ctx_t ahc, u_char *msg, long len, u_char *res)
{
    UINT8 nh_result[STREAMS*sizeof(UINT64)];
    UINT32 padded_len;
    int pad_bytes;

    if (len > L1_KEY_LEN) {
        /* Hash each L1_KEY_LEN chunk with NH, passing the NH output to
         * poly_hash().
         */
        do {
            nh(&ahc->hash, (UINT8 *)msg, L1_KEY_LEN, L1_KEY_LEN, nh_result);
            poly_hash(ahc, (UINT32 *)nh_result);
            msg += L1_KEY_LEN;
            len -= L1_KEY_LEN;
        } while (len >= L1_KEY_LEN);

        /* Trailing partial chunk: zero-pad up to the pad boundary */
        if (len) {
            padded_len = ((len + (L1_PAD_BOUNDARY - 1)) &
                          ~(L1_PAD_BOUNDARY - 1));
            pad_bytes = padded_len - len;
            zero_pad((UINT8 *)msg + len, pad_bytes);
            nh(&ahc->hash, (UINT8 *)msg, padded_len, len, nh_result);
            poly_hash(ahc, (UINT32 *)nh_result);
        }

        ip_long(ahc, res);
    } else {
        /* Message no longer than L1_KEY_LEN: the polyhash is skipped.
         * An empty message is padded to one full pad unit; if zero
         * length messages will never be seen this case could be dropped.
         */
        padded_len = (len == 0) ? L1_PAD_BOUNDARY :
                     ((len + (L1_PAD_BOUNDARY - 1)) & ~(L1_PAD_BOUNDARY - 1));
        pad_bytes = padded_len - len;
        zero_pad((UINT8 *)msg + len, pad_bytes);
        nh(&ahc->hash, (UINT8 *)msg, padded_len, len, nh_result);
        ip_short(ahc, nh_result, res);
    }

    uhash_reset(ahc);
    return 1;
}
#endif
1171
1172/* ---------------------------------------------------------------------- */
1173/* ---------------------------------------------------------------------- */
1174/* ----- Begin UMAC Section --------------------------------------------- */
1175/* ---------------------------------------------------------------------- */
1176/* ---------------------------------------------------------------------- */
1177
/* UMAC offers two interfaces: an all-at-once interface where
 * the entire message to be authenticated is passed to UMAC in one buffer,
 * and a sequential interface where the message is presented a little at a
 * time. The all-at-once interface is more optimized than the sequential
 * one and should be preferred when the sequential interface is not required.
 */
1184struct umac_ctx {
1185    uhash_ctx hash;          /* Hash function for message compression    */
1186    pdf_ctx pdf;             /* PDF for hashed output                    */
1187    void *free_ptr;          /* Address to free this struct via          */
1188} umac_ctx;
1189
1190/* ---------------------------------------------------------------------- */
1191
#if 0
/* Reset the hash function to begin a new authentication. Only the UHASH
 * state is reset. Always returns 1.
 */
int umac_reset(struct umac_ctx *ctx)
{
    uhash_ctx_t hc = &ctx->hash;

    uhash_reset(hc);
    return (1);
}
#endif
1200
1201/* ---------------------------------------------------------------------- */
1202
1203int umac_delete(struct umac_ctx *ctx)
1204/* Deallocate the ctx structure */
1205{
1206    if (ctx) {
1207        if (ALLOC_BOUNDARY)
1208            ctx = (struct umac_ctx *)ctx->free_ptr;
1209        free(ctx);
1210    }
1211    return (1);
1212}
1213
1214/* ---------------------------------------------------------------------- */
1215
1216struct umac_ctx *umac_new(const u_char key[])
1217/* Dynamically allocate a umac_ctx struct, initialize variables,
1218 * generate subkeys from key. Align to 16-byte boundary.
1219 */
1220{
1221    struct umac_ctx *ctx, *octx;
1222    size_t bytes_to_add;
1223    aes_int_key prf_key;
1224
1225    octx = ctx = xcalloc(1, sizeof(*ctx) + ALLOC_BOUNDARY);
1226    if (ctx) {
1227        if (ALLOC_BOUNDARY) {
1228            bytes_to_add = ALLOC_BOUNDARY -
1229                              ((ptrdiff_t)ctx & (ALLOC_BOUNDARY - 1));
1230            ctx = (struct umac_ctx *)((u_char *)ctx + bytes_to_add);
1231        }
1232        ctx->free_ptr = octx;
1233        aes_key_setup(key, prf_key);
1234        pdf_init(&ctx->pdf, prf_key);
1235        uhash_init(&ctx->hash, prf_key);
1236    }
1237
1238    return (ctx);
1239}
1240
1241/* ---------------------------------------------------------------------- */
1242
1243int umac_final(struct umac_ctx *ctx, u_char tag[], const u_char nonce[8])
1244/* Incorporate any pending data, pad, and generate tag */
1245{
1246    uhash_final(&ctx->hash, (u_char *)tag);
1247    pdf_gen_xor(&ctx->pdf, (const UINT8 *)nonce, (UINT8 *)tag);
1248
1249    return (1);
1250}
1251
1252/* ---------------------------------------------------------------------- */
1253
1254int umac_update(struct umac_ctx *ctx, const u_char *input, long len)
1255/* Given len bytes of data, we parse it into L1_KEY_LEN chunks and   */
1256/* hash each one, calling the PDF on the hashed output whenever the hash- */
1257/* output buffer is full.                                                 */
1258{
1259    uhash_update(&ctx->hash, input, len);
1260    return (1);
1261}
1262
1263/* ---------------------------------------------------------------------- */
1264
#if 0
/* All-in-one version: hash the whole message with uhash(), then combine
 * the result in tag with the PDF output for nonce. Always returns 1.
 */
int umac(struct umac_ctx *ctx, u_char *input,
         long len, u_char tag[],
         u_char nonce[8])
{
    u_char *out = tag;

    uhash(&ctx->hash, input, len, out);
    pdf_gen_xor(&ctx->pdf, (UINT8 *)nonce, (UINT8 *)out);

    return (1);
}
#endif
1277
1278/* ---------------------------------------------------------------------- */
1279/* ---------------------------------------------------------------------- */
1280/* ----- End UMAC Section ----------------------------------------------- */
1281/* ---------------------------------------------------------------------- */
1282/* ---------------------------------------------------------------------- */
1283