1/*-
2 * Copyright © 2011, 2014
3 *	Thorsten Glaser <tg@mirbsd.org>
4 *
5 * Provided that these terms and disclaimer and all copyright notices
6 * are retained or reproduced in an accompanying document, permission
7 * is granted to deal in this work without restriction, including un‐
8 * limited rights to use, publicly perform, distribute, sell, modify,
9 * merge, give away, or sublicence.
10 *
11 * This work is provided “AS IS” and WITHOUT WARRANTY of any kind, to
12 * the utmost extent permitted by applicable law, neither express nor
13 * implied; without malicious intent or gross negligence. In no event
14 * may a licensor, author or contributor be held liable for indirect,
15 * direct, other damage, loss, or other issues arising in any way out
16 * of dealing in the work, even if advised of the possibility of such
17 * damage or existence of a defect, except proven that it results out
18 * of said person’s immediate fault when using the work as intended.
19 *-
20 * This file provides BAFH (Better Avalanche for the Jenkins Hash) as
21 * inline macro bodies that operate on “register uint32_t” variables,
22 * with variants that use their local intermediate registers.
23 *
24 * Usage note for BAFH with entropy distribution: input up to 4 bytes
25 * is best combined into a 32-bit unsigned integer, which is then run
26 * through BAFHFinish_reg for mixing and then used as context instead
27 * of 0. Longer input should be handled the same: take the first four
28 * bytes as IV after mixing then add subsequent bytes the same way.
29 * This needs counting input bytes and is endian-dependent, thus not,
30 * for speed reasons, specified for the regular stable hash, but very
31 * much recommended if the actual output value may differ across runs
32 * (so is using a random value instead of 0 for the IV).
33 *-
34 * Little quote gem:
35 *	We are looking into it. Changing the core
36 *	hash function in PHP isn't a trivial change
37 *	and will take us some time.
38 * -- Rasmus Lerdorf
39 */
40
41#ifndef SYSKERN_MIRHASH_H
42#define SYSKERN_MIRHASH_H 1
43#define SYSKERN_MIRHASH_BAFH
44
45#include <sys/types.h>
46
47__RCSID("$MirOS: src/bin/mksh/mirhash.h,v 1.3 2014/10/02 19:34:06 tg Exp $");
48
49/*-
50 * BAFH itself is defined by the following primitives:
51 *
52 * • BAFHInit(ctx) initialises the hash context, which consists of a
53 *   sole 32-bit unsigned integer (ideally in a register), to 0.
54 *   It is possible to use any initial value out of [0; 2³²[ – which
55 *   is, in fact, recommended if using BAFH for entropy distribution
56 *   – but for a regular stable hash, the IV 0 is needed.
57 *
58 * • BAFHUpdateOctet(ctx,val) compresses the unsigned 8-bit quantity
59 *   into the hash context. The algorithm used is Jenkins’ one-at-a-
60 *   time, except that an additional constant 1 is added so that, if
61 *   the context is (still) zero, adding a NUL byte is not ignored.
62 *
63 * • BAFHror(eax,cl) evaluates to the unsigned 32-bit integer “eax”,
64 *   rotated right by “cl” ∈ [0;31]; no casting, be careful!
65 *
66 * • BAFHFinish(ctx) avalanches the context around so every sub-byte
67 *   depends on all input octets; afterwards, the context variable’s
68 *   value is the hash output. BAFH does not use any padding, nor is
69 *   the input length added; this is due to the common use case (for
70 *   quick entropy distribution and use with a hashtable).
71 *   Warning: BAFHFinish uses the MixColumn algorithm of AES – which
72 *   is reversible (to avoid introducing funnels and reducing entro‐
73 *   py), so blinding may need to be employed for some uses, e.g. in
74 *   mksh, after a fork.
75 *
76 * The BAFHUpdateOctet and BAFHFinish are available in two flavours:
77 * suffixed with _reg (assumes the context is in a register) or _mem
78 * (which doesn’t).
79 *
80 * The following high-level macros (with _reg and _mem variants) are
81 * available:
82 *
83 * • BAFHUpdateMem(ctx,buf,len) adds a memory block to a context.
84 * • BAFHUpdateStr(ctx,buf) is equivalent to using len=strlen(buf).
85 * • BAFHHostMem(ctx,buf,len) calculates the hash of the memory buf‐
86 *   fer using the first 4 octets (mixed) for IV, as outlined above;
87 *   the result is endian-dependent; “ctx” assumed to be a register.
88 * • BAFHHostStr(ctx,buf) does the same for C strings.
89 *
90 * All macros may use ctx multiple times in their expansion, but all
91 * other arguments are always evaluated at most once.
92 *
93 * To stay portable, never use the BAFHHost*() macros (these are for
94 * host-local entropy shuffling), and encode numbers using ULEB128.
95 */
96
97#define BAFHInit(h) do {					\
98	(h) = 0;						\
99} while (/* CONSTCOND */ 0)
100
101#define BAFHUpdateOctet_reg(h,b) do {				\
102	(h) += (uint8_t)(b);					\
103	++(h);							\
104	(h) += (h) << 10;					\
105	(h) ^= (h) >> 6;					\
106} while (/* CONSTCOND */ 0)
107
108#define BAFHUpdateOctet_mem(m,b) do {				\
109	register uint32_t BAFH_h = (m);				\
110								\
111	BAFHUpdateOctet_reg(BAFH_h, (b));			\
112	(m) = BAFH_h;						\
113} while (/* CONSTCOND */ 0)
114
115#define BAFHror(eax,cl) (((eax) >> (cl)) | ((eax) << (32 - (cl))))
116
117#define BAFHFinish_reg(h) do {					\
118	register uint32_t BAFHFinish_v;				\
119								\
120	BAFHFinish_v = ((h) >> 7) & 0x01010101U;		\
121	BAFHFinish_v += BAFHFinish_v << 1;			\
122	BAFHFinish_v += BAFHFinish_v << 3;			\
123	BAFHFinish_v ^= ((h) << 1) & 0xFEFEFEFEU;		\
124								\
125	BAFHFinish_v ^= BAFHror(BAFHFinish_v, 8);		\
126	BAFHFinish_v ^= ((h) = BAFHror((h), 8));		\
127	BAFHFinish_v ^= ((h) = BAFHror((h), 8));		\
128	(h) = BAFHror((h), 8) ^ BAFHFinish_v;			\
129} while (/* CONSTCOND */ 0)
130
131#define BAFHFinish_mem(m) do {					\
132	register uint32_t BAFHFinish_v, BAFH_h = (m);		\
133								\
134	BAFHFinish_v = (BAFH_h >> 7) & 0x01010101U;		\
135	BAFHFinish_v += BAFHFinish_v << 1;			\
136	BAFHFinish_v += BAFHFinish_v << 3;			\
137	BAFHFinish_v ^= (BAFH_h << 1) & 0xFEFEFEFEU;		\
138								\
139	BAFHFinish_v ^= BAFHror(BAFHFinish_v, 8);		\
140	BAFHFinish_v ^= (BAFH_h = BAFHror(BAFH_h, 8));		\
141	BAFHFinish_v ^= (BAFH_h = BAFHror(BAFH_h, 8));		\
142	(m) = BAFHror(BAFH_h, 8) ^ BAFHFinish_v;		\
143} while (/* CONSTCOND */ 0)
144
145#define BAFHUpdateMem_reg(h,p,z) do {				\
146	register const uint8_t *BAFHUpdate_p;			\
147	register size_t BAFHUpdate_z = (z);			\
148								\
149	BAFHUpdate_p = (const void *)(p);			\
150	while (BAFHUpdate_z--)					\
151		BAFHUpdateOctet_reg((h), *BAFHUpdate_p++);	\
152} while (/* CONSTCOND */ 0)
153
154/* meh should have named them _r/m but that’s not valid C */
155#define BAFHUpdateMem_mem(m,p,z) do {				\
156	register uint32_t BAFH_h = (m);				\
157								\
158	BAFHUpdateMem_reg(BAFH_h, (p), (z));			\
159	(m) = BAFH_h;						\
160} while (/* CONSTCOND */ 0)
161
162#define BAFHUpdateStr_reg(h,s) do {				\
163	register const uint8_t *BAFHUpdate_s;			\
164	register uint8_t BAFHUpdate_c;				\
165								\
166	BAFHUpdate_s = (const void *)(s);			\
167	while ((BAFHUpdate_c = *BAFHUpdate_s++) != 0)		\
168		BAFHUpdateOctet_reg((h), BAFHUpdate_c);		\
169} while (/* CONSTCOND */ 0)
170
171#define BAFHUpdateStr_mem(m,s) do {				\
172	register uint32_t BAFH_h = (m);				\
173								\
174	BAFHUpdateStr_reg(BAFH_h, (s));				\
175	(m) = BAFH_h;						\
176} while (/* CONSTCOND */ 0)
177
178#define BAFHHostMem(h,p,z) do {					\
179	register const uint8_t *BAFHUpdate_p;			\
180	register size_t BAFHUpdate_z = (z);			\
181	size_t BAFHHost_z;					\
182	union {							\
183		uint8_t as_u8[4];				\
184		uint32_t as_u32;				\
185	} BAFHHost_v;						\
186								\
187	BAFHUpdate_p = (const void *)(p);			\
188	BAFHHost_v.as_u32 = 0;					\
189	BAFHHost_z = BAFHUpdate_z < 4 ? BAFHUpdate_z : 4;	\
190	memcpy(BAFHHost_v.as_u8, BAFHUpdate_p, BAFHHost_z);	\
191	BAFHUpdate_p += BAFHHost_z;				\
192	BAFHUpdate_z -= BAFHHost_z;				\
193	(h) = BAFHHost_v.as_u32;				\
194	BAFHFinish_reg(h);					\
195	while (BAFHUpdate_z--)					\
196		BAFHUpdateOctet_reg((h), *BAFHUpdate_p++);	\
197	BAFHFinish_reg(h);					\
198} while (/* CONSTCOND */ 0)
199
200#define BAFHHostStr(h,s) do {					\
201	register const uint8_t *BAFHUpdate_s;			\
202	register uint8_t BAFHUpdate_c;				\
203	union {							\
204		uint8_t as_u8[4];				\
205		uint32_t as_u32;				\
206	} BAFHHost_v;						\
207								\
208	BAFHUpdate_s = (const void *)(s);			\
209	if ((BAFHHost_v.as_u8[0] = *BAFHUpdate_s) != 0)		\
210		++BAFHUpdate_s;					\
211	if ((BAFHHost_v.as_u8[1] = *BAFHUpdate_s) != 0)		\
212		++BAFHUpdate_s;					\
213	if ((BAFHHost_v.as_u8[2] = *BAFHUpdate_s) != 0)		\
214		++BAFHUpdate_s;					\
215	if ((BAFHHost_v.as_u8[3] = *BAFHUpdate_s) != 0)		\
216		++BAFHUpdate_s;					\
217	(h) = BAFHHost_v.as_u32;				\
218	BAFHFinish_reg(h);					\
219	while ((BAFHUpdate_c = *BAFHUpdate_s++) != 0)		\
220		BAFHUpdateOctet_reg((h), BAFHUpdate_c);		\
221	BAFHFinish_reg(h);					\
222} while (/* CONSTCOND */ 0)
223
224#endif
225