1/* Set of hash utility functions to help maintaining the invariant that
2    if a==b then hash(a)==hash(b)
3
4   All the utility functions (_Py_Hash*()) return "-1" to signify an error.
5*/
6#include "Python.h"
7
8#ifdef __APPLE__
9#  include <libkern/OSByteOrder.h>
10#elif defined(HAVE_LE64TOH) && defined(HAVE_ENDIAN_H)
11#  include <endian.h>
12#elif defined(HAVE_LE64TOH) && defined(HAVE_SYS_ENDIAN_H)
13#  include <sys/endian.h>
14#endif
15
16#ifdef __cplusplus
17extern "C" {
18#endif
19
20_Py_HashSecret_t _Py_HashSecret;
21
22#if Py_HASH_ALGORITHM == Py_HASH_EXTERNAL
23extern PyHash_FuncDef PyHash_Func;
24#else
25static PyHash_FuncDef PyHash_Func;
26#endif
27
28/* Count _Py_HashBytes() calls */
29#ifdef Py_HASH_STATS
30#define Py_HASH_STATS_MAX 32
31static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
32#endif
33
34/* For numeric types, the hash of a number x is based on the reduction
35   of x modulo the prime P = 2**_PyHASH_BITS - 1.  It's designed so that
36   hash(x) == hash(y) whenever x and y are numerically equal, even if
37   x and y have different types.
38
39   A quick summary of the hashing strategy:
40
41   (1) First define the 'reduction of x modulo P' for any rational
42   number x; this is a standard extension of the usual notion of
43   reduction modulo P for integers.  If x == p/q (written in lowest
44   terms), the reduction is interpreted as the reduction of p times
45   the inverse of the reduction of q, all modulo P; if q is exactly
46   divisible by P then define the reduction to be infinity.  So we've
47   got a well-defined map
48
49      reduce : { rational numbers } -> { 0, 1, 2, ..., P-1, infinity }.
50
51   (2) Now for a rational number x, define hash(x) by:
52
53      reduce(x)   if x >= 0
54      -reduce(-x) if x < 0
55
56   If the result of the reduction is infinity (this is impossible for
57   integers, floats and Decimals) then use the predefined hash value
58   _PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead.
59   _PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the
60   hashes of float and Decimal infinities and nans.
61
62   A selling point for the above strategy is that it makes it possible
63   to compute hashes of decimal and binary floating-point numbers
64   efficiently, even if the exponent of the binary or decimal number
65   is large.  The key point is that
66
67      reduce(x * y) == reduce(x) * reduce(y) (modulo _PyHASH_MODULUS)
68
69   provided that {reduce(x), reduce(y)} != {0, infinity}.  The reduction of a
70   binary or decimal float is never infinity, since the denominator is a power
71   of 2 (for binary) or a divisor of a power of 10 (for decimal).  So we have,
72   for nonnegative x,
73
74      reduce(x * 2**e) == reduce(x) * reduce(2**e) % _PyHASH_MODULUS
75
76      reduce(x * 10**e) == reduce(x) * reduce(10**e) % _PyHASH_MODULUS
77
78   and reduce(10**e) can be computed efficiently by the usual modular
79   exponentiation algorithm.  For reduce(2**e) it's even better: since
80   P is of the form 2**n-1, reduce(2**e) is 2**(e mod n), and multiplication
81   by 2**(e mod n) modulo 2**n-1 just amounts to a rotation of bits.
82
83   */
84
85Py_hash_t
86_Py_HashDouble(double v)
87{
88    int e, sign;
89    double m;
90    Py_uhash_t x, y;
91
92    if (!Py_IS_FINITE(v)) {
93        if (Py_IS_INFINITY(v))
94            return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
95        else
96            return _PyHASH_NAN;
97    }
98
99    m = frexp(v, &e);
100
101    sign = 1;
102    if (m < 0) {
103        sign = -1;
104        m = -m;
105    }
106
107    /* process 28 bits at a time;  this should work well both for binary
108       and hexadecimal floating point. */
109    x = 0;
110    while (m) {
111        x = ((x << 28) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - 28);
112        m *= 268435456.0;  /* 2**28 */
113        e -= 28;
114        y = (Py_uhash_t)m;  /* pull out integer part */
115        m -= y;
116        x += y;
117        if (x >= _PyHASH_MODULUS)
118            x -= _PyHASH_MODULUS;
119    }
120
121    /* adjust for the exponent;  first reduce it modulo _PyHASH_BITS */
122    e = e >= 0 ? e % _PyHASH_BITS : _PyHASH_BITS-1-((-1-e) % _PyHASH_BITS);
123    x = ((x << e) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - e);
124
125    x = x * sign;
126    if (x == (Py_uhash_t)-1)
127        x = (Py_uhash_t)-2;
128    return (Py_hash_t)x;
129}
130
131Py_hash_t
132_Py_HashPointer(void *p)
133{
134    Py_hash_t x;
135    size_t y = (size_t)p;
136    /* bottom 3 or 4 bits are likely to be 0; rotate y by 4 to avoid
137       excessive hash collisions for dicts and sets */
138    y = (y >> 4) | (y << (8 * SIZEOF_VOID_P - 4));
139    x = (Py_hash_t)y;
140    if (x == -1)
141        x = -2;
142    return x;
143}
144
145Py_hash_t
146_Py_HashBytes(const void *src, Py_ssize_t len)
147{
148    Py_hash_t x;
149    /*
150      We make the hash of the empty string be 0, rather than using
151      (prefix ^ suffix), since this slightly obfuscates the hash secret
152    */
153    if (len == 0) {
154        return 0;
155    }
156
157#ifdef Py_HASH_STATS
158    hashstats[(len <= Py_HASH_STATS_MAX) ? len : 0]++;
159#endif
160
161#if Py_HASH_CUTOFF > 0
162    if (len < Py_HASH_CUTOFF) {
163        /* Optimize hashing of very small strings with inline DJBX33A. */
164        Py_uhash_t hash;
165        const unsigned char *p = src;
166        hash = 5381; /* DJBX33A starts with 5381 */
167
168        switch(len) {
169            /* ((hash << 5) + hash) + *p == hash * 33 + *p */
170            case 7: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
171            case 6: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
172            case 5: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
173            case 4: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
174            case 3: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
175            case 2: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
176            case 1: hash = ((hash << 5) + hash) + *p++; break;
177            default:
178                assert(0);
179        }
180        hash ^= len;
181        hash ^= (Py_uhash_t) _Py_HashSecret.djbx33a.suffix;
182        x = (Py_hash_t)hash;
183    }
184    else
185#endif /* Py_HASH_CUTOFF */
186        x = PyHash_Func.hash(src, len);
187
188    if (x == -1)
189        return -2;
190    return x;
191}
192
193void
194_PyHash_Fini(void)
195{
196#ifdef Py_HASH_STATS
197    int i;
198    Py_ssize_t total = 0;
199    char *fmt = "%2i %8" PY_FORMAT_SIZE_T "d %8" PY_FORMAT_SIZE_T "d\n";
200
201    fprintf(stderr, "len   calls    total\n");
202    for (i = 1; i <= Py_HASH_STATS_MAX; i++) {
203        total += hashstats[i];
204        fprintf(stderr, fmt, i, hashstats[i], total);
205    }
206    total += hashstats[0];
207    fprintf(stderr, ">  %8" PY_FORMAT_SIZE_T "d %8" PY_FORMAT_SIZE_T "d\n",
208            hashstats[0], total);
209#endif
210}
211
212PyHash_FuncDef *
213PyHash_GetFuncDef(void)
214{
215    return &PyHash_Func;
216}
217
218/* Optimized memcpy() for Windows */
219#ifdef _MSC_VER
220#  if SIZEOF_PY_UHASH_T == 4
221#    define PY_UHASH_CPY(dst, src) do {                                    \
222       dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; \
223       } while(0)
224#  elif SIZEOF_PY_UHASH_T == 8
225#    define PY_UHASH_CPY(dst, src) do {                                    \
226       dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; \
227       dst[4] = src[4]; dst[5] = src[5]; dst[6] = src[6]; dst[7] = src[7]; \
228       } while(0)
229#  else
230#    error SIZEOF_PY_UHASH_T must be 4 or 8
231#  endif /* SIZEOF_PY_UHASH_T */
232#else /* not Windows */
233#  define PY_UHASH_CPY(dst, src) memcpy(dst, src, SIZEOF_PY_UHASH_T)
234#endif /* _MSC_VER */
235
236
237#if Py_HASH_ALGORITHM == Py_HASH_FNV
238/* **************************************************************************
239 * Modified Fowler-Noll-Vo (FNV) hash function
240 */
241static Py_hash_t
242fnv(const void *src, Py_ssize_t len)
243{
244    const unsigned char *p = src;
245    Py_uhash_t x;
246    Py_ssize_t remainder, blocks;
247    union {
248        Py_uhash_t value;
249        unsigned char bytes[SIZEOF_PY_UHASH_T];
250    } block;
251
252#ifdef Py_DEBUG
253    assert(_Py_HashSecret_Initialized);
254#endif
255    remainder = len % SIZEOF_PY_UHASH_T;
256    if (remainder == 0) {
257        /* Process at least one block byte by byte to reduce hash collisions
258         * for strings with common prefixes. */
259        remainder = SIZEOF_PY_UHASH_T;
260    }
261    blocks = (len - remainder) / SIZEOF_PY_UHASH_T;
262
263    x = (Py_uhash_t) _Py_HashSecret.fnv.prefix;
264    x ^= (Py_uhash_t) *p << 7;
265    while (blocks--) {
266        PY_UHASH_CPY(block.bytes, p);
267        x = (_PyHASH_MULTIPLIER * x) ^ block.value;
268        p += SIZEOF_PY_UHASH_T;
269    }
270    /* add remainder */
271    for (; remainder > 0; remainder--)
272        x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *p++;
273    x ^= (Py_uhash_t) len;
274    x ^= (Py_uhash_t) _Py_HashSecret.fnv.suffix;
275    if (x == -1) {
276        x = -2;
277    }
278    return x;
279}
280
281static PyHash_FuncDef PyHash_Func = {fnv, "fnv", 8 * SIZEOF_PY_HASH_T,
282                                     16 * SIZEOF_PY_HASH_T};
283
284#endif /* Py_HASH_ALGORITHM == Py_HASH_FNV */
285
286
287#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24
288/* **************************************************************************
289 <MIT License>
290 Copyright (c) 2013  Marek Majkowski <marek@popcount.org>
291
292 Permission is hereby granted, free of charge, to any person obtaining a copy
293 of this software and associated documentation files (the "Software"), to deal
294 in the Software without restriction, including without limitation the rights
295 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
296 copies of the Software, and to permit persons to whom the Software is
297 furnished to do so, subject to the following conditions:
298
299 The above copyright notice and this permission notice shall be included in
300 all copies or substantial portions of the Software.
301
302 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
303 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
304 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
305 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
306 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
307 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
308 THE SOFTWARE.
309 </MIT License>
310
311 Original location:
312    https://github.com/majek/csiphash/
313
314 Solution inspired by code from:
315    Samuel Neves (supercop/crypto_auth/siphash24/little)
316    djb (supercop/crypto_auth/siphash24/little2)
317    Jean-Philippe Aumasson (https://131002.net/siphash/siphash24.c)
318
319 Modified for Python by Christian Heimes:
320    - C89 / MSVC compatibility
321    - _rotl64() on Windows
322    - letoh64() fallback
323*/
324
325/* byte swap little endian to host endian
326 * Endian conversion not only ensures that the hash function returns the same
327 * value on all platforms. It is also required to for a good dispersion of
328 * the hash values' least significant bits.
329 */
330#if PY_LITTLE_ENDIAN
331#  define _le64toh(x) ((uint64_t)(x))
332#elif defined(__APPLE__)
333#  define _le64toh(x) OSSwapLittleToHostInt64(x)
334#elif defined(HAVE_LETOH64)
335#  define _le64toh(x) le64toh(x)
336#else
337#  define _le64toh(x) (((uint64_t)(x) << 56) | \
338                      (((uint64_t)(x) << 40) & 0xff000000000000ULL) | \
339                      (((uint64_t)(x) << 24) & 0xff0000000000ULL) | \
340                      (((uint64_t)(x) << 8)  & 0xff00000000ULL) | \
341                      (((uint64_t)(x) >> 8)  & 0xff000000ULL) | \
342                      (((uint64_t)(x) >> 24) & 0xff0000ULL) | \
343                      (((uint64_t)(x) >> 40) & 0xff00ULL) | \
344                      ((uint64_t)(x)  >> 56))
345#endif
346
347
348#ifdef _MSC_VER
349#  define ROTATE(x, b)  _rotl64(x, b)
350#else
351#  define ROTATE(x, b) (uint64_t)( ((x) << (b)) | ( (x) >> (64 - (b))) )
352#endif
353
354#define HALF_ROUND(a,b,c,d,s,t)         \
355    a += b; c += d;             \
356    b = ROTATE(b, s) ^ a;           \
357    d = ROTATE(d, t) ^ c;           \
358    a = ROTATE(a, 32);
359
360#define DOUBLE_ROUND(v0,v1,v2,v3)       \
361    HALF_ROUND(v0,v1,v2,v3,13,16);      \
362    HALF_ROUND(v2,v1,v0,v3,17,21);      \
363    HALF_ROUND(v0,v1,v2,v3,13,16);      \
364    HALF_ROUND(v2,v1,v0,v3,17,21);
365
366
367static Py_hash_t
368siphash24(const void *src, Py_ssize_t src_sz) {
369    uint64_t k0 = _le64toh(_Py_HashSecret.siphash.k0);
370    uint64_t k1 = _le64toh(_Py_HashSecret.siphash.k1);
371    uint64_t b = (uint64_t)src_sz << 56;
372    const uint64_t *in = (uint64_t*)src;
373
374    uint64_t v0 = k0 ^ 0x736f6d6570736575ULL;
375    uint64_t v1 = k1 ^ 0x646f72616e646f6dULL;
376    uint64_t v2 = k0 ^ 0x6c7967656e657261ULL;
377    uint64_t v3 = k1 ^ 0x7465646279746573ULL;
378
379    uint64_t t;
380    uint8_t *pt;
381    uint8_t *m;
382
383    while (src_sz >= 8) {
384        uint64_t mi = _le64toh(*in);
385        in += 1;
386        src_sz -= 8;
387        v3 ^= mi;
388        DOUBLE_ROUND(v0,v1,v2,v3);
389        v0 ^= mi;
390    }
391
392    t = 0;
393    pt = (uint8_t *)&t;
394    m = (uint8_t *)in;
395    switch (src_sz) {
396        case 7: pt[6] = m[6];
397        case 6: pt[5] = m[5];
398        case 5: pt[4] = m[4];
399        case 4: memcpy(pt, m, sizeof(uint32_t)); break;
400        case 3: pt[2] = m[2];
401        case 2: pt[1] = m[1];
402        case 1: pt[0] = m[0];
403    }
404    b |= _le64toh(t);
405
406    v3 ^= b;
407    DOUBLE_ROUND(v0,v1,v2,v3);
408    v0 ^= b;
409    v2 ^= 0xff;
410    DOUBLE_ROUND(v0,v1,v2,v3);
411    DOUBLE_ROUND(v0,v1,v2,v3);
412
413    /* modified */
414    t = (v0 ^ v1) ^ (v2 ^ v3);
415    return (Py_hash_t)t;
416}
417
418static PyHash_FuncDef PyHash_Func = {siphash24, "siphash24", 64, 128};
419
420#endif /* Py_HASH_ALGORITHM == Py_HASH_SIPHASH24 */
421
422#ifdef __cplusplus
423}
424#endif
425