1// Copyright (c) 2011 Google, Inc.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20//
21// CityHash, by Geoff Pike and Jyrki Alakuijala
22//
23// This file provides CityHash64() and related functions.
24//
25// It's probably possible to create even faster hash functions by
26// writing a program that systematically explores some of the space of
27// possible hash functions, by using SIMD instructions, or by
28// compromising on hash quality.
29
30#include "City.h"
31
32#include <algorithm>
33#include <string.h>  // for memcpy and memset
34
35using namespace std;
36
37static uint64 UNALIGNED_LOAD64(const char *p) {
38  uint64 result;
39  memcpy(&result, p, sizeof(result));
40  return result;
41}
42
43static uint32 UNALIGNED_LOAD32(const char *p) {
44  uint32 result;
45  memcpy(&result, p, sizeof(result));
46  return result;
47}
48
49#ifndef __BIG_ENDIAN__
50
51#define uint32_in_expected_order(x) (x)
52#define uint64_in_expected_order(x) (x)
53
54#else
55
56#ifdef _MSC_VER
57#include <stdlib.h>
58#define bswap_32(x) _byteswap_ulong(x)
59#define bswap_64(x) _byteswap_uint64(x)
60
61#elif defined(__APPLE__)
62// Mac OS X / Darwin features
63#include <libkern/OSByteOrder.h>
64#define bswap_32(x) OSSwapInt32(x)
65#define bswap_64(x) OSSwapInt64(x)
66
67#else
68#include <byteswap.h>
69#endif
70
71#define uint32_in_expected_order(x) (bswap_32(x))
72#define uint64_in_expected_order(x) (bswap_64(x))
73
74#endif  // __BIG_ENDIAN__
75
76#if !defined(LIKELY)
77#if defined(__GNUC__) || defined(__INTEL_COMPILER)
78#define LIKELY(x) (__builtin_expect(!!(x), 1))
79#else
80#define LIKELY(x) (x)
81#endif
82#endif
83
84static uint64 Fetch64(const char *p) {
85  return uint64_in_expected_order(UNALIGNED_LOAD64(p));
86}
87
88static uint32 Fetch32(const char *p) {
89  return uint32_in_expected_order(UNALIGNED_LOAD32(p));
90}
91
92// Some primes between 2^63 and 2^64 for various uses.
93static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
94static const uint64 k1 = 0xb492b66fbe98f273ULL;
95static const uint64 k2 = 0x9ae16a3b2f90404fULL;
96static const uint64 k3 = 0xc949d7c7509e6557ULL;
97
98// Bitwise right rotate.  Normally this will compile to a single
99// instruction, especially if the shift is a manifest constant.
100static uint64 Rotate(uint64 val, int shift) {
101  // Avoid shifting by 64: doing so yields an undefined result.
102  return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
103}
104
105// Equivalent to Rotate(), but requires the second arg to be non-zero.
106// On x86-64, and probably others, it's possible for this to compile
107// to a single instruction if both args are already in registers.
108static uint64 RotateByAtLeast1(uint64 val, int shift) {
109  return (val >> shift) | (val << (64 - shift));
110}
111
112static uint64 ShiftMix(uint64 val) {
113  return val ^ (val >> 47);
114}
115
116static uint64 HashLen16(uint64 u, uint64 v) {
117  return Hash128to64(uint128(u, v));
118}
119
120static uint64 HashLen0to16(const char *s, size_t len) {
121  if (len > 8) {
122    uint64 a = Fetch64(s);
123    uint64 b = Fetch64(s + len - 8);
124    return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b;
125  }
126  if (len >= 4) {
127    uint64 a = Fetch32(s);
128    return HashLen16(len + (a << 3), Fetch32(s + len - 4));
129  }
130  if (len > 0) {
131    uint8 a = s[0];
132    uint8 b = s[len >> 1];
133    uint8 c = s[len - 1];
134    uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);
135    uint32 z = len + (static_cast<uint32>(c) << 2);
136    return ShiftMix(y * k2 ^ z * k3) * k2;
137  }
138  return k2;
139}
140
141// This probably works well for 16-byte strings as well, but it may be overkill
142// in that case.
143static uint64 HashLen17to32(const char *s, size_t len) {
144  uint64 a = Fetch64(s) * k1;
145  uint64 b = Fetch64(s + 8);
146  uint64 c = Fetch64(s + len - 8) * k2;
147  uint64 d = Fetch64(s + len - 16) * k0;
148  return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d,
149                   a + Rotate(b ^ k3, 20) - c + len);
150}
151
152// Return a 16-byte hash for 48 bytes.  Quick and dirty.
153// Callers do best to use "random-looking" values for a and b.
154static pair<uint64, uint64> WeakHashLen32WithSeeds(
155    uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) {
156  a += w;
157  b = Rotate(b + a + z, 21);
158  uint64 c = a;
159  a += x;
160  a += y;
161  b += Rotate(a, 44);
162  return make_pair(a + z, b + c);
163}
164
165// Return a 16-byte hash for s[0] ... s[31], a, and b.  Quick and dirty.
166static pair<uint64, uint64> WeakHashLen32WithSeeds(
167    const char* s, uint64 a, uint64 b) {
168  return WeakHashLen32WithSeeds(Fetch64(s),
169                                Fetch64(s + 8),
170                                Fetch64(s + 16),
171                                Fetch64(s + 24),
172                                a,
173                                b);
174}
175
176// Return an 8-byte hash for 33 to 64 bytes.
177static uint64 HashLen33to64(const char *s, size_t len) {
178  uint64 z = Fetch64(s + 24);
179  uint64 a = Fetch64(s) + (len + Fetch64(s + len - 16)) * k0;
180  uint64 b = Rotate(a + z, 52);
181  uint64 c = Rotate(a, 37);
182  a += Fetch64(s + 8);
183  c += Rotate(a, 7);
184  a += Fetch64(s + 16);
185  uint64 vf = a + z;
186  uint64 vs = b + Rotate(a, 31) + c;
187  a = Fetch64(s + 16) + Fetch64(s + len - 32);
188  z = Fetch64(s + len - 8);
189  b = Rotate(a + z, 52);
190  c = Rotate(a, 37);
191  a += Fetch64(s + len - 24);
192  c += Rotate(a, 7);
193  a += Fetch64(s + len - 16);
194  uint64 wf = a + z;
195  uint64 ws = b + Rotate(a, 31) + c;
196  uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0);
197  return ShiftMix(r * k0 + vs) * k2;
198}
199
200uint64 CityHash64(const char *s, size_t len) {
201  if (len <= 32) {
202    if (len <= 16) {
203      return HashLen0to16(s, len);
204    } else {
205      return HashLen17to32(s, len);
206    }
207  } else if (len <= 64) {
208    return HashLen33to64(s, len);
209  }
210
211  // For strings over 64 bytes we hash the end first, and then as we
212  // loop we keep 56 bytes of state: v, w, x, y, and z.
213  uint64 x = Fetch64(s + len - 40);
214  uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
215  uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
216  pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
217  pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
218  x = x * k1 + Fetch64(s);
219
220  // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
221  len = (len - 1) & ~static_cast<size_t>(63);
222  do {
223    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
224    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
225    x ^= w.second;
226    y += v.first + Fetch64(s + 40);
227    z = Rotate(z + w.first, 33) * k1;
228    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
229    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
230    std::swap(z, x);
231    s += 64;
232    len -= 64;
233  } while (len != 0);
234  return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
235                   HashLen16(v.second, w.second) + x);
236}
237
238uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) {
239  return CityHash64WithSeeds(s, len, k2, seed);
240}
241
242uint64 CityHash64WithSeeds(const char *s, size_t len,
243                           uint64 seed0, uint64 seed1) {
244  return HashLen16(CityHash64(s, len) - seed0, seed1);
245}
246
247// A subroutine for CityHash128().  Returns a decent 128-bit hash for strings
248// of any length representable in signed long.  Based on City and Murmur.
249static uint128 CityMurmur(const char *s, size_t len, uint128 seed) {
250  uint64 a = Uint128Low64(seed);
251  uint64 b = Uint128High64(seed);
252  uint64 c = 0;
253  uint64 d = 0;
254  signed long l = len - 16;
255  if (l <= 0) {  // len <= 16
256    a = ShiftMix(a * k1) * k1;
257    c = b * k1 + HashLen0to16(s, len);
258    d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));
259  } else {  // len > 16
260    c = HashLen16(Fetch64(s + len - 8) + k1, a);
261    d = HashLen16(b + len, c + Fetch64(s + len - 16));
262    a += d;
263    do {
264      a ^= ShiftMix(Fetch64(s) * k1) * k1;
265      a *= k1;
266      b ^= a;
267      c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
268      c *= k1;
269      d ^= c;
270      s += 16;
271      l -= 16;
272    } while (l > 0);
273  }
274  a = HashLen16(a, c);
275  b = HashLen16(d, b);
276  return uint128(a ^ b, HashLen16(b, a));
277}
278
279uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
280  if (len < 128) {
281    return CityMurmur(s, len, seed);
282  }
283
284  // We expect len >= 128 to be the common case.  Keep 56 bytes of state:
285  // v, w, x, y, and z.
286  pair<uint64, uint64> v, w;
287  uint64 x = Uint128Low64(seed);
288  uint64 y = Uint128High64(seed);
289  uint64 z = len * k1;
290  v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
291  v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
292  w.first = Rotate(y + z, 35) * k1 + x;
293  w.second = Rotate(x + Fetch64(s + 88), 53) * k1;
294
295  // This is the same inner loop as CityHash64(), manually unrolled.
296  do {
297    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
298    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
299    x ^= w.second;
300    y += v.first + Fetch64(s + 40);
301    z = Rotate(z + w.first, 33) * k1;
302    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
303    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
304    std::swap(z, x);
305    s += 64;
306    x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
307    y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
308    x ^= w.second;
309    y += v.first + Fetch64(s + 40);
310    z = Rotate(z + w.first, 33) * k1;
311    v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
312    w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
313    std::swap(z, x);
314    s += 64;
315    len -= 128;
316  } while (LIKELY(len >= 128));
317  x += Rotate(v.first + z, 49) * k0;
318  z += Rotate(w.first, 37) * k0;
319  // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
320  for (size_t tail_done = 0; tail_done < len; ) {
321    tail_done += 32;
322    y = Rotate(x + y, 42) * k0 + v.second;
323    w.first += Fetch64(s + len - tail_done + 16);
324    x = x * k0 + w.first;
325    z += w.second + Fetch64(s + len - tail_done);
326    w.second += v.first;
327    v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
328  }
329  // At this point our 56 bytes of state should contain more than
330  // enough information for a strong 128-bit hash.  We use two
331  // different 56-byte-to-8-byte hashes to get a 16-byte final result.
332  x = HashLen16(x, v.first);
333  y = HashLen16(y + z, w.first);
334  return uint128(HashLen16(x + v.second, w.second) + y,
335                 HashLen16(x + w.second, y + v.second));
336}
337
338uint128 CityHash128(const char *s, size_t len) {
339  if (len >= 16) {
340    return CityHash128WithSeed(s + 16,
341                               len - 16,
342                               uint128(Fetch64(s) ^ k3,
343                                       Fetch64(s + 8)));
344  } else if (len >= 8) {
345    return CityHash128WithSeed(NULL,
346                               0,
347                               uint128(Fetch64(s) ^ (len * k0),
348                                       Fetch64(s + len - 8) ^ k1));
349  } else {
350    return CityHash128WithSeed(s, len, uint128(k0, k1));
351  }
352}
353
354#if defined(__SSE4_2__) && defined(__x86_64__)
355#include <nmmintrin.h>
356
357// Requires len >= 240.
358static void CityHashCrc256Long(const char *s, size_t len,
359                               uint32 seed, uint64 *result) {
360  uint64 a = Fetch64(s + 56) + k0;
361  uint64 b = Fetch64(s + 96) + k0;
362  uint64 c = result[0] = HashLen16(b, len);
363  uint64 d = result[1] = Fetch64(s + 120) * k0 + len;
364  uint64 e = Fetch64(s + 184) + seed;
365  uint64 f = seed;
366  uint64 g = 0;
367  uint64 h = 0;
368  uint64 i = 0;
369  uint64 j = 0;
370  uint64 t = c + d;
371
372  // 240 bytes of input per iter.
373  size_t iters = len / 240;
374  len -= iters * 240;
375  do {
376#define CHUNK(multiplier, z)                                    \
377    {                                                           \
378      uint64 old_a = a;                                         \
379      a = Rotate(b, 41 ^ z) * multiplier + Fetch64(s);          \
380      b = Rotate(c, 27 ^ z) * multiplier + Fetch64(s + 8);      \
381      c = Rotate(d, 41 ^ z) * multiplier + Fetch64(s + 16);     \
382      d = Rotate(e, 33 ^ z) * multiplier + Fetch64(s + 24);     \
383      e = Rotate(t, 25 ^ z) * multiplier + Fetch64(s + 32);     \
384      t = old_a;                                                \
385    }                                                           \
386    f = _mm_crc32_u64(f, a);                                    \
387    g = _mm_crc32_u64(g, b);                                    \
388    h = _mm_crc32_u64(h, c);                                    \
389    i = _mm_crc32_u64(i, d);                                    \
390    j = _mm_crc32_u64(j, e);                                    \
391    s += 40
392
393    CHUNK(1, 1); CHUNK(k0, 0);
394    CHUNK(1, 1); CHUNK(k0, 0);
395    CHUNK(1, 1); CHUNK(k0, 0);
396  } while (--iters > 0);
397
398  while (len >= 40) {
399    CHUNK(k0, 0);
400    len -= 40;
401  }
402  if (len > 0) {
403    s = s + len - 40;
404    CHUNK(k0, 0);
405  }
406  j += i << 32;
407  a = HashLen16(a, j);
408  h += g << 32;
409  b += h;
410  c = HashLen16(c, f) + i;
411  d = HashLen16(d, e + result[0]);
412  j += e;
413  i += HashLen16(h, t);
414  e = HashLen16(a, d) + j;
415  f = HashLen16(b, c) + a;
416  g = HashLen16(j, i) + c;
417  result[0] = e + f + g + h;
418  a = ShiftMix((a + g) * k0) * k0 + b;
419  result[1] += a + result[0];
420  a = ShiftMix(a * k0) * k0 + c;
421  result[2] = a + result[1];
422  a = ShiftMix((a + e) * k0) * k0;
423  result[3] = a + result[2];
424}
425
426// Requires len < 240.
427static void CityHashCrc256Short(const char *s, size_t len, uint64 *result) {
428  char buf[240];
429  memcpy(buf, s, len);
430  memset(buf + len, 0, 240 - len);
431  CityHashCrc256Long(buf, 240, ~static_cast<uint32>(len), result);
432}
433
434void CityHashCrc256(const char *s, size_t len, uint64 *result) {
435  if (LIKELY(len >= 240)) {
436    CityHashCrc256Long(s, len, 0, result);
437  } else {
438    CityHashCrc256Short(s, len, result);
439  }
440}
441
442uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed) {
443  if (len <= 900) {
444    return CityHash128WithSeed(s, len, seed);
445  } else {
446    uint64 result[4];
447    CityHashCrc256(s, len, result);
448    uint64 u = Uint128High64(seed) + result[0];
449    uint64 v = Uint128Low64(seed) + result[1];
450    return uint128(HashLen16(u, v + result[2]),
451                   HashLen16(Rotate(v, 32), u * k0 + result[3]));
452  }
453}
454
455uint128 CityHashCrc128(const char *s, size_t len) {
456  if (len <= 900) {
457    return CityHash128(s, len);
458  } else {
459    uint64 result[4];
460    CityHashCrc256(s, len, result);
461    return uint128(result[2], result[3]);
462  }
463}
464
465#endif
466