1f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Copyright (c) 2011 Google, Inc.
2f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com//
3f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Permission is hereby granted, free of charge, to any person obtaining a copy
4f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// of this software and associated documentation files (the "Software"), to deal
5f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// in the Software without restriction, including without limitation the rights
6f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// copies of the Software, and to permit persons to whom the Software is
8f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// furnished to do so, subject to the following conditions:
9f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com//
10f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// The above copyright notice and this permission notice shall be included in
11f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// all copies or substantial portions of the Software.
12f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com//
13f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// THE SOFTWARE.
20f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com//
21f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// CityHash, by Geoff Pike and Jyrki Alakuijala
22f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com//
23f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// This file provides a few functions for hashing strings. On x86-64
24f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// hardware in 2011, CityHash64() is faster than other high-quality
25f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// hash functions, such as Murmur.  This is largely due to higher
26f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// instruction-level parallelism.  CityHash64() and CityHash128() also perform
27f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// well on hash-quality tests.
28f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com//
29f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// CityHash128() is optimized for relatively long strings and returns
30f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// a 128-bit hash.  For strings more than about 2000 bytes it can be
31f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// faster than CityHash64().
32f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com//
33f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Functions in the CityHash family are not suitable for cryptography.
34f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com//
35f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// WARNING: This code has not been tested on big-endian platforms!
36f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// It is known to work well on little-endian platforms that have a small penalty
37f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
38f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com//
39f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// By the way, for some hash functions, given strings a and b, the hash
40f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// of a+b is easily derived from the hashes of a and b.  This property
41f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// doesn't hold for any hash functions in this file.
42f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
43f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com#ifndef CITY_HASH_H_
44f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com#define CITY_HASH_H_
45f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
46f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com#include <stdlib.h>  // for size_t.
47f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com#include <utility>
48f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
49e87738e57558e0ec472b2fc3a643b838e5b6e88faappleby@google.com// Microsoft Visual Studio may not have stdint.h.
50e87738e57558e0ec472b2fc3a643b838e5b6e88faappleby@google.com#if defined(_MSC_VER) && (_MSC_VER < 1600)
51e87738e57558e0ec472b2fc3a643b838e5b6e88faappleby@google.comtypedef unsigned char uint8_t;
52e87738e57558e0ec472b2fc3a643b838e5b6e88faappleby@google.comtypedef unsigned int uint32_t;
53e87738e57558e0ec472b2fc3a643b838e5b6e88faappleby@google.comtypedef unsigned __int64 uint64_t;
54e87738e57558e0ec472b2fc3a643b838e5b6e88faappleby@google.com#else  // defined(_MSC_VER)
55e87738e57558e0ec472b2fc3a643b838e5b6e88faappleby@google.com#include <stdint.h>
56e87738e57558e0ec472b2fc3a643b838e5b6e88faappleby@google.com#endif // !defined(_MSC_VER)
57e87738e57558e0ec472b2fc3a643b838e5b6e88faappleby@google.com
58f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comtypedef uint8_t uint8;
59f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comtypedef uint32_t uint32;
60f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comtypedef uint64_t uint64;
61f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comtypedef std::pair<uint64, uint64> uint128;
62f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
63f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.cominline uint64 Uint128Low64(const uint128& x) { return x.first; }
64f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.cominline uint64 Uint128High64(const uint128& x) { return x.second; }
65f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
66f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Hash function for a byte array.
67f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comuint64 CityHash64(const char *buf, size_t len);
68f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
69f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Hash function for a byte array.  For convenience, a 64-bit seed is also
70f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// hashed into the result.
71f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comuint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);
72f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
73f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Hash function for a byte array.  For convenience, two seeds are also
74f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// hashed into the result.
75f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comuint64 CityHash64WithSeeds(const char *buf, size_t len,
76f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com                           uint64 seed0, uint64 seed1);
77f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
78f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Hash function for a byte array.
79f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comuint128 CityHash128(const char *s, size_t len);
80f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
81f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Hash function for a byte array.  For convenience, a 128-bit seed is also
82f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// hashed into the result.
83f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comuint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);
84f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
85f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Hash 128 input bits down to 64 bits of output.
86f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// This is intended to be a reasonably good hash function.
87f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.cominline uint64 Hash128to64(const uint128& x) {
88f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  // Murmur-inspired hashing.
89f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  const uint64 kMul = 0x9ddfea08eb382d69ULL;
90f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  uint64 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
91f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  a ^= (a >> 47);
92f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  uint64 b = (Uint128High64(x) ^ a) * kMul;
93f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  b ^= (b >> 47);
94f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  b *= kMul;
95f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  return b;
96f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com}
97f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
98f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Conditionally include declarations for versions of City that require SSE4.2
99f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// instructions to be available.
1008adb1336422e3ad4d78ba54fb56692f2ed07124ctanjent@gmail.com#if defined(__SSE4_2__) && defined(__x86_64__)
101f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
102f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Hash function for a byte array.
103f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comuint128 CityHashCrc128(const char *s, size_t len);
104f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
105f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Hash function for a byte array.  For convenience, a 128-bit seed is also
106f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// hashed into the result.
107f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comuint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed);
108f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
109f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Hash function for a byte array.  Sets result[0] ... result[3].
110f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comvoid CityHashCrc256(const char *s, size_t len, uint64 *result);
111f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
112f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com#endif  // __SSE4_2__
113f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
114f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com#endif  // CITY_HASH_H_
115