1f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Copyright (c) 2011 Google, Inc. 2f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// 3f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Permission is hereby granted, free of charge, to any person obtaining a copy 4f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// of this software and associated documentation files (the "Software"), to deal 5f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// in the Software without restriction, including without limitation the rights 6f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// copies of the Software, and to permit persons to whom the Software is 8f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// furnished to do so, subject to the following conditions: 9f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// 10f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// The above copyright notice and this permission notice shall be included in 11f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// all copies or substantial portions of the Software. 12f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// 13f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// THE SOFTWARE. 20f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// 21f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// CityHash, by Geoff Pike and Jyrki Alakuijala 22f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// 23f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// This file provides a few functions for hashing strings. On x86-64 24f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// hardware in 2011, CityHash64() is faster than other high-quality 25f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// hash functions, such as Murmur. This is largely due to higher 26f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// instruction-level parallelism. CityHash64() and CityHash128() also perform 27f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// well on hash-quality tests. 28f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// 29f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// CityHash128() is optimized for relatively long strings and returns 30f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// a 128-bit hash. For strings more than about 2000 bytes it can be 31f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// faster than CityHash64(). 32f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// 33f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Functions in the CityHash family are not suitable for cryptography. 34f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// 35f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// WARNING: This code has not been tested on big-endian platforms! 36f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// It is known to work well on little-endian platforms that have a small penalty 37f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs. 38f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// 39f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// By the way, for some hash functions, given strings a and b, the hash 40f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// of a+b is easily derived from the hashes of a and b. This property 41f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// doesn't hold for any hash functions in this file. 42f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 43f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com#ifndef CITY_HASH_H_ 44f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com#define CITY_HASH_H_ 45f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 46f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com#include <stdlib.h> // for size_t. 47f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com#include <utility> 48f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 49e87738e57558e0ec472b2fc3a643b838e5b6e88faappleby@google.com// Microsoft Visual Studio may not have stdint.h. 50e87738e57558e0ec472b2fc3a643b838e5b6e88faappleby@google.com#if defined(_MSC_VER) && (_MSC_VER < 1600) 51e87738e57558e0ec472b2fc3a643b838e5b6e88faappleby@google.comtypedef unsigned char uint8_t; 52e87738e57558e0ec472b2fc3a643b838e5b6e88faappleby@google.comtypedef unsigned int uint32_t; 53e87738e57558e0ec472b2fc3a643b838e5b6e88faappleby@google.comtypedef unsigned __int64 uint64_t; 54e87738e57558e0ec472b2fc3a643b838e5b6e88faappleby@google.com#else // defined(_MSC_VER) 55e87738e57558e0ec472b2fc3a643b838e5b6e88faappleby@google.com#include <stdint.h> 56e87738e57558e0ec472b2fc3a643b838e5b6e88faappleby@google.com#endif // !defined(_MSC_VER) 57e87738e57558e0ec472b2fc3a643b838e5b6e88faappleby@google.com 58f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comtypedef uint8_t uint8; 59f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comtypedef uint32_t uint32; 60f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comtypedef uint64_t uint64; 61f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comtypedef std::pair<uint64, uint64> uint128; 62f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 63f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.cominline uint64 Uint128Low64(const uint128& x) { return x.first; } 64f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.cominline uint64 Uint128High64(const uint128& x) { return x.second; } 65f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 66f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Hash function for a byte array. 67f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comuint64 CityHash64(const char *buf, size_t len); 68f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 69f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Hash function for a byte array. For convenience, a 64-bit seed is also 70f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// hashed into the result. 71f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comuint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed); 72f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 73f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Hash function for a byte array. For convenience, two seeds are also 74f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// hashed into the result. 75f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comuint64 CityHash64WithSeeds(const char *buf, size_t len, 76f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com uint64 seed0, uint64 seed1); 77f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 78f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Hash function for a byte array. 79f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comuint128 CityHash128(const char *s, size_t len); 80f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 81f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Hash function for a byte array. For convenience, a 128-bit seed is also 82f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// hashed into the result. 83f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comuint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed); 84f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 85f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Hash 128 input bits down to 64 bits of output. 86f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// This is intended to be a reasonably good hash function. 87f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.cominline uint64 Hash128to64(const uint128& x) { 88f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com // Murmur-inspired hashing. 89f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com const uint64 kMul = 0x9ddfea08eb382d69ULL; 90f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com uint64 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; 91f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com a ^= (a >> 47); 92f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com uint64 b = (Uint128High64(x) ^ a) * kMul; 93f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com b ^= (b >> 47); 94f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com b *= kMul; 95f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com return b; 96f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com} 97f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 98f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Conditionally include declarations for versions of City that require SSE4.2 99f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// instructions to be available. 1008adb1336422e3ad4d78ba54fb56692f2ed07124ctanjent@gmail.com#if defined(__SSE4_2__) && defined(__x86_64__) 101f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 102f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Hash function for a byte array. 103f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comuint128 CityHashCrc128(const char *s, size_t len); 104f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 105f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Hash function for a byte array. For convenience, a 128-bit seed is also 106f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// hashed into the result. 107f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comuint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed); 108f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 109f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Hash function for a byte array. Sets result[0] ... result[3]. 110f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comvoid CityHashCrc256(const char *s, size_t len, uint64 *result); 111f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 112f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com#endif // __SSE4_2__ 113f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 114f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com#endif // CITY_HASH_H_ 115