1/* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef LIBTEXTCLASSIFIER_UTIL_HASH_FARMHASH_H_ 18#define LIBTEXTCLASSIFIER_UTIL_HASH_FARMHASH_H_ 19 20#include <assert.h> 21#include <stdint.h> 22#include <stdlib.h> 23#include <string.h> // for memcpy and memset 24#include <utility> 25 26#ifndef NAMESPACE_FOR_HASH_FUNCTIONS 27#define NAMESPACE_FOR_HASH_FUNCTIONS tcfarmhash 28#endif 29 30namespace NAMESPACE_FOR_HASH_FUNCTIONS { 31 32#if defined(FARMHASH_UINT128_T_DEFINED) 33inline uint64_t Uint128Low64(const uint128_t x) { 34 return static_cast<uint64_t>(x); 35} 36inline uint64_t Uint128High64(const uint128_t x) { 37 return static_cast<uint64_t>(x >> 64); 38} 39inline uint128_t Uint128(uint64_t lo, uint64_t hi) { 40 return lo + (((uint128_t)hi) << 64); 41} 42#else 43typedef std::pair<uint64_t, uint64_t> uint128_t; 44inline uint64_t Uint128Low64(const uint128_t x) { return x.first; } 45inline uint64_t Uint128High64(const uint128_t x) { return x.second; } 46inline uint128_t Uint128(uint64_t lo, uint64_t hi) { return uint128_t(lo, hi); } 47#endif 48 49 50// BASIC STRING HASHING 51 52// Hash function for a byte array. 53// May change from time to time, may differ on different platforms, may differ 54// depending on NDEBUG. 55size_t Hash(const char* s, size_t len); 56 57// Hash function for a byte array. Most useful in 32-bit binaries. 58// May change from time to time, may differ on different platforms, may differ 59// depending on NDEBUG. 60uint32_t Hash32(const char* s, size_t len); 61 62// Hash function for a byte array. For convenience, a 32-bit seed is also 63// hashed into the result. 64// May change from time to time, may differ on different platforms, may differ 65// depending on NDEBUG. 66uint32_t Hash32WithSeed(const char* s, size_t len, uint32_t seed); 67 68// Hash 128 input bits down to 64 bits of output. 69// Hash function for a byte array. 70// May change from time to time, may differ on different platforms, may differ 71// depending on NDEBUG. 72uint64_t Hash64(const char* s, size_t len); 73 74// Hash function for a byte array. For convenience, a 64-bit seed is also 75// hashed into the result. 76// May change from time to time, may differ on different platforms, may differ 77// depending on NDEBUG. 78uint64_t Hash64WithSeed(const char* s, size_t len, uint64_t seed); 79 80// Hash function for a byte array. For convenience, two seeds are also 81// hashed into the result. 82// May change from time to time, may differ on different platforms, may differ 83// depending on NDEBUG. 84uint64_t Hash64WithSeeds(const char* s, size_t len, 85 uint64_t seed0, uint64_t seed1); 86 87// Hash function for a byte array. 88// May change from time to time, may differ on different platforms, may differ 89// depending on NDEBUG. 90uint128_t Hash128(const char* s, size_t len); 91 92// Hash function for a byte array. For convenience, a 128-bit seed is also 93// hashed into the result. 94// May change from time to time, may differ on different platforms, may differ 95// depending on NDEBUG. 96uint128_t Hash128WithSeed(const char* s, size_t len, uint128_t seed); 97 98// BASIC NON-STRING HASHING 99 100// This is intended to be a reasonably good hash function. 101// May change from time to time, may differ on different platforms, may differ 102// depending on NDEBUG. 103inline uint64_t Hash128to64(uint128_t x) { 104 // Murmur-inspired hashing. 105 const uint64_t kMul = 0x9ddfea08eb382d69ULL; 106 uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; 107 a ^= (a >> 47); 108 uint64_t b = (Uint128High64(x) ^ a) * kMul; 109 b ^= (b >> 47); 110 b *= kMul; 111 return b; 112} 113 114// FINGERPRINTING (i.e., good, portable, forever-fixed hash functions) 115 116// Fingerprint function for a byte array. Most useful in 32-bit binaries. 117uint32_t Fingerprint32(const char* s, size_t len); 118 119// Fingerprint function for a byte array. 120uint64_t Fingerprint64(const char* s, size_t len); 121 122// Fingerprint function for a byte array. 123uint128_t Fingerprint128(const char* s, size_t len); 124 125// This is intended to be a good fingerprinting primitive. 126// See below for more overloads. 127inline uint64_t Fingerprint(uint128_t x) { 128 // Murmur-inspired hashing. 129 const uint64_t kMul = 0x9ddfea08eb382d69ULL; 130 uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; 131 a ^= (a >> 47); 132 uint64_t b = (Uint128High64(x) ^ a) * kMul; 133 b ^= (b >> 44); 134 b *= kMul; 135 b ^= (b >> 41); 136 b *= kMul; 137 return b; 138} 139 140// This is intended to be a good fingerprinting primitive. 141inline uint64_t Fingerprint(uint64_t x) { 142 // Murmur-inspired hashing. 143 const uint64_t kMul = 0x9ddfea08eb382d69ULL; 144 uint64_t b = x * kMul; 145 b ^= (b >> 44); 146 b *= kMul; 147 b ^= (b >> 41); 148 b *= kMul; 149 return b; 150} 151 152#ifndef FARMHASH_NO_CXX_STRING 153 154// Convenience functions to hash or fingerprint C++ strings. 155// These require that Str::data() return a pointer to the first char 156// (as a const char*) and that Str::length() return the string's length; 157// they work with std::string, for example. 158 159// Hash function for a byte array. Most useful in 32-bit binaries. 160// May change from time to time, may differ on different platforms, may differ 161// depending on NDEBUG. 162template <typename Str> 163inline size_t Hash(const Str& s) { 164 assert(sizeof(s[0]) == 1); 165 return Hash(s.data(), s.length()); 166} 167 168// Hash function for a byte array. Most useful in 32-bit binaries. 169// May change from time to time, may differ on different platforms, may differ 170// depending on NDEBUG. 171template <typename Str> 172inline uint32_t Hash32(const Str& s) { 173 assert(sizeof(s[0]) == 1); 174 return Hash32(s.data(), s.length()); 175} 176 177// Hash function for a byte array. For convenience, a 32-bit seed is also 178// hashed into the result. 179// May change from time to time, may differ on different platforms, may differ 180// depending on NDEBUG. 181template <typename Str> 182inline uint32_t Hash32WithSeed(const Str& s, uint32_t seed) { 183 assert(sizeof(s[0]) == 1); 184 return Hash32WithSeed(s.data(), s.length(), seed); 185} 186 187// Hash 128 input bits down to 64 bits of output. 188// Hash function for a byte array. 189// May change from time to time, may differ on different platforms, may differ 190// depending on NDEBUG. 191template <typename Str> 192inline uint64_t Hash64(const Str& s) { 193 assert(sizeof(s[0]) == 1); 194 return Hash64(s.data(), s.length()); 195} 196 197// Hash function for a byte array. For convenience, a 64-bit seed is also 198// hashed into the result. 199// May change from time to time, may differ on different platforms, may differ 200// depending on NDEBUG. 201template <typename Str> 202inline uint64_t Hash64WithSeed(const Str& s, uint64_t seed) { 203 assert(sizeof(s[0]) == 1); 204 return Hash64WithSeed(s.data(), s.length(), seed); 205} 206 207// Hash function for a byte array. For convenience, two seeds are also 208// hashed into the result. 209// May change from time to time, may differ on different platforms, may differ 210// depending on NDEBUG. 211template <typename Str> 212inline uint64_t Hash64WithSeeds(const Str& s, uint64_t seed0, uint64_t seed1) { 213 assert(sizeof(s[0]) == 1); 214 return Hash64WithSeeds(s.data(), s.length(), seed0, seed1); 215} 216 217// Hash function for a byte array. 218// May change from time to time, may differ on different platforms, may differ 219// depending on NDEBUG. 220template <typename Str> 221inline uint128_t Hash128(const Str& s) { 222 assert(sizeof(s[0]) == 1); 223 return Hash128(s.data(), s.length()); 224} 225 226// Hash function for a byte array. For convenience, a 128-bit seed is also 227// hashed into the result. 228// May change from time to time, may differ on different platforms, may differ 229// depending on NDEBUG. 230template <typename Str> 231inline uint128_t Hash128WithSeed(const Str& s, uint128_t seed) { 232 assert(sizeof(s[0]) == 1); 233 return Hash128(s.data(), s.length(), seed); 234} 235 236// FINGERPRINTING (i.e., good, portable, forever-fixed hash functions) 237 238// Fingerprint function for a byte array. Most useful in 32-bit binaries. 239template <typename Str> 240inline uint32_t Fingerprint32(const Str& s) { 241 assert(sizeof(s[0]) == 1); 242 return Fingerprint32(s.data(), s.length()); 243} 244 245// Fingerprint 128 input bits down to 64 bits of output. 246// Fingerprint function for a byte array. 247template <typename Str> 248inline uint64_t Fingerprint64(const Str& s) { 249 assert(sizeof(s[0]) == 1); 250 return Fingerprint64(s.data(), s.length()); 251} 252 253// Fingerprint function for a byte array. 254template <typename Str> 255inline uint128_t Fingerprint128(const Str& s) { 256 assert(sizeof(s[0]) == 1); 257 return Fingerprint128(s.data(), s.length()); 258} 259 260#endif 261 262} // namespace NAMESPACE_FOR_HASH_FUNCTIONS 263 264#endif // LIBTEXTCLASSIFIER_UTIL_HASH_FARMHASH_H_ 265