1/*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef LIBTEXTCLASSIFIER_UTIL_HASH_FARMHASH_H_
18#define LIBTEXTCLASSIFIER_UTIL_HASH_FARMHASH_H_
19
20#include <assert.h>
21#include <stdint.h>
22#include <stdlib.h>
23#include <string.h>   // for memcpy and memset
24#include <utility>
25
26#ifndef NAMESPACE_FOR_HASH_FUNCTIONS
27#define NAMESPACE_FOR_HASH_FUNCTIONS tcfarmhash
28#endif
29
30namespace NAMESPACE_FOR_HASH_FUNCTIONS {
31
32#if defined(FARMHASH_UINT128_T_DEFINED)
33inline uint64_t Uint128Low64(const uint128_t x) {
34  return static_cast<uint64_t>(x);
35}
36inline uint64_t Uint128High64(const uint128_t x) {
37  return static_cast<uint64_t>(x >> 64);
38}
39inline uint128_t Uint128(uint64_t lo, uint64_t hi) {
40  return lo + (((uint128_t)hi) << 64);
41}
42#else
43typedef std::pair<uint64_t, uint64_t> uint128_t;
44inline uint64_t Uint128Low64(const uint128_t x) { return x.first; }
45inline uint64_t Uint128High64(const uint128_t x) { return x.second; }
46inline uint128_t Uint128(uint64_t lo, uint64_t hi) { return uint128_t(lo, hi); }
47#endif
48
49
50// BASIC STRING HASHING
51
52// Hash function for a byte array.
53// May change from time to time, may differ on different platforms, may differ
54// depending on NDEBUG.
55size_t Hash(const char* s, size_t len);
56
57// Hash function for a byte array.  Most useful in 32-bit binaries.
58// May change from time to time, may differ on different platforms, may differ
59// depending on NDEBUG.
60uint32_t Hash32(const char* s, size_t len);
61
62// Hash function for a byte array.  For convenience, a 32-bit seed is also
63// hashed into the result.
64// May change from time to time, may differ on different platforms, may differ
65// depending on NDEBUG.
66uint32_t Hash32WithSeed(const char* s, size_t len, uint32_t seed);
67
68// Hash 128 input bits down to 64 bits of output.
69// Hash function for a byte array.
70// May change from time to time, may differ on different platforms, may differ
71// depending on NDEBUG.
72uint64_t Hash64(const char* s, size_t len);
73
74// Hash function for a byte array.  For convenience, a 64-bit seed is also
75// hashed into the result.
76// May change from time to time, may differ on different platforms, may differ
77// depending on NDEBUG.
78uint64_t Hash64WithSeed(const char* s, size_t len, uint64_t seed);
79
80// Hash function for a byte array.  For convenience, two seeds are also
81// hashed into the result.
82// May change from time to time, may differ on different platforms, may differ
83// depending on NDEBUG.
84uint64_t Hash64WithSeeds(const char* s, size_t len,
85                       uint64_t seed0, uint64_t seed1);
86
87// Hash function for a byte array.
88// May change from time to time, may differ on different platforms, may differ
89// depending on NDEBUG.
90uint128_t Hash128(const char* s, size_t len);
91
92// Hash function for a byte array.  For convenience, a 128-bit seed is also
93// hashed into the result.
94// May change from time to time, may differ on different platforms, may differ
95// depending on NDEBUG.
96uint128_t Hash128WithSeed(const char* s, size_t len, uint128_t seed);
97
98// BASIC NON-STRING HASHING
99
100// This is intended to be a reasonably good hash function.
101// May change from time to time, may differ on different platforms, may differ
102// depending on NDEBUG.
103inline uint64_t Hash128to64(uint128_t x) {
104  // Murmur-inspired hashing.
105  const uint64_t kMul = 0x9ddfea08eb382d69ULL;
106  uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
107  a ^= (a >> 47);
108  uint64_t b = (Uint128High64(x) ^ a) * kMul;
109  b ^= (b >> 47);
110  b *= kMul;
111  return b;
112}
113
114// FINGERPRINTING (i.e., good, portable, forever-fixed hash functions)
115
116// Fingerprint function for a byte array.  Most useful in 32-bit binaries.
117uint32_t Fingerprint32(const char* s, size_t len);
118
119// Fingerprint function for a byte array.
120uint64_t Fingerprint64(const char* s, size_t len);
121
122// Fingerprint function for a byte array.
123uint128_t Fingerprint128(const char* s, size_t len);
124
125// This is intended to be a good fingerprinting primitive.
126// See below for more overloads.
127inline uint64_t Fingerprint(uint128_t x) {
128  // Murmur-inspired hashing.
129  const uint64_t kMul = 0x9ddfea08eb382d69ULL;
130  uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
131  a ^= (a >> 47);
132  uint64_t b = (Uint128High64(x) ^ a) * kMul;
133  b ^= (b >> 44);
134  b *= kMul;
135  b ^= (b >> 41);
136  b *= kMul;
137  return b;
138}
139
140// This is intended to be a good fingerprinting primitive.
141inline uint64_t Fingerprint(uint64_t x) {
142  // Murmur-inspired hashing.
143  const uint64_t kMul = 0x9ddfea08eb382d69ULL;
144  uint64_t b = x * kMul;
145  b ^= (b >> 44);
146  b *= kMul;
147  b ^= (b >> 41);
148  b *= kMul;
149  return b;
150}
151
152#ifndef FARMHASH_NO_CXX_STRING
153
154// Convenience functions to hash or fingerprint C++ strings.
155// These require that Str::data() return a pointer to the first char
156// (as a const char*) and that Str::length() return the string's length;
157// they work with std::string, for example.
158
159// Hash function for a byte array.  Most useful in 32-bit binaries.
160// May change from time to time, may differ on different platforms, may differ
161// depending on NDEBUG.
162template <typename Str>
163inline size_t Hash(const Str& s) {
164  assert(sizeof(s[0]) == 1);
165  return Hash(s.data(), s.length());
166}
167
168// Hash function for a byte array.  Most useful in 32-bit binaries.
169// May change from time to time, may differ on different platforms, may differ
170// depending on NDEBUG.
171template <typename Str>
172inline uint32_t Hash32(const Str& s) {
173  assert(sizeof(s[0]) == 1);
174  return Hash32(s.data(), s.length());
175}
176
177// Hash function for a byte array.  For convenience, a 32-bit seed is also
178// hashed into the result.
179// May change from time to time, may differ on different platforms, may differ
180// depending on NDEBUG.
181template <typename Str>
182inline uint32_t Hash32WithSeed(const Str& s, uint32_t seed) {
183  assert(sizeof(s[0]) == 1);
184  return Hash32WithSeed(s.data(), s.length(), seed);
185}
186
187// Hash 128 input bits down to 64 bits of output.
188// Hash function for a byte array.
189// May change from time to time, may differ on different platforms, may differ
190// depending on NDEBUG.
191template <typename Str>
192inline uint64_t Hash64(const Str& s) {
193  assert(sizeof(s[0]) == 1);
194  return Hash64(s.data(), s.length());
195}
196
197// Hash function for a byte array.  For convenience, a 64-bit seed is also
198// hashed into the result.
199// May change from time to time, may differ on different platforms, may differ
200// depending on NDEBUG.
201template <typename Str>
202inline uint64_t Hash64WithSeed(const Str& s, uint64_t seed) {
203  assert(sizeof(s[0]) == 1);
204  return Hash64WithSeed(s.data(), s.length(), seed);
205}
206
207// Hash function for a byte array.  For convenience, two seeds are also
208// hashed into the result.
209// May change from time to time, may differ on different platforms, may differ
210// depending on NDEBUG.
211template <typename Str>
212inline uint64_t Hash64WithSeeds(const Str& s, uint64_t seed0, uint64_t seed1) {
213  assert(sizeof(s[0]) == 1);
214  return Hash64WithSeeds(s.data(), s.length(), seed0, seed1);
215}
216
217// Hash function for a byte array.
218// May change from time to time, may differ on different platforms, may differ
219// depending on NDEBUG.
220template <typename Str>
221inline uint128_t Hash128(const Str& s) {
222  assert(sizeof(s[0]) == 1);
223  return Hash128(s.data(), s.length());
224}
225
226// Hash function for a byte array.  For convenience, a 128-bit seed is also
227// hashed into the result.
228// May change from time to time, may differ on different platforms, may differ
229// depending on NDEBUG.
230template <typename Str>
231inline uint128_t Hash128WithSeed(const Str& s, uint128_t seed) {
232  assert(sizeof(s[0]) == 1);
233  return Hash128(s.data(), s.length(), seed);
234}
235
236// FINGERPRINTING (i.e., good, portable, forever-fixed hash functions)
237
238// Fingerprint function for a byte array.  Most useful in 32-bit binaries.
239template <typename Str>
240inline uint32_t Fingerprint32(const Str& s) {
241  assert(sizeof(s[0]) == 1);
242  return Fingerprint32(s.data(), s.length());
243}
244
245// Fingerprint 128 input bits down to 64 bits of output.
246// Fingerprint function for a byte array.
247template <typename Str>
248inline uint64_t Fingerprint64(const Str& s) {
249  assert(sizeof(s[0]) == 1);
250  return Fingerprint64(s.data(), s.length());
251}
252
253// Fingerprint function for a byte array.
254template <typename Str>
255inline uint128_t Fingerprint128(const Str& s) {
256  assert(sizeof(s[0]) == 1);
257  return Fingerprint128(s.data(), s.length());
258}
259
260#endif
261
262}  // namespace NAMESPACE_FOR_HASH_FUNCTIONS
263
264#endif  // LIBTEXTCLASSIFIER_UTIL_HASH_FARMHASH_H_
265