StringHash.h revision f7ac0f19a1c8d0ad14bcf6456ce368b830fea886
1//===- StringHash.h -------------------------------------------------------===//
2//
3//                     The MCLinker Project
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9#ifndef MCLD_STRING_HASH_FUNCTION_H
10#define MCLD_STRING_HASH_FUNCTION_H
11#ifdef ENABLE_UNITTEST
12#include <gtest.h>
13#endif
14#include <llvm/ADT/StringRef.h>
15#include <llvm/Support/DataTypes.h>
16#include <llvm/Support/ErrorHandling.h>
17#include <cctype>
18#include <functional>
19
20namespace mcld {
21namespace hash {
22
23enum Type {
24  RS,
25  JS,
26  PJW,
27  ELF,
28  BKDR,
29  SDBM,
30  DJB,
31  DEK,
32  BP,
33  FNV,
34  AP,
35  ES
36};
37
38/** \class template<uint32_t TYPE> StringHash
39 *  \brief the template StringHash class, for specification
40 */
41template<uint32_t TYPE>
42struct StringHash : public std::unary_function<const llvm::StringRef&, uint32_t>
43{
44  uint32_t operator()(const llvm::StringRef& pKey) const
45  {
46    llvm::report_fatal_error("Undefined StringHash function.\n");
47  }
48};
49
50/** \class StringHash<RSHash>
51 *  \brief RS StringHash funciton
52 */
53template<>
54struct StringHash<RS> : public std::unary_function<const llvm::StringRef&, uint32_t>
55{
56  uint32_t operator()(const llvm::StringRef& pKey) const
57  {
58    const unsigned int b = 378551;
59    uint32_t a = 63689;
60    uint32_t hash_val = 0;
61
62    for(unsigned int i = 0; i < pKey.size(); ++i) {
63      hash_val = hash_val * a + pKey[i];
64      a = a * b;
65    }
66    return hash_val;
67  }
68};
69
70/** \class StringHash<JSHash>
71 *  \brief JS hash funciton
72 */
73template<>
74struct StringHash<JS> : public std::unary_function<const llvm::StringRef&, uint32_t>
75{
76  uint32_t operator()(const llvm::StringRef& pKey) const
77  {
78    uint32_t hash_val = 1315423911;
79
80    for(unsigned int i = 0; i < pKey.size(); ++i) {
81       hash_val ^= ((hash_val << 5) + pKey[i] + (hash_val >> 2));
82    }
83    return hash_val;
84  }
85};
86
87/** \class StringHash<PJW>
88 *  \brief P.J. Weinberger hash function
89 */
90template<>
91struct StringHash<PJW> : public std::unary_function<const llvm::StringRef&, uint32_t>
92{
93  uint32_t operator()(const llvm::StringRef& pKey) const
94  {
95    const unsigned int BitsInUnsignedInt = (unsigned int)(sizeof(unsigned int) * 8);
96    const unsigned int ThreeQuarters     = (unsigned int)((BitsInUnsignedInt  * 3) / 4);
97    const unsigned int OneEighth         = (unsigned int)(BitsInUnsignedInt / 8);
98    const unsigned int HighBits          = (unsigned int)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth);
99    uint32_t hash_val = 0;
100    uint32_t test = 0;
101
102    for(unsigned int i = 0; i < pKey.size(); ++i) {
103      hash_val = (hash_val << OneEighth) + pKey[i];
104
105      if((test = hash_val & HighBits) != 0) {
106        hash_val = (( hash_val ^ (test >> ThreeQuarters)) & (~HighBits));
107      }
108    }
109    return hash_val;
110  }
111};
112
113/** \class StringHash<ELF>
114 *  \brief ELF hash function.
115 */
116template<>
117struct StringHash<ELF> : public std::unary_function<const llvm::StringRef&, uint32_t>
118{
119  uint32_t operator()(const llvm::StringRef& pKey) const
120  {
121    uint32_t hash_val = 0;
122    uint32_t x = 0;
123
124    for (unsigned int i = 0; i < pKey.size(); ++i) {
125      hash_val = (hash_val << 4) + pKey[i];
126      if((x = hash_val & 0xF0000000L) != 0)
127        hash_val ^= (x >> 24);
128      hash_val &= ~x;
129    }
130    return hash_val;
131  }
132};
133
134/** \class StringHash<BKDR>
135 *  \brief BKDR hash function
136 */
137template<>
138struct StringHash<BKDR> : public std::unary_function<const llvm::StringRef&, uint32_t>
139{
140  uint32_t operator()(const llvm::StringRef& pKey) const
141  {
142    const uint32_t seed = 131;
143    uint32_t hash_val = 0;
144
145    for(uint32_t i = 0; i < pKey.size(); ++i)
146      hash_val = (hash_val * seed) + pKey[i];
147    return hash_val;
148  }
149};
150
151
152/** \class StringHash<SDBM>
153 *  \brief SDBM hash function
154 *  0.049s in 100000 test
155 */
156template<>
157struct StringHash<SDBM> : public std::unary_function<const llvm::StringRef&, uint32_t>
158{
159  uint32_t operator()(const llvm::StringRef& pKey) const
160  {
161    uint32_t hash_val = 0;
162
163    for(uint32_t i = 0; i < pKey.size(); ++i)
164      hash_val = pKey[i] + (hash_val << 6) + (hash_val << 16) - hash_val;
165    return hash_val;
166  }
167};
168
169/** \class StringHash<DJB>
170 *  \brief DJB hash function
171 *  0.057s in 100000 test
172 */
173template<>
174struct StringHash<DJB> : public std::unary_function<const llvm::StringRef&, uint32_t>
175{
176  uint32_t operator()(const llvm::StringRef& pKey) const
177  {
178    uint32_t hash_val = 5381;
179
180    for(uint32_t i = 0; i < pKey.size(); ++i)
181      hash_val = ((hash_val << 5) + hash_val) + pKey[i];
182
183    return hash_val;
184  }
185};
186
187/** \class StringHash<DEK>
188 *  \brief DEK hash function
189 *  0.60s
190 */
191template<>
192struct StringHash<DEK> : public std::unary_function<const llvm::StringRef&, uint32_t>
193{
194  uint32_t operator()(const llvm::StringRef& pKey) const
195  {
196    uint32_t hash_val = pKey.size();
197
198    for(uint32_t i = 0; i < pKey.size(); ++i)
199      hash_val = ((hash_val << 5) ^ (hash_val >> 27)) ^ pKey[i];
200
201    return hash_val;
202  }
203};
204
205/** \class StringHash<BP>
206 *  \brief BP hash function
207 *  0.057s
208 */
209template<>
210struct StringHash<BP> : public std::unary_function<const llvm::StringRef&, uint32_t>
211{
212  uint32_t operator()(const llvm::StringRef& pKey) const
213  {
214    uint32_t hash_val = 0;
215    for(uint32_t i = 0; i < pKey.size(); ++i)
216      hash_val = hash_val << 7 ^ pKey[i];
217
218    return hash_val;
219  }
220};
221
222/** \class StringHash<FNV>
223 *  \brief FNV hash function
224 *  0.058s
225 */
226template<>
227struct StringHash<FNV> : public std::unary_function<const llvm::StringRef&, uint32_t>
228{
229  uint32_t operator()(const llvm::StringRef& pKey) const
230  {
231    const uint32_t fnv_prime = 0x811C9DC5;
232    uint32_t hash_val = 0;
233    for(uint32_t i = 0; i < pKey.size(); ++i) {
234      hash_val *= fnv_prime;
235      hash_val ^= pKey[i];
236    }
237
238    return hash_val;
239  }
240};
241
242/** \class StringHash<AP>
243 *  \brief AP hash function
244 *  0.060s
245 */
246template<>
247struct StringHash<AP> : public std::unary_function<const llvm::StringRef&, uint32_t>
248{
249  uint32_t operator()(const llvm::StringRef& pKey) const
250  {
251    unsigned int hash_val = 0xAAAAAAAA;
252
253    for(uint32_t i = 0; i < pKey.size(); ++i) {
254      hash_val ^= ((i & 1) == 0)?
255                          ((hash_val <<  7) ^ pKey[i] * (hash_val >> 3)):
256                          (~((hash_val << 11) + (pKey[i] ^ (hash_val >> 5))));
257    }
258
259    return hash_val;
260  }
261};
262
263/** \class StringHash<ES>
264 *  \brief This is a revision of Edward Sayers' string characteristic function.
265 *
266 *  31-28  27  26  25   -   0
267 *  +----+---+---+------------+
268 *  | .  | N | - | a/A  ~ z/Z |
269 *  +----+---+---+------------+
270 *
271 *  . (bit 31~28) - The number of '.' characters
272 *  N (bit 27)    - Are there any numbers in the string
273 *  - (bit 26)    - Does the string have '-' character
274 *  bit 25~0      - Bit 25 is set only if the string contains a 'a' or 'A', and
275 *                  Bit 24 is set only if ...                   'b' or 'B', ...
276 */
277template<>
278struct StringHash<ES> : public std::unary_function<const llvm::StringRef&, uint32_t>
279{
280  uint32_t operator()(const llvm::StringRef& pString) const
281  {
282    uint32_t result = 0x0;
283    unsigned int dot = 0;
284    std::string::size_type idx;
285    for (idx = 0; idx < pString.size(); ++idx) {
286      int cur_char = pString[idx];
287
288      if ('.' == cur_char) {
289        ++dot;
290        continue;
291      }
292
293      if (isdigit(cur_char)) {
294        result |= (1 << 27);
295        continue;
296      }
297
298      if ('_' == cur_char) {
299        result |= (1 << 26);
300        continue;
301      }
302
303      if (isupper(cur_char)) {
304        result |= (1 << (cur_char - 'A'));
305        continue;
306      }
307
308      if (islower(cur_char)) {
309        result |= (1 << (cur_char - 'a'));
310        continue;
311      }
312    }
313    result |= (dot << 28);
314    return result;
315  }
316
317
318  /** \func may_include
319   *  \brief is it possible that pRule is a sub-string of pInString
320   */
321  static bool may_include(uint32_t pRule, uint32_t pInString)
322  {
323    uint32_t in_c = pInString << 4;
324    uint32_t r_c  = pRule << 4;
325
326    uint32_t res = (in_c ^ r_c) & r_c;
327    if (0 != res)
328      return false;
329
330    uint32_t in_dot = pInString >> 28;
331    uint32_t r_dot  = pRule >> 28;
332    if (r_dot > in_dot)
333      return false;
334
335    return true;
336  }
337};
338
339/** \class template<uint32_t TYPE> StringCompare
340 *  \brief the template StringCompare class, for specification
341 */
342template<typename STRING_TYPE>
343struct StringCompare : public std::binary_function<const STRING_TYPE&, const STRING_TYPE&, bool>
344{
345  bool operator()(const STRING_TYPE& X, const STRING_TYPE& Y) const
346  { return X == Y; }
347};
348
349template<>
350struct StringCompare<const char*> : public std::binary_function<const char*, const char*, bool>
351{
352  bool operator()(const char* X, const char* Y) const
353  { return (0 == std::strcmp(X, Y)); }
354};
355
356template<>
357struct StringCompare<char*> : public std::binary_function<const char*, const char*, bool>
358{
359  bool operator()(const char* X, const char* Y) const
360  { return (0 == std::strcmp(X, Y)); }
361};
362
363} // namespace of hash
364} // namespace of mcld
365
366#endif
367
368