StringHash.h revision 37b74a387bb3993387029859c2d9d051c41c724e
1//===- StringHash.h -------------------------------------------------------===//
2//
3//                     The MCLinker Project
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9#ifndef MCLD_ADT_STRINGHASH_H_
10#define MCLD_ADT_STRINGHASH_H_
11
12#include <llvm/ADT/StringRef.h>
13#include <llvm/Support/DataTypes.h>
14
15#include <cassert>
16#include <cctype>
17#include <functional>
18
19namespace mcld {
20namespace hash {
21
22enum Type { RS, JS, PJW, ELF, BKDR, SDBM, DJB, DEK, BP, FNV, AP, ES };
23
24/** \class template<uint32_t TYPE> StringHash
25 *  \brief the template StringHash class, for specification
26 */
27template <uint32_t TYPE>
28struct StringHash
29    : public std::unary_function<const llvm::StringRef, uint32_t> {
30  uint32_t operator()(const llvm::StringRef pKey) const {
31    assert(false && "Undefined StringHash function.\n");
32    return 0;
33  }
34};
35
36/** \class StringHash<RSHash>
37 *  \brief RS StringHash funciton
38 */
39template <>
40struct StringHash<RS>
41    : public std::unary_function<const llvm::StringRef, uint32_t> {
42  uint32_t operator()(const llvm::StringRef pKey) const {
43    const unsigned int b = 378551;
44    uint32_t a = 63689;
45    uint32_t hash_val = 0;
46
47    for (unsigned int i = 0; i < pKey.size(); ++i) {
48      hash_val = hash_val * a + pKey[i];
49      a = a * b;
50    }
51    return hash_val;
52  }
53};
54
55/** \class StringHash<JSHash>
56 *  \brief JS hash funciton
57 */
58template <>
59struct StringHash<JS>
60    : public std::unary_function<const llvm::StringRef, uint32_t> {
61  uint32_t operator()(const llvm::StringRef pKey) const {
62    uint32_t hash_val = 1315423911;
63
64    for (unsigned int i = 0; i < pKey.size(); ++i) {
65      hash_val ^= ((hash_val << 5) + pKey[i] + (hash_val >> 2));
66    }
67    return hash_val;
68  }
69};
70
71/** \class StringHash<PJW>
72 *  \brief P.J. Weinberger hash function
73 */
74template <>
75struct StringHash<PJW>
76    : public std::unary_function<const llvm::StringRef, uint32_t> {
77  uint32_t operator()(const llvm::StringRef pKey) const {
78    const unsigned int BitsInUnsignedInt =
79        (unsigned int)(sizeof(unsigned int) * 8);
80    const unsigned int ThreeQuarters =
81        (unsigned int)((BitsInUnsignedInt * 3) / 4);
82    const unsigned int OneEighth = (unsigned int)(BitsInUnsignedInt / 8);
83    const unsigned int HighBits = (unsigned int)(0xFFFFFFFF)
84                                  << (BitsInUnsignedInt - OneEighth);
85    uint32_t hash_val = 0;
86    uint32_t test = 0;
87
88    for (unsigned int i = 0; i < pKey.size(); ++i) {
89      hash_val = (hash_val << OneEighth) + pKey[i];
90
91      if ((test = hash_val & HighBits) != 0) {
92        hash_val = ((hash_val ^ (test >> ThreeQuarters)) & (~HighBits));
93      }
94    }
95    return hash_val;
96  }
97};
98
99/** \class StringHash<ELF>
100 *  \brief ELF hash function.
101 */
102template <>
103struct StringHash<ELF>
104    : public std::unary_function<const llvm::StringRef, uint32_t> {
105  uint32_t operator()(const llvm::StringRef pKey) const {
106    uint32_t hash_val = 0;
107    uint32_t x = 0;
108
109    for (unsigned int i = 0; i < pKey.size(); ++i) {
110      hash_val = (hash_val << 4) + pKey[i];
111      if ((x = hash_val & 0xF0000000L) != 0)
112        hash_val ^= (x >> 24);
113      hash_val &= ~x;
114    }
115    return hash_val;
116  }
117};
118
119/** \class StringHash<BKDR>
120 *  \brief BKDR hash function
121 */
122template <>
123struct StringHash<BKDR>
124    : public std::unary_function<const llvm::StringRef, uint32_t> {
125  uint32_t operator()(const llvm::StringRef pKey) const {
126    const uint32_t seed = 131;
127    uint32_t hash_val = 0;
128
129    for (uint32_t i = 0; i < pKey.size(); ++i)
130      hash_val = (hash_val * seed) + pKey[i];
131    return hash_val;
132  }
133};
134
135/** \class StringHash<SDBM>
136 *  \brief SDBM hash function
137 *  0.049s in 100000 test
138 */
139template <>
140struct StringHash<SDBM>
141    : public std::unary_function<const llvm::StringRef, uint32_t> {
142  uint32_t operator()(const llvm::StringRef pKey) const {
143    uint32_t hash_val = 0;
144
145    for (uint32_t i = 0; i < pKey.size(); ++i)
146      hash_val = pKey[i] + (hash_val << 6) + (hash_val << 16) - hash_val;
147    return hash_val;
148  }
149};
150
151/** \class StringHash<DJB>
152 *  \brief DJB hash function
153 *  0.057s in 100000 test
154 */
155template <>
156struct StringHash<DJB>
157    : public std::unary_function<const llvm::StringRef, uint32_t> {
158  uint32_t operator()(const llvm::StringRef pKey) const {
159    uint32_t hash_val = 5381;
160
161    for (uint32_t i = 0; i < pKey.size(); ++i)
162      hash_val = ((hash_val << 5) + hash_val) + pKey[i];
163
164    return hash_val;
165  }
166};
167
168/** \class StringHash<DEK>
169 *  \brief DEK hash function
170 *  0.60s
171 */
172template <>
173struct StringHash<DEK>
174    : public std::unary_function<const llvm::StringRef, uint32_t> {
175  uint32_t operator()(const llvm::StringRef pKey) const {
176    uint32_t hash_val = pKey.size();
177
178    for (uint32_t i = 0; i < pKey.size(); ++i)
179      hash_val = ((hash_val << 5) ^ (hash_val >> 27)) ^ pKey[i];
180
181    return hash_val;
182  }
183};
184
185/** \class StringHash<BP>
186 *  \brief BP hash function
187 *  0.057s
188 */
189template <>
190struct StringHash<BP>
191    : public std::unary_function<const llvm::StringRef, uint32_t> {
192  uint32_t operator()(const llvm::StringRef pKey) const {
193    uint32_t hash_val = 0;
194    for (uint32_t i = 0; i < pKey.size(); ++i)
195      hash_val = hash_val << 7 ^ pKey[i];
196
197    return hash_val;
198  }
199};
200
201/** \class StringHash<FNV>
202 *  \brief FNV hash function
203 *  0.058s
204 */
205template <>
206struct StringHash<FNV>
207    : public std::unary_function<const llvm::StringRef, uint32_t> {
208  uint32_t operator()(const llvm::StringRef pKey) const {
209    const uint32_t fnv_prime = 0x811C9DC5;
210    uint32_t hash_val = 0;
211    for (uint32_t i = 0; i < pKey.size(); ++i) {
212      hash_val *= fnv_prime;
213      hash_val ^= pKey[i];
214    }
215
216    return hash_val;
217  }
218};
219
220/** \class StringHash<AP>
221 *  \brief AP hash function
222 *  0.060s
223 */
224template <>
225struct StringHash<AP>
226    : public std::unary_function<const llvm::StringRef, uint32_t> {
227  uint32_t operator()(const llvm::StringRef pKey) const {
228    unsigned int hash_val = 0xAAAAAAAA;
229
230    for (uint32_t i = 0; i < pKey.size(); ++i) {
231      hash_val ^= ((i & 1) == 0)
232                      ? ((hash_val << 7) ^ pKey[i] * (hash_val >> 3))
233                      : (~((hash_val << 11) + (pKey[i] ^ (hash_val >> 5))));
234    }
235
236    return hash_val;
237  }
238};
239
240/** \class StringHash<ES>
241 *  \brief This is a revision of Edward Sayers' string characteristic function.
242 *
243 *  31-28  27  26  25   -   0
244 *  +----+---+---+------------+
245 *  | .  | N | - | a/A  ~ z/Z |
246 *  +----+---+---+------------+
247 *
248 *  . (bit 31~28) - The number of '.' characters
249 *  N (bit 27)    - Are there any numbers in the string
250 *  - (bit 26)    - Does the string have '-' character
251 *  bit 25~0      - Bit 25 is set only if the string contains a 'a' or 'A', and
252 *                  Bit 24 is set only if ...                   'b' or 'B', ...
253 */
254template <>
255struct StringHash<ES>
256    : public std::unary_function<const llvm::StringRef, uint32_t> {
257  uint32_t operator()(const llvm::StringRef pString) const {
258    uint32_t result = 0x0;
259    unsigned int dot = 0;
260    std::string::size_type idx;
261    for (idx = 0; idx < pString.size(); ++idx) {
262      int cur_char = pString[idx];
263
264      if ('.' == cur_char) {
265        ++dot;
266        continue;
267      }
268
269      if (isdigit(cur_char)) {
270        result |= (1 << 27);
271        continue;
272      }
273
274      if ('_' == cur_char) {
275        result |= (1 << 26);
276        continue;
277      }
278
279      if (isupper(cur_char)) {
280        result |= (1 << (cur_char - 'A'));
281        continue;
282      }
283
284      if (islower(cur_char)) {
285        result |= (1 << (cur_char - 'a'));
286        continue;
287      }
288    }
289    result |= (dot << 28);
290    return result;
291  }
292
293  /** \func may_include
294   *  \brief is it possible that pRule is a sub-string of pInString
295   */
296  static bool may_include(uint32_t pRule, uint32_t pInString) {
297    uint32_t in_c = pInString << 4;
298    uint32_t r_c = pRule << 4;
299
300    uint32_t res = (in_c ^ r_c) & r_c;
301    if (0 != res)
302      return false;
303
304    uint32_t in_dot = pInString >> 28;
305    uint32_t r_dot = pRule >> 28;
306    if (r_dot > in_dot)
307      return false;
308
309    return true;
310  }
311};
312
313/** \class template<uint32_t TYPE> StringCompare
314 *  \brief the template StringCompare class, for specification
315 */
316template <typename STRING_TYPE>
317struct StringCompare : public std::binary_function<const STRING_TYPE&,
318                                                   const STRING_TYPE&,
319                                                   bool> {
320  bool operator()(const STRING_TYPE& X, const STRING_TYPE& Y) const {
321    return X == Y;
322  }
323};
324
325template <>
326struct StringCompare<const char*>
327    : public std::binary_function<const char*, const char*, bool> {
328  bool operator()(const char* X, const char* Y) const {
329    return (std::strcmp(X, Y) == 0);
330  }
331};
332
333template <>
334struct StringCompare<char*>
335    : public std::binary_function<const char*, const char*, bool> {
336  bool operator()(const char* X, const char* Y) const {
337    return (std::strcmp(X, Y) == 0);
338  }
339};
340
341}  // namespace hash
342}  // namespace mcld
343
344#endif  // MCLD_ADT_STRINGHASH_H_
345