StringHash.h revision 22add6ff3426df1a85089fe6a6e1597ee3b6f300
1//===- StringHash.h ---------------------------------------------------------===//
2//
3//                     The MCLinker Project
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef MCLD_STRING_HASH_FUNCTION_H
11#define MCLD_STRING_HASH_FUNCTION_H
12#ifdef ENABLE_UNITTEST
13#include <gtest.h>
14#endif
15#include <llvm/ADT/StringRef.h>
16#include <llvm/Support/DataTypes.h>
17#include <llvm/Support/ErrorHandling.h>
18#include <cctype>
19#include <functional>
20
21namespace mcld
22{
23
24enum StringHashType
25{
26  RS,
27  JS,
28  PJW,
29  ELF,
30  BKDR,
31  SDBM,
32  DJB,
33  DEK,
34  BP,
35  FNV,
36  AP,
37  ES
38};
39
40/** \class template<uint32_t TYPE> StringHash
41 *  \brief the template StringHash class, for specification
42 */
43template<uint32_t TYPE>
44struct StringHash : public std::unary_function<const llvm::StringRef&, uint32_t>
45{
46  uint32_t operator()(const llvm::StringRef& pKey) const
47  {
48    llvm::report_fatal_error("Undefined StringHash function.\n");
49  }
50};
51
52/** \class StringHash<RSHash>
53 *  \brief RS StringHash funciton
54 */
55template<>
56struct StringHash<RS> : public std::unary_function<const llvm::StringRef&, uint32_t>
57{
58  uint32_t operator()(const llvm::StringRef& pKey) const
59  {
60    const unsigned int b = 378551;
61    uint32_t a = 63689;
62    uint32_t hash_val = 0;
63
64    for(unsigned int i = 0; i < pKey.size(); ++i) {
65      hash_val = hash_val * a + pKey[i];
66      a = a * b;
67    }
68    return hash_val;
69  }
70};
71
72/** \class StringHash<JSHash>
73 *  \brief JS hash funciton
74 */
75template<>
76struct StringHash<JS> : public std::unary_function<const llvm::StringRef&, uint32_t>
77{
78  uint32_t operator()(const llvm::StringRef& pKey) const
79  {
80    uint32_t hash_val = 1315423911;
81
82    for(unsigned int i = 0; i < pKey.size(); ++i) {
83       hash_val ^= ((hash_val << 5) + pKey[i] + (hash_val >> 2));
84    }
85    return hash_val;
86  }
87};
88
89/** \class StringHash<PJW>
90 *  \brief P.J. Weinberger hash function
91 */
92template<>
93struct StringHash<PJW> : public std::unary_function<const llvm::StringRef&, uint32_t>
94{
95  uint32_t operator()(const llvm::StringRef& pKey) const
96  {
97    const unsigned int BitsInUnsignedInt = (unsigned int)(sizeof(unsigned int) * 8);
98    const unsigned int ThreeQuarters     = (unsigned int)((BitsInUnsignedInt  * 3) / 4);
99    const unsigned int OneEighth         = (unsigned int)(BitsInUnsignedInt / 8);
100    const unsigned int HighBits          = (unsigned int)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth);
101    uint32_t hash_val = 0;
102    uint32_t test = 0;
103
104    for(unsigned int i = 0; i < pKey.size(); ++i) {
105      hash_val = (hash_val << OneEighth) + pKey[i];
106
107      if((test = hash_val & HighBits) != 0) {
108        hash_val = (( hash_val ^ (test >> ThreeQuarters)) & (~HighBits));
109      }
110    }
111    return hash_val;
112  }
113};
114
115/** \class StringHash<ELF>
116 *  \brief ELF hash function.
117 */
118template<>
119struct StringHash<ELF> : public std::unary_function<const llvm::StringRef&, uint32_t>
120{
121  uint32_t operator()(const llvm::StringRef& pKey) const
122  {
123    uint32_t hash_val = 0;
124    uint32_t x = 0;
125
126    for (unsigned int i = 0; i < pKey.size(); ++i) {
127      hash_val = (hash_val << 4) + pKey[i];
128      if((x = hash_val & 0xF0000000L) != 0)
129        hash_val ^= (x >> 24);
130      hash_val &= ~x;
131    }
132    return hash_val;
133  }
134};
135
136/** \class StringHash<BKDR>
137 *  \brief BKDR hash function
138 */
139template<>
140struct StringHash<BKDR> : public std::unary_function<const llvm::StringRef&, uint32_t>
141{
142  uint32_t operator()(const llvm::StringRef& pKey) const
143  {
144    const uint32_t seed = 131;
145    uint32_t hash_val = 0;
146
147    for(uint32_t i = 0; i < pKey.size(); ++i)
148      hash_val = (hash_val * seed) + pKey[i];
149    return hash_val;
150  }
151};
152
153
154/** \class StringHash<SDBM>
155 *  \brief SDBM hash function
156 *  0.049s in 100000 test
157 */
158template<>
159struct StringHash<SDBM> : public std::unary_function<const llvm::StringRef&, uint32_t>
160{
161  uint32_t operator()(const llvm::StringRef& pKey) const
162  {
163    uint32_t hash_val = 0;
164
165    for(uint32_t i = 0; i < pKey.size(); ++i)
166      hash_val = pKey[i] + (hash_val << 6) + (hash_val << 16) - hash_val;
167    return hash_val;
168  }
169};
170
171/** \class StringHash<DJB>
172 *  \brief DJB hash function
173 *  0.057s in 100000 test
174 */
175template<>
176struct StringHash<DJB> : public std::unary_function<const llvm::StringRef&, uint32_t>
177{
178  uint32_t operator()(const llvm::StringRef& pKey) const
179  {
180    uint32_t hash_val = 5381;
181
182    for(uint32_t i = 0; i < pKey.size(); ++i)
183      hash_val = ((hash_val << 5) + hash_val) + pKey[i];
184
185    return hash_val;
186  }
187};
188
189/** \class StringHash<DEK>
190 *  \brief DEK hash function
191 *  0.60s
192 */
193template<>
194struct StringHash<DEK> : public std::unary_function<const llvm::StringRef&, uint32_t>
195{
196  uint32_t operator()(const llvm::StringRef& pKey) const
197  {
198    uint32_t hash_val = pKey.size();
199
200    for(uint32_t i = 0; i < pKey.size(); ++i)
201      hash_val = ((hash_val << 5) ^ (hash_val >> 27)) ^ pKey[i];
202
203    return hash_val;
204  }
205};
206
207/** \class StringHash<BP>
208 *  \brief BP hash function
209 *  0.057s
210 */
211template<>
212struct StringHash<BP> : public std::unary_function<const llvm::StringRef&, uint32_t>
213{
214  uint32_t operator()(const llvm::StringRef& pKey) const
215  {
216    uint32_t hash_val = 0;
217    for(uint32_t i = 0; i < pKey.size(); ++i)
218      hash_val = hash_val << 7 ^ pKey[i];
219
220    return hash_val;
221  }
222};
223
224/** \class StringHash<FNV>
225 *  \brief FNV hash function
226 *  0.058s
227 */
228template<>
229struct StringHash<FNV> : public std::unary_function<const llvm::StringRef&, uint32_t>
230{
231  uint32_t operator()(const llvm::StringRef& pKey) const
232  {
233    const uint32_t fnv_prime = 0x811C9DC5;
234    uint32_t hash_val = 0;
235    for(uint32_t i = 0; i < pKey.size(); ++i) {
236      hash_val *= fnv_prime;
237      hash_val ^= pKey[i];
238    }
239
240    return hash_val;
241  }
242};
243
244/** \class StringHash<AP>
245 *  \brief AP hash function
246 *  0.060s
247 */
248template<>
249struct StringHash<AP> : public std::unary_function<const llvm::StringRef&, uint32_t>
250{
251  uint32_t operator()(const llvm::StringRef& pKey) const
252  {
253    unsigned int hash_val = 0xAAAAAAAA;
254
255    for(uint32_t i = 0; i < pKey.size(); ++i) {
256      hash_val ^= ((i & 1) == 0)?
257                          ((hash_val <<  7) ^ pKey[i] * (hash_val >> 3)):
258                          (~((hash_val << 11) + (pKey[i] ^ (hash_val >> 5))));
259    }
260
261    return hash_val;
262  }
263};
264
265/** \class StringHash<ES>
266 *  \brief This is a revision of Edward Sayers' string characteristic function.
267 *
268 *  31-28  27  26  25   -   0
269 *  +----+---+---+------------+
270 *  | .  | N | - | a/A  ~ z/Z |
271 *  +----+---+---+------------+
272 *
273 *  . (bit 31~28) - The number of '.' characters
274 *  N (bit 27)    - Are there any numbers in the string
275 *  - (bit 26)    - Does the string have '-' character
276 *  bit 25~0      - Bit 25 is set only if the string contains a 'a' or 'A', and
277 *                  Bit 24 is set only if ...                   'b' or 'B', ...
278 */
279template<>
280struct StringHash<ES> : public std::unary_function<const llvm::StringRef&, uint32_t>
281{
282  uint32_t operator()(const llvm::StringRef& pString) const
283  {
284    uint32_t result = 0x0;
285    unsigned int dot = 0;
286    std::string::size_type idx;
287    for (idx = 0; idx < pString.size(); ++idx) {
288      int cur_char = pString[idx];
289
290      if ('.' == cur_char) {
291        ++dot;
292        continue;
293      }
294
295      if (isdigit(cur_char)) {
296        result |= (1 << 27);
297        continue;
298      }
299
300      if ('_' == cur_char) {
301        result |= (1 << 26);
302        continue;
303      }
304
305      if (isupper(cur_char)) {
306        result |= (1 << (cur_char - 'A'));
307        continue;
308      }
309
310      if (islower(cur_char)) {
311        result |= (1 << (cur_char - 'a'));
312        continue;
313      }
314    }
315    result |= (dot << 28);
316    return result;
317  }
318
319
320  /** \func may_include
321   *  \brief is it possible that pRule is a sub-string of pInString
322   */
323  static bool may_include(uint32_t pRule, uint32_t pInString)
324  {
325    uint32_t in_c = pInString << 4;
326    uint32_t r_c  = pRule << 4;
327
328    uint32_t res = (in_c ^ r_c) & r_c;
329    if (0 != res)
330      return false;
331
332    uint32_t in_dot = pInString >> 28;
333    uint32_t r_dot  = pRule >> 28;
334    if (r_dot > in_dot)
335      return false;
336
337    return true;
338  }
339};
340
341/** \class template<uint32_t TYPE> StringCompare
342 *  \brief the template StringCompare class, for specification
343 */
344template<typename STRING_TYPE>
345struct StringCompare : public std::binary_function<const STRING_TYPE&, const STRING_TYPE&, bool>
346{
347  bool operator()(const STRING_TYPE& X, const STRING_TYPE& Y) const
348  { return X == Y; }
349};
350
351template<>
352struct StringCompare<const char*> : public std::binary_function<const char*, const char*, bool>
353{
354  bool operator()(const char* X, const char* Y) const
355  { return (0 == std::strcmp(X, Y)); }
356};
357
358template<>
359struct StringCompare<char*> : public std::binary_function<const char*, const char*, bool>
360{
361  bool operator()(const char* X, const char* Y) const
362  { return (0 == std::strcmp(X, Y)); }
363};
364
365} // namespace of mcld
366
367#endif
368
369