StringHash.h revision 533eae20118036f425f27bf0536ef0ccbb090b65
1//===- StringHash.h -------------------------------------------------------===//
2//
3//                     The MCLinker Project
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9#ifndef MCLD_ADT_STRINGHASH_H
10#define MCLD_ADT_STRINGHASH_H
11#include <llvm/ADT/StringRef.h>
12#include <llvm/Support/DataTypes.h>
13#include <cctype>
14#include <cassert>
15#include <functional>
16
17namespace mcld {
18namespace hash {
19
20enum Type {
21  RS,
22  JS,
23  PJW,
24  ELF,
25  BKDR,
26  SDBM,
27  DJB,
28  DEK,
29  BP,
30  FNV,
31  AP,
32  ES
33};
34
35/** \class template<uint32_t TYPE> StringHash
36 *  \brief the template StringHash class, for specification
37 */
38template<uint32_t TYPE>
39struct StringHash : public std::unary_function<const llvm::StringRef&, uint32_t>
40{
41  uint32_t operator()(const llvm::StringRef& pKey) const
42  {
43    assert(false && "Undefined StringHash function.\n");
44    return 0;
45  }
46};
47
48/** \class StringHash<RSHash>
49 *  \brief RS StringHash funciton
50 */
51template<>
52struct StringHash<RS> : public std::unary_function<const llvm::StringRef&, uint32_t>
53{
54  uint32_t operator()(const llvm::StringRef& pKey) const
55  {
56    const unsigned int b = 378551;
57    uint32_t a = 63689;
58    uint32_t hash_val = 0;
59
60    for(unsigned int i = 0; i < pKey.size(); ++i) {
61      hash_val = hash_val * a + pKey[i];
62      a = a * b;
63    }
64    return hash_val;
65  }
66};
67
68/** \class StringHash<JSHash>
69 *  \brief JS hash funciton
70 */
71template<>
72struct StringHash<JS> : public std::unary_function<const llvm::StringRef&, uint32_t>
73{
74  uint32_t operator()(const llvm::StringRef& pKey) const
75  {
76    uint32_t hash_val = 1315423911;
77
78    for(unsigned int i = 0; i < pKey.size(); ++i) {
79       hash_val ^= ((hash_val << 5) + pKey[i] + (hash_val >> 2));
80    }
81    return hash_val;
82  }
83};
84
85/** \class StringHash<PJW>
86 *  \brief P.J. Weinberger hash function
87 */
88template<>
89struct StringHash<PJW> : public std::unary_function<const llvm::StringRef&, uint32_t>
90{
91  uint32_t operator()(const llvm::StringRef& pKey) const
92  {
93    const unsigned int BitsInUnsignedInt = (unsigned int)(sizeof(unsigned int) * 8);
94    const unsigned int ThreeQuarters     = (unsigned int)((BitsInUnsignedInt  * 3) / 4);
95    const unsigned int OneEighth         = (unsigned int)(BitsInUnsignedInt / 8);
96    const unsigned int HighBits          = (unsigned int)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth);
97    uint32_t hash_val = 0;
98    uint32_t test = 0;
99
100    for(unsigned int i = 0; i < pKey.size(); ++i) {
101      hash_val = (hash_val << OneEighth) + pKey[i];
102
103      if((test = hash_val & HighBits) != 0) {
104        hash_val = (( hash_val ^ (test >> ThreeQuarters)) & (~HighBits));
105      }
106    }
107    return hash_val;
108  }
109};
110
111/** \class StringHash<ELF>
112 *  \brief ELF hash function.
113 */
114template<>
115struct StringHash<ELF> : public std::unary_function<const llvm::StringRef&, uint32_t>
116{
117  uint32_t operator()(const llvm::StringRef& pKey) const
118  {
119    uint32_t hash_val = 0;
120    uint32_t x = 0;
121
122    for (unsigned int i = 0; i < pKey.size(); ++i) {
123      hash_val = (hash_val << 4) + pKey[i];
124      if((x = hash_val & 0xF0000000L) != 0)
125        hash_val ^= (x >> 24);
126      hash_val &= ~x;
127    }
128    return hash_val;
129  }
130};
131
132/** \class StringHash<BKDR>
133 *  \brief BKDR hash function
134 */
135template<>
136struct StringHash<BKDR> : public std::unary_function<const llvm::StringRef&, uint32_t>
137{
138  uint32_t operator()(const llvm::StringRef& pKey) const
139  {
140    const uint32_t seed = 131;
141    uint32_t hash_val = 0;
142
143    for(uint32_t i = 0; i < pKey.size(); ++i)
144      hash_val = (hash_val * seed) + pKey[i];
145    return hash_val;
146  }
147};
148
149
150/** \class StringHash<SDBM>
151 *  \brief SDBM hash function
152 *  0.049s in 100000 test
153 */
154template<>
155struct StringHash<SDBM> : public std::unary_function<const llvm::StringRef&, uint32_t>
156{
157  uint32_t operator()(const llvm::StringRef& pKey) const
158  {
159    uint32_t hash_val = 0;
160
161    for(uint32_t i = 0; i < pKey.size(); ++i)
162      hash_val = pKey[i] + (hash_val << 6) + (hash_val << 16) - hash_val;
163    return hash_val;
164  }
165};
166
167/** \class StringHash<DJB>
168 *  \brief DJB hash function
169 *  0.057s in 100000 test
170 */
171template<>
172struct StringHash<DJB> : public std::unary_function<const llvm::StringRef&, uint32_t>
173{
174  uint32_t operator()(const llvm::StringRef& pKey) const
175  {
176    uint32_t hash_val = 5381;
177
178    for(uint32_t i = 0; i < pKey.size(); ++i)
179      hash_val = ((hash_val << 5) + hash_val) + pKey[i];
180
181    return hash_val;
182  }
183};
184
185/** \class StringHash<DEK>
186 *  \brief DEK hash function
187 *  0.60s
188 */
189template<>
190struct StringHash<DEK> : public std::unary_function<const llvm::StringRef&, uint32_t>
191{
192  uint32_t operator()(const llvm::StringRef& pKey) const
193  {
194    uint32_t hash_val = pKey.size();
195
196    for(uint32_t i = 0; i < pKey.size(); ++i)
197      hash_val = ((hash_val << 5) ^ (hash_val >> 27)) ^ pKey[i];
198
199    return hash_val;
200  }
201};
202
203/** \class StringHash<BP>
204 *  \brief BP hash function
205 *  0.057s
206 */
207template<>
208struct StringHash<BP> : public std::unary_function<const llvm::StringRef&, uint32_t>
209{
210  uint32_t operator()(const llvm::StringRef& pKey) const
211  {
212    uint32_t hash_val = 0;
213    for(uint32_t i = 0; i < pKey.size(); ++i)
214      hash_val = hash_val << 7 ^ pKey[i];
215
216    return hash_val;
217  }
218};
219
220/** \class StringHash<FNV>
221 *  \brief FNV hash function
222 *  0.058s
223 */
224template<>
225struct StringHash<FNV> : public std::unary_function<const llvm::StringRef&, uint32_t>
226{
227  uint32_t operator()(const llvm::StringRef& pKey) const
228  {
229    const uint32_t fnv_prime = 0x811C9DC5;
230    uint32_t hash_val = 0;
231    for(uint32_t i = 0; i < pKey.size(); ++i) {
232      hash_val *= fnv_prime;
233      hash_val ^= pKey[i];
234    }
235
236    return hash_val;
237  }
238};
239
240/** \class StringHash<AP>
241 *  \brief AP hash function
242 *  0.060s
243 */
244template<>
245struct StringHash<AP> : public std::unary_function<const llvm::StringRef&, uint32_t>
246{
247  uint32_t operator()(const llvm::StringRef& pKey) const
248  {
249    unsigned int hash_val = 0xAAAAAAAA;
250
251    for(uint32_t i = 0; i < pKey.size(); ++i) {
252      hash_val ^= ((i & 1) == 0)?
253                          ((hash_val <<  7) ^ pKey[i] * (hash_val >> 3)):
254                          (~((hash_val << 11) + (pKey[i] ^ (hash_val >> 5))));
255    }
256
257    return hash_val;
258  }
259};
260
261/** \class StringHash<ES>
262 *  \brief This is a revision of Edward Sayers' string characteristic function.
263 *
264 *  31-28  27  26  25   -   0
265 *  +----+---+---+------------+
266 *  | .  | N | - | a/A  ~ z/Z |
267 *  +----+---+---+------------+
268 *
269 *  . (bit 31~28) - The number of '.' characters
270 *  N (bit 27)    - Are there any numbers in the string
271 *  - (bit 26)    - Does the string have '-' character
272 *  bit 25~0      - Bit 25 is set only if the string contains a 'a' or 'A', and
273 *                  Bit 24 is set only if ...                   'b' or 'B', ...
274 */
275template<>
276struct StringHash<ES> : public std::unary_function<const llvm::StringRef&, uint32_t>
277{
278  uint32_t operator()(const llvm::StringRef& pString) const
279  {
280    uint32_t result = 0x0;
281    unsigned int dot = 0;
282    std::string::size_type idx;
283    for (idx = 0; idx < pString.size(); ++idx) {
284      int cur_char = pString[idx];
285
286      if ('.' == cur_char) {
287        ++dot;
288        continue;
289      }
290
291      if (isdigit(cur_char)) {
292        result |= (1 << 27);
293        continue;
294      }
295
296      if ('_' == cur_char) {
297        result |= (1 << 26);
298        continue;
299      }
300
301      if (isupper(cur_char)) {
302        result |= (1 << (cur_char - 'A'));
303        continue;
304      }
305
306      if (islower(cur_char)) {
307        result |= (1 << (cur_char - 'a'));
308        continue;
309      }
310    }
311    result |= (dot << 28);
312    return result;
313  }
314
315
316  /** \func may_include
317   *  \brief is it possible that pRule is a sub-string of pInString
318   */
319  static bool may_include(uint32_t pRule, uint32_t pInString)
320  {
321    uint32_t in_c = pInString << 4;
322    uint32_t r_c  = pRule << 4;
323
324    uint32_t res = (in_c ^ r_c) & r_c;
325    if (0 != res)
326      return false;
327
328    uint32_t in_dot = pInString >> 28;
329    uint32_t r_dot  = pRule >> 28;
330    if (r_dot > in_dot)
331      return false;
332
333    return true;
334  }
335};
336
337/** \class template<uint32_t TYPE> StringCompare
338 *  \brief the template StringCompare class, for specification
339 */
340template<typename STRING_TYPE>
341struct StringCompare : public std::binary_function<const STRING_TYPE&, const STRING_TYPE&, bool>
342{
343  bool operator()(const STRING_TYPE& X, const STRING_TYPE& Y) const
344  { return X == Y; }
345};
346
347template<>
348struct StringCompare<const char*> : public std::binary_function<const char*, const char*, bool>
349{
350  bool operator()(const char* X, const char* Y) const
351  { return (0 == std::strcmp(X, Y)); }
352};
353
354template<>
355struct StringCompare<char*> : public std::binary_function<const char*, const char*, bool>
356{
357  bool operator()(const char* X, const char* Y) const
358  { return (0 == std::strcmp(X, Y)); }
359};
360
361} // namespace of hash
362} // namespace of mcld
363
364#endif
365
366