StringHash.h revision 551ae4ebd3e9d137ea668fb83ae4a55b8cfba451
1//===- StringHash.h -------------------------------------------------------===//
2//
3//                     The MCLinker Project
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9#ifndef MCLD_ADT_STRINGHASH_H
10#define MCLD_ADT_STRINGHASH_H
11#ifdef ENABLE_UNITTEST
12#include <gtest.h>
13#endif
14#include <llvm/ADT/StringRef.h>
15#include <llvm/Support/DataTypes.h>
16#include <cctype>
17#include <cassert>
18#include <functional>
19
20namespace mcld {
21namespace hash {
22
23enum Type {
24  RS,
25  JS,
26  PJW,
27  ELF,
28  BKDR,
29  SDBM,
30  DJB,
31  DEK,
32  BP,
33  FNV,
34  AP,
35  ES
36};
37
38/** \class template<uint32_t TYPE> StringHash
39 *  \brief the template StringHash class, for specification
40 */
41template<uint32_t TYPE>
42struct StringHash : public std::unary_function<const llvm::StringRef&, uint32_t>
43{
44  uint32_t operator()(const llvm::StringRef& pKey) const
45  {
46    assert(false && "Undefined StringHash function.\n");
47    return 0;
48  }
49};
50
51/** \class StringHash<RSHash>
52 *  \brief RS StringHash funciton
53 */
54template<>
55struct StringHash<RS> : public std::unary_function<const llvm::StringRef&, uint32_t>
56{
57  uint32_t operator()(const llvm::StringRef& pKey) const
58  {
59    const unsigned int b = 378551;
60    uint32_t a = 63689;
61    uint32_t hash_val = 0;
62
63    for(unsigned int i = 0; i < pKey.size(); ++i) {
64      hash_val = hash_val * a + pKey[i];
65      a = a * b;
66    }
67    return hash_val;
68  }
69};
70
71/** \class StringHash<JSHash>
72 *  \brief JS hash funciton
73 */
74template<>
75struct StringHash<JS> : public std::unary_function<const llvm::StringRef&, uint32_t>
76{
77  uint32_t operator()(const llvm::StringRef& pKey) const
78  {
79    uint32_t hash_val = 1315423911;
80
81    for(unsigned int i = 0; i < pKey.size(); ++i) {
82       hash_val ^= ((hash_val << 5) + pKey[i] + (hash_val >> 2));
83    }
84    return hash_val;
85  }
86};
87
88/** \class StringHash<PJW>
89 *  \brief P.J. Weinberger hash function
90 */
91template<>
92struct StringHash<PJW> : public std::unary_function<const llvm::StringRef&, uint32_t>
93{
94  uint32_t operator()(const llvm::StringRef& pKey) const
95  {
96    const unsigned int BitsInUnsignedInt = (unsigned int)(sizeof(unsigned int) * 8);
97    const unsigned int ThreeQuarters     = (unsigned int)((BitsInUnsignedInt  * 3) / 4);
98    const unsigned int OneEighth         = (unsigned int)(BitsInUnsignedInt / 8);
99    const unsigned int HighBits          = (unsigned int)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth);
100    uint32_t hash_val = 0;
101    uint32_t test = 0;
102
103    for(unsigned int i = 0; i < pKey.size(); ++i) {
104      hash_val = (hash_val << OneEighth) + pKey[i];
105
106      if((test = hash_val & HighBits) != 0) {
107        hash_val = (( hash_val ^ (test >> ThreeQuarters)) & (~HighBits));
108      }
109    }
110    return hash_val;
111  }
112};
113
114/** \class StringHash<ELF>
115 *  \brief ELF hash function.
116 */
117template<>
118struct StringHash<ELF> : public std::unary_function<const llvm::StringRef&, uint32_t>
119{
120  uint32_t operator()(const llvm::StringRef& pKey) const
121  {
122    uint32_t hash_val = 0;
123    uint32_t x = 0;
124
125    for (unsigned int i = 0; i < pKey.size(); ++i) {
126      hash_val = (hash_val << 4) + pKey[i];
127      if((x = hash_val & 0xF0000000L) != 0)
128        hash_val ^= (x >> 24);
129      hash_val &= ~x;
130    }
131    return hash_val;
132  }
133};
134
135/** \class StringHash<BKDR>
136 *  \brief BKDR hash function
137 */
138template<>
139struct StringHash<BKDR> : public std::unary_function<const llvm::StringRef&, uint32_t>
140{
141  uint32_t operator()(const llvm::StringRef& pKey) const
142  {
143    const uint32_t seed = 131;
144    uint32_t hash_val = 0;
145
146    for(uint32_t i = 0; i < pKey.size(); ++i)
147      hash_val = (hash_val * seed) + pKey[i];
148    return hash_val;
149  }
150};
151
152
153/** \class StringHash<SDBM>
154 *  \brief SDBM hash function
155 *  0.049s in 100000 test
156 */
157template<>
158struct StringHash<SDBM> : public std::unary_function<const llvm::StringRef&, uint32_t>
159{
160  uint32_t operator()(const llvm::StringRef& pKey) const
161  {
162    uint32_t hash_val = 0;
163
164    for(uint32_t i = 0; i < pKey.size(); ++i)
165      hash_val = pKey[i] + (hash_val << 6) + (hash_val << 16) - hash_val;
166    return hash_val;
167  }
168};
169
170/** \class StringHash<DJB>
171 *  \brief DJB hash function
172 *  0.057s in 100000 test
173 */
174template<>
175struct StringHash<DJB> : public std::unary_function<const llvm::StringRef&, uint32_t>
176{
177  uint32_t operator()(const llvm::StringRef& pKey) const
178  {
179    uint32_t hash_val = 5381;
180
181    for(uint32_t i = 0; i < pKey.size(); ++i)
182      hash_val = ((hash_val << 5) + hash_val) + pKey[i];
183
184    return hash_val;
185  }
186};
187
188/** \class StringHash<DEK>
189 *  \brief DEK hash function
190 *  0.60s
191 */
192template<>
193struct StringHash<DEK> : public std::unary_function<const llvm::StringRef&, uint32_t>
194{
195  uint32_t operator()(const llvm::StringRef& pKey) const
196  {
197    uint32_t hash_val = pKey.size();
198
199    for(uint32_t i = 0; i < pKey.size(); ++i)
200      hash_val = ((hash_val << 5) ^ (hash_val >> 27)) ^ pKey[i];
201
202    return hash_val;
203  }
204};
205
206/** \class StringHash<BP>
207 *  \brief BP hash function
208 *  0.057s
209 */
210template<>
211struct StringHash<BP> : public std::unary_function<const llvm::StringRef&, uint32_t>
212{
213  uint32_t operator()(const llvm::StringRef& pKey) const
214  {
215    uint32_t hash_val = 0;
216    for(uint32_t i = 0; i < pKey.size(); ++i)
217      hash_val = hash_val << 7 ^ pKey[i];
218
219    return hash_val;
220  }
221};
222
223/** \class StringHash<FNV>
224 *  \brief FNV hash function
225 *  0.058s
226 */
227template<>
228struct StringHash<FNV> : public std::unary_function<const llvm::StringRef&, uint32_t>
229{
230  uint32_t operator()(const llvm::StringRef& pKey) const
231  {
232    const uint32_t fnv_prime = 0x811C9DC5;
233    uint32_t hash_val = 0;
234    for(uint32_t i = 0; i < pKey.size(); ++i) {
235      hash_val *= fnv_prime;
236      hash_val ^= pKey[i];
237    }
238
239    return hash_val;
240  }
241};
242
243/** \class StringHash<AP>
244 *  \brief AP hash function
245 *  0.060s
246 */
247template<>
248struct StringHash<AP> : public std::unary_function<const llvm::StringRef&, uint32_t>
249{
250  uint32_t operator()(const llvm::StringRef& pKey) const
251  {
252    unsigned int hash_val = 0xAAAAAAAA;
253
254    for(uint32_t i = 0; i < pKey.size(); ++i) {
255      hash_val ^= ((i & 1) == 0)?
256                          ((hash_val <<  7) ^ pKey[i] * (hash_val >> 3)):
257                          (~((hash_val << 11) + (pKey[i] ^ (hash_val >> 5))));
258    }
259
260    return hash_val;
261  }
262};
263
264/** \class StringHash<ES>
265 *  \brief This is a revision of Edward Sayers' string characteristic function.
266 *
267 *  31-28  27  26  25   -   0
268 *  +----+---+---+------------+
269 *  | .  | N | - | a/A  ~ z/Z |
270 *  +----+---+---+------------+
271 *
272 *  . (bit 31~28) - The number of '.' characters
273 *  N (bit 27)    - Are there any numbers in the string
274 *  - (bit 26)    - Does the string have '-' character
275 *  bit 25~0      - Bit 25 is set only if the string contains a 'a' or 'A', and
276 *                  Bit 24 is set only if ...                   'b' or 'B', ...
277 */
278template<>
279struct StringHash<ES> : public std::unary_function<const llvm::StringRef&, uint32_t>
280{
281  uint32_t operator()(const llvm::StringRef& pString) const
282  {
283    uint32_t result = 0x0;
284    unsigned int dot = 0;
285    std::string::size_type idx;
286    for (idx = 0; idx < pString.size(); ++idx) {
287      int cur_char = pString[idx];
288
289      if ('.' == cur_char) {
290        ++dot;
291        continue;
292      }
293
294      if (isdigit(cur_char)) {
295        result |= (1 << 27);
296        continue;
297      }
298
299      if ('_' == cur_char) {
300        result |= (1 << 26);
301        continue;
302      }
303
304      if (isupper(cur_char)) {
305        result |= (1 << (cur_char - 'A'));
306        continue;
307      }
308
309      if (islower(cur_char)) {
310        result |= (1 << (cur_char - 'a'));
311        continue;
312      }
313    }
314    result |= (dot << 28);
315    return result;
316  }
317
318
319  /** \func may_include
320   *  \brief is it possible that pRule is a sub-string of pInString
321   */
322  static bool may_include(uint32_t pRule, uint32_t pInString)
323  {
324    uint32_t in_c = pInString << 4;
325    uint32_t r_c  = pRule << 4;
326
327    uint32_t res = (in_c ^ r_c) & r_c;
328    if (0 != res)
329      return false;
330
331    uint32_t in_dot = pInString >> 28;
332    uint32_t r_dot  = pRule >> 28;
333    if (r_dot > in_dot)
334      return false;
335
336    return true;
337  }
338};
339
340/** \class template<uint32_t TYPE> StringCompare
341 *  \brief the template StringCompare class, for specification
342 */
343template<typename STRING_TYPE>
344struct StringCompare : public std::binary_function<const STRING_TYPE&, const STRING_TYPE&, bool>
345{
346  bool operator()(const STRING_TYPE& X, const STRING_TYPE& Y) const
347  { return X == Y; }
348};
349
350template<>
351struct StringCompare<const char*> : public std::binary_function<const char*, const char*, bool>
352{
353  bool operator()(const char* X, const char* Y) const
354  { return (0 == std::strcmp(X, Y)); }
355};
356
357template<>
358struct StringCompare<char*> : public std::binary_function<const char*, const char*, bool>
359{
360  bool operator()(const char* X, const char* Y) const
361  { return (0 == std::strcmp(X, Y)); }
362};
363
364} // namespace of hash
365} // namespace of mcld
366
367#endif
368
369