StringHash.h revision 551ae4ebd3e9d137ea668fb83ae4a55b8cfba451
1//===- StringHash.h -------------------------------------------------------===// 2// 3// The MCLinker Project 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9#ifndef MCLD_ADT_STRINGHASH_H 10#define MCLD_ADT_STRINGHASH_H 11#ifdef ENABLE_UNITTEST 12#include <gtest.h> 13#endif 14#include <llvm/ADT/StringRef.h> 15#include <llvm/Support/DataTypes.h> 16#include <cctype> 17#include <cassert> 18#include <functional> 19 20namespace mcld { 21namespace hash { 22 23enum Type { 24 RS, 25 JS, 26 PJW, 27 ELF, 28 BKDR, 29 SDBM, 30 DJB, 31 DEK, 32 BP, 33 FNV, 34 AP, 35 ES 36}; 37 38/** \class template<uint32_t TYPE> StringHash 39 * \brief the template StringHash class, for specification 40 */ 41template<uint32_t TYPE> 42struct StringHash : public std::unary_function<const llvm::StringRef&, uint32_t> 43{ 44 uint32_t operator()(const llvm::StringRef& pKey) const 45 { 46 assert(false && "Undefined StringHash function.\n"); 47 return 0; 48 } 49}; 50 51/** \class StringHash<RSHash> 52 * \brief RS StringHash funciton 53 */ 54template<> 55struct StringHash<RS> : public std::unary_function<const llvm::StringRef&, uint32_t> 56{ 57 uint32_t operator()(const llvm::StringRef& pKey) const 58 { 59 const unsigned int b = 378551; 60 uint32_t a = 63689; 61 uint32_t hash_val = 0; 62 63 for(unsigned int i = 0; i < pKey.size(); ++i) { 64 hash_val = hash_val * a + pKey[i]; 65 a = a * b; 66 } 67 return hash_val; 68 } 69}; 70 71/** \class StringHash<JSHash> 72 * \brief JS hash funciton 73 */ 74template<> 75struct StringHash<JS> : public std::unary_function<const llvm::StringRef&, uint32_t> 76{ 77 uint32_t operator()(const llvm::StringRef& pKey) const 78 { 79 uint32_t hash_val = 1315423911; 80 81 for(unsigned int i = 0; i < pKey.size(); ++i) { 82 hash_val ^= ((hash_val << 5) + pKey[i] + (hash_val >> 2)); 83 } 84 return hash_val; 85 } 86}; 87 88/** \class StringHash<PJW> 89 * \brief P.J. Weinberger hash function 90 */ 91template<> 92struct StringHash<PJW> : public std::unary_function<const llvm::StringRef&, uint32_t> 93{ 94 uint32_t operator()(const llvm::StringRef& pKey) const 95 { 96 const unsigned int BitsInUnsignedInt = (unsigned int)(sizeof(unsigned int) * 8); 97 const unsigned int ThreeQuarters = (unsigned int)((BitsInUnsignedInt * 3) / 4); 98 const unsigned int OneEighth = (unsigned int)(BitsInUnsignedInt / 8); 99 const unsigned int HighBits = (unsigned int)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth); 100 uint32_t hash_val = 0; 101 uint32_t test = 0; 102 103 for(unsigned int i = 0; i < pKey.size(); ++i) { 104 hash_val = (hash_val << OneEighth) + pKey[i]; 105 106 if((test = hash_val & HighBits) != 0) { 107 hash_val = (( hash_val ^ (test >> ThreeQuarters)) & (~HighBits)); 108 } 109 } 110 return hash_val; 111 } 112}; 113 114/** \class StringHash<ELF> 115 * \brief ELF hash function. 116 */ 117template<> 118struct StringHash<ELF> : public std::unary_function<const llvm::StringRef&, uint32_t> 119{ 120 uint32_t operator()(const llvm::StringRef& pKey) const 121 { 122 uint32_t hash_val = 0; 123 uint32_t x = 0; 124 125 for (unsigned int i = 0; i < pKey.size(); ++i) { 126 hash_val = (hash_val << 4) + pKey[i]; 127 if((x = hash_val & 0xF0000000L) != 0) 128 hash_val ^= (x >> 24); 129 hash_val &= ~x; 130 } 131 return hash_val; 132 } 133}; 134 135/** \class StringHash<BKDR> 136 * \brief BKDR hash function 137 */ 138template<> 139struct StringHash<BKDR> : public std::unary_function<const llvm::StringRef&, uint32_t> 140{ 141 uint32_t operator()(const llvm::StringRef& pKey) const 142 { 143 const uint32_t seed = 131; 144 uint32_t hash_val = 0; 145 146 for(uint32_t i = 0; i < pKey.size(); ++i) 147 hash_val = (hash_val * seed) + pKey[i]; 148 return hash_val; 149 } 150}; 151 152 153/** \class StringHash<SDBM> 154 * \brief SDBM hash function 155 * 0.049s in 100000 test 156 */ 157template<> 158struct StringHash<SDBM> : public std::unary_function<const llvm::StringRef&, uint32_t> 159{ 160 uint32_t operator()(const llvm::StringRef& pKey) const 161 { 162 uint32_t hash_val = 0; 163 164 for(uint32_t i = 0; i < pKey.size(); ++i) 165 hash_val = pKey[i] + (hash_val << 6) + (hash_val << 16) - hash_val; 166 return hash_val; 167 } 168}; 169 170/** \class StringHash<DJB> 171 * \brief DJB hash function 172 * 0.057s in 100000 test 173 */ 174template<> 175struct StringHash<DJB> : public std::unary_function<const llvm::StringRef&, uint32_t> 176{ 177 uint32_t operator()(const llvm::StringRef& pKey) const 178 { 179 uint32_t hash_val = 5381; 180 181 for(uint32_t i = 0; i < pKey.size(); ++i) 182 hash_val = ((hash_val << 5) + hash_val) + pKey[i]; 183 184 return hash_val; 185 } 186}; 187 188/** \class StringHash<DEK> 189 * \brief DEK hash function 190 * 0.60s 191 */ 192template<> 193struct StringHash<DEK> : public std::unary_function<const llvm::StringRef&, uint32_t> 194{ 195 uint32_t operator()(const llvm::StringRef& pKey) const 196 { 197 uint32_t hash_val = pKey.size(); 198 199 for(uint32_t i = 0; i < pKey.size(); ++i) 200 hash_val = ((hash_val << 5) ^ (hash_val >> 27)) ^ pKey[i]; 201 202 return hash_val; 203 } 204}; 205 206/** \class StringHash<BP> 207 * \brief BP hash function 208 * 0.057s 209 */ 210template<> 211struct StringHash<BP> : public std::unary_function<const llvm::StringRef&, uint32_t> 212{ 213 uint32_t operator()(const llvm::StringRef& pKey) const 214 { 215 uint32_t hash_val = 0; 216 for(uint32_t i = 0; i < pKey.size(); ++i) 217 hash_val = hash_val << 7 ^ pKey[i]; 218 219 return hash_val; 220 } 221}; 222 223/** \class StringHash<FNV> 224 * \brief FNV hash function 225 * 0.058s 226 */ 227template<> 228struct StringHash<FNV> : public std::unary_function<const llvm::StringRef&, uint32_t> 229{ 230 uint32_t operator()(const llvm::StringRef& pKey) const 231 { 232 const uint32_t fnv_prime = 0x811C9DC5; 233 uint32_t hash_val = 0; 234 for(uint32_t i = 0; i < pKey.size(); ++i) { 235 hash_val *= fnv_prime; 236 hash_val ^= pKey[i]; 237 } 238 239 return hash_val; 240 } 241}; 242 243/** \class StringHash<AP> 244 * \brief AP hash function 245 * 0.060s 246 */ 247template<> 248struct StringHash<AP> : public std::unary_function<const llvm::StringRef&, uint32_t> 249{ 250 uint32_t operator()(const llvm::StringRef& pKey) const 251 { 252 unsigned int hash_val = 0xAAAAAAAA; 253 254 for(uint32_t i = 0; i < pKey.size(); ++i) { 255 hash_val ^= ((i & 1) == 0)? 256 ((hash_val << 7) ^ pKey[i] * (hash_val >> 3)): 257 (~((hash_val << 11) + (pKey[i] ^ (hash_val >> 5)))); 258 } 259 260 return hash_val; 261 } 262}; 263 264/** \class StringHash<ES> 265 * \brief This is a revision of Edward Sayers' string characteristic function. 266 * 267 * 31-28 27 26 25 - 0 268 * +----+---+---+------------+ 269 * | . | N | - | a/A ~ z/Z | 270 * +----+---+---+------------+ 271 * 272 * . (bit 31~28) - The number of '.' characters 273 * N (bit 27) - Are there any numbers in the string 274 * - (bit 26) - Does the string have '-' character 275 * bit 25~0 - Bit 25 is set only if the string contains a 'a' or 'A', and 276 * Bit 24 is set only if ... 'b' or 'B', ... 277 */ 278template<> 279struct StringHash<ES> : public std::unary_function<const llvm::StringRef&, uint32_t> 280{ 281 uint32_t operator()(const llvm::StringRef& pString) const 282 { 283 uint32_t result = 0x0; 284 unsigned int dot = 0; 285 std::string::size_type idx; 286 for (idx = 0; idx < pString.size(); ++idx) { 287 int cur_char = pString[idx]; 288 289 if ('.' == cur_char) { 290 ++dot; 291 continue; 292 } 293 294 if (isdigit(cur_char)) { 295 result |= (1 << 27); 296 continue; 297 } 298 299 if ('_' == cur_char) { 300 result |= (1 << 26); 301 continue; 302 } 303 304 if (isupper(cur_char)) { 305 result |= (1 << (cur_char - 'A')); 306 continue; 307 } 308 309 if (islower(cur_char)) { 310 result |= (1 << (cur_char - 'a')); 311 continue; 312 } 313 } 314 result |= (dot << 28); 315 return result; 316 } 317 318 319 /** \func may_include 320 * \brief is it possible that pRule is a sub-string of pInString 321 */ 322 static bool may_include(uint32_t pRule, uint32_t pInString) 323 { 324 uint32_t in_c = pInString << 4; 325 uint32_t r_c = pRule << 4; 326 327 uint32_t res = (in_c ^ r_c) & r_c; 328 if (0 != res) 329 return false; 330 331 uint32_t in_dot = pInString >> 28; 332 uint32_t r_dot = pRule >> 28; 333 if (r_dot > in_dot) 334 return false; 335 336 return true; 337 } 338}; 339 340/** \class template<uint32_t TYPE> StringCompare 341 * \brief the template StringCompare class, for specification 342 */ 343template<typename STRING_TYPE> 344struct StringCompare : public std::binary_function<const STRING_TYPE&, const STRING_TYPE&, bool> 345{ 346 bool operator()(const STRING_TYPE& X, const STRING_TYPE& Y) const 347 { return X == Y; } 348}; 349 350template<> 351struct StringCompare<const char*> : public std::binary_function<const char*, const char*, bool> 352{ 353 bool operator()(const char* X, const char* Y) const 354 { return (0 == std::strcmp(X, Y)); } 355}; 356 357template<> 358struct StringCompare<char*> : public std::binary_function<const char*, const char*, bool> 359{ 360 bool operator()(const char* X, const char* Y) const 361 { return (0 == std::strcmp(X, Y)); } 362}; 363 364} // namespace of hash 365} // namespace of mcld 366 367#endif 368 369