1//----------------------------------------------------------------------------- 2// MurmurHash2 was written by Austin Appleby, and is placed in the public 3// domain. The author hereby disclaims copyright to this source code. 4 5// Note - This code makes a few assumptions about how your machine behaves - 6 7// 1. We can read a 4-byte value from any address without crashing 8// 2. sizeof(int) == 4 9 10// And it has a few limitations - 11 12// 1. It will not work incrementally. 13// 2. It will not produce the same results on little-endian and big-endian 14// machines. 15 16#include "MurmurHash2.h" 17 18//----------------------------------------------------------------------------- 19// Platform-specific functions and macros 20 21// Microsoft Visual Studio 22 23#if defined(_MSC_VER) 24 25#define BIG_CONSTANT(x) (x) 26 27// Other compilers 28 29#else // defined(_MSC_VER) 30 31#define BIG_CONSTANT(x) (x##LLU) 32 33#endif // !defined(_MSC_VER) 34 35//----------------------------------------------------------------------------- 36 37uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed ) 38{ 39 // 'm' and 'r' are mixing constants generated offline. 40 // They're not really 'magic', they just happen to work well. 41 42 const uint32_t m = 0x5bd1e995; 43 const int r = 24; 44 45 // Initialize the hash to a 'random' value 46 47 uint32_t h = seed ^ len; 48 49 // Mix 4 bytes at a time into the hash 50 51 const unsigned char * data = (const unsigned char *)key; 52 53 while(len >= 4) 54 { 55 uint32_t k = *(uint32_t*)data; 56 57 k *= m; 58 k ^= k >> r; 59 k *= m; 60 61 h *= m; 62 h ^= k; 63 64 data += 4; 65 len -= 4; 66 } 67 68 // Handle the last few bytes of the input array 69 70 switch(len) 71 { 72 case 3: h ^= data[2] << 16; 73 case 2: h ^= data[1] << 8; 74 case 1: h ^= data[0]; 75 h *= m; 76 }; 77 78 // Do a few final mixes of the hash to ensure the last few 79 // bytes are well-incorporated. 80 81 h ^= h >> 13; 82 h *= m; 83 h ^= h >> 15; 84 85 return h; 86} 87 88//----------------------------------------------------------------------------- 89// MurmurHash2, 64-bit versions, by Austin Appleby 90 91// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment 92// and endian-ness issues if used across multiple platforms. 93 94// 64-bit hash for 64-bit platforms 95 96uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed ) 97{ 98 const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995); 99 const int r = 47; 100 101 uint64_t h = seed ^ (len * m); 102 103 const uint64_t * data = (const uint64_t *)key; 104 const uint64_t * end = data + (len/8); 105 106 while(data != end) 107 { 108 uint64_t k = *data++; 109 110 k *= m; 111 k ^= k >> r; 112 k *= m; 113 114 h ^= k; 115 h *= m; 116 } 117 118 const unsigned char * data2 = (const unsigned char*)data; 119 120 switch(len & 7) 121 { 122 case 7: h ^= uint64_t(data2[6]) << 48; 123 case 6: h ^= uint64_t(data2[5]) << 40; 124 case 5: h ^= uint64_t(data2[4]) << 32; 125 case 4: h ^= uint64_t(data2[3]) << 24; 126 case 3: h ^= uint64_t(data2[2]) << 16; 127 case 2: h ^= uint64_t(data2[1]) << 8; 128 case 1: h ^= uint64_t(data2[0]); 129 h *= m; 130 }; 131 132 h ^= h >> r; 133 h *= m; 134 h ^= h >> r; 135 136 return h; 137} 138 139 140// 64-bit hash for 32-bit platforms 141 142uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed ) 143{ 144 const uint32_t m = 0x5bd1e995; 145 const int r = 24; 146 147 uint32_t h1 = uint32_t(seed) ^ len; 148 uint32_t h2 = uint32_t(seed >> 32); 149 150 const uint32_t * data = (const uint32_t *)key; 151 152 while(len >= 8) 153 { 154 uint32_t k1 = *data++; 155 k1 *= m; k1 ^= k1 >> r; k1 *= m; 156 h1 *= m; h1 ^= k1; 157 len -= 4; 158 159 uint32_t k2 = *data++; 160 k2 *= m; k2 ^= k2 >> r; k2 *= m; 161 h2 *= m; h2 ^= k2; 162 len -= 4; 163 } 164 165 if(len >= 4) 166 { 167 uint32_t k1 = *data++; 168 k1 *= m; k1 ^= k1 >> r; k1 *= m; 169 h1 *= m; h1 ^= k1; 170 len -= 4; 171 } 172 173 switch(len) 174 { 175 case 3: h2 ^= ((unsigned char*)data)[2] << 16; 176 case 2: h2 ^= ((unsigned char*)data)[1] << 8; 177 case 1: h2 ^= ((unsigned char*)data)[0]; 178 h2 *= m; 179 }; 180 181 h1 ^= h2 >> 18; h1 *= m; 182 h2 ^= h1 >> 22; h2 *= m; 183 h1 ^= h2 >> 17; h1 *= m; 184 h2 ^= h1 >> 19; h2 *= m; 185 186 uint64_t h = h1; 187 188 h = (h << 32) | h2; 189 190 return h; 191} 192 193//----------------------------------------------------------------------------- 194// MurmurHash2A, by Austin Appleby 195 196// This is a variant of MurmurHash2 modified to use the Merkle-Damgard 197// construction. Bulk speed should be identical to Murmur2, small-key speed 198// will be 10%-20% slower due to the added overhead at the end of the hash. 199 200// This variant fixes a minor issue where null keys were more likely to 201// collide with each other than expected, and also makes the function 202// more amenable to incremental implementations. 203 204#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } 205 206uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed ) 207{ 208 const uint32_t m = 0x5bd1e995; 209 const int r = 24; 210 uint32_t l = len; 211 212 const unsigned char * data = (const unsigned char *)key; 213 214 uint32_t h = seed; 215 216 while(len >= 4) 217 { 218 uint32_t k = *(uint32_t*)data; 219 220 mmix(h,k); 221 222 data += 4; 223 len -= 4; 224 } 225 226 uint32_t t = 0; 227 228 switch(len) 229 { 230 case 3: t ^= data[2] << 16; 231 case 2: t ^= data[1] << 8; 232 case 1: t ^= data[0]; 233 }; 234 235 mmix(h,t); 236 mmix(h,l); 237 238 h ^= h >> 13; 239 h *= m; 240 h ^= h >> 15; 241 242 return h; 243} 244 245//----------------------------------------------------------------------------- 246// CMurmurHash2A, by Austin Appleby 247 248// This is a sample implementation of MurmurHash2A designed to work 249// incrementally. 250 251// Usage - 252 253// CMurmurHash2A hasher 254// hasher.Begin(seed); 255// hasher.Add(data1,size1); 256// hasher.Add(data2,size2); 257// ... 258// hasher.Add(dataN,sizeN); 259// uint32_t hash = hasher.End() 260 261class CMurmurHash2A 262{ 263public: 264 265 void Begin ( uint32_t seed = 0 ) 266 { 267 m_hash = seed; 268 m_tail = 0; 269 m_count = 0; 270 m_size = 0; 271 } 272 273 void Add ( const unsigned char * data, int len ) 274 { 275 m_size += len; 276 277 MixTail(data,len); 278 279 while(len >= 4) 280 { 281 uint32_t k = *(uint32_t*)data; 282 283 mmix(m_hash,k); 284 285 data += 4; 286 len -= 4; 287 } 288 289 MixTail(data,len); 290 } 291 292 uint32_t End ( void ) 293 { 294 mmix(m_hash,m_tail); 295 mmix(m_hash,m_size); 296 297 m_hash ^= m_hash >> 13; 298 m_hash *= m; 299 m_hash ^= m_hash >> 15; 300 301 return m_hash; 302 } 303 304private: 305 306 static const uint32_t m = 0x5bd1e995; 307 static const int r = 24; 308 309 void MixTail ( const unsigned char * & data, int & len ) 310 { 311 while( len && ((len<4) || m_count) ) 312 { 313 m_tail |= (*data++) << (m_count * 8); 314 315 m_count++; 316 len--; 317 318 if(m_count == 4) 319 { 320 mmix(m_hash,m_tail); 321 m_tail = 0; 322 m_count = 0; 323 } 324 } 325 } 326 327 uint32_t m_hash; 328 uint32_t m_tail; 329 uint32_t m_count; 330 uint32_t m_size; 331}; 332 333//----------------------------------------------------------------------------- 334// MurmurHashNeutral2, by Austin Appleby 335 336// Same as MurmurHash2, but endian- and alignment-neutral. 337// Half the speed though, alas. 338 339uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed ) 340{ 341 const uint32_t m = 0x5bd1e995; 342 const int r = 24; 343 344 uint32_t h = seed ^ len; 345 346 const unsigned char * data = (const unsigned char *)key; 347 348 while(len >= 4) 349 { 350 uint32_t k; 351 352 k = data[0]; 353 k |= data[1] << 8; 354 k |= data[2] << 16; 355 k |= data[3] << 24; 356 357 k *= m; 358 k ^= k >> r; 359 k *= m; 360 361 h *= m; 362 h ^= k; 363 364 data += 4; 365 len -= 4; 366 } 367 368 switch(len) 369 { 370 case 3: h ^= data[2] << 16; 371 case 2: h ^= data[1] << 8; 372 case 1: h ^= data[0]; 373 h *= m; 374 }; 375 376 h ^= h >> 13; 377 h *= m; 378 h ^= h >> 15; 379 380 return h; 381} 382 383//----------------------------------------------------------------------------- 384// MurmurHashAligned2, by Austin Appleby 385 386// Same algorithm as MurmurHash2, but only does aligned reads - should be safer 387// on certain platforms. 388 389// Performance will be lower than MurmurHash2 390 391#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } 392 393 394uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed ) 395{ 396 const uint32_t m = 0x5bd1e995; 397 const int r = 24; 398 399 const unsigned char * data = (const unsigned char *)key; 400 401 uint32_t h = seed ^ len; 402 403 int align = (uint64_t)data & 3; 404 405 if(align && (len >= 4)) 406 { 407 // Pre-load the temp registers 408 409 uint32_t t = 0, d = 0; 410 411 switch(align) 412 { 413 case 1: t |= data[2] << 16; 414 case 2: t |= data[1] << 8; 415 case 3: t |= data[0]; 416 } 417 418 t <<= (8 * align); 419 420 data += 4-align; 421 len -= 4-align; 422 423 int sl = 8 * (4-align); 424 int sr = 8 * align; 425 426 // Mix 427 428 while(len >= 4) 429 { 430 d = *(uint32_t *)data; 431 t = (t >> sr) | (d << sl); 432 433 uint32_t k = t; 434 435 MIX(h,k,m); 436 437 t = d; 438 439 data += 4; 440 len -= 4; 441 } 442 443 // Handle leftover data in temp registers 444 445 d = 0; 446 447 if(len >= align) 448 { 449 switch(align) 450 { 451 case 3: d |= data[2] << 16; 452 case 2: d |= data[1] << 8; 453 case 1: d |= data[0]; 454 } 455 456 uint32_t k = (t >> sr) | (d << sl); 457 MIX(h,k,m); 458 459 data += align; 460 len -= align; 461 462 //---------- 463 // Handle tail bytes 464 465 switch(len) 466 { 467 case 3: h ^= data[2] << 16; 468 case 2: h ^= data[1] << 8; 469 case 1: h ^= data[0]; 470 h *= m; 471 }; 472 } 473 else 474 { 475 switch(len) 476 { 477 case 3: d |= data[2] << 16; 478 case 2: d |= data[1] << 8; 479 case 1: d |= data[0]; 480 case 0: h ^= (t >> sr) | (d << sl); 481 h *= m; 482 } 483 } 484 485 h ^= h >> 13; 486 h *= m; 487 h ^= h >> 15; 488 489 return h; 490 } 491 else 492 { 493 while(len >= 4) 494 { 495 uint32_t k = *(uint32_t *)data; 496 497 MIX(h,k,m); 498 499 data += 4; 500 len -= 4; 501 } 502 503 //---------- 504 // Handle tail bytes 505 506 switch(len) 507 { 508 case 3: h ^= data[2] << 16; 509 case 2: h ^= data[1] << 8; 510 case 1: h ^= data[0]; 511 h *= m; 512 }; 513 514 h ^= h >> 13; 515 h *= m; 516 h ^= h >> 15; 517 518 return h; 519 } 520} 521 522//----------------------------------------------------------------------------- 523 524