1f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com//----------------------------------------------------------------------------- 2f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// MurmurHash was written by Austin Appleby, and is placed in the public 3f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// domain. The author hereby disclaims copyright to this source code. 4f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 5f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Note - This code makes a few assumptions about how your machine behaves - 6f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 7f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// 1. We can read a 4-byte value from any address without crashing 8f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// 2. sizeof(int) == 4 9f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 10f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// And it has a few limitations - 11f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 12f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// 1. It will not work incrementally. 13f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// 2. It will not produce the same results on little-endian and big-endian 14f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// machines. 15f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 16f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com#include "MurmurHash1.h" 17f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 18f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com//----------------------------------------------------------------------------- 19f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 20f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comuint32_t MurmurHash1 ( const void * key, int len, uint32_t seed ) 21f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com{ 22f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com const unsigned int m = 0xc6a4a793; 23f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 24f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com const int r = 16; 25f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 26f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com unsigned int h = seed ^ (len * m); 27f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 28f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com //---------- 29f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 30f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com const unsigned char * data = (const unsigned char *)key; 31f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 32f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com while(len >= 4) 33f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com { 34f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com unsigned int k = *(unsigned int *)data; 35f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 36f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h += k; 37f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h *= m; 38f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h ^= h >> 16; 39f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 40f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com data += 4; 41f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com len -= 4; 42f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com } 43f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 44f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com //---------- 45f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 46f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com switch(len) 47f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com { 48f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com case 3: 49f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h += data[2] << 16; 50f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com case 2: 51f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h += data[1] << 8; 52f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com case 1: 53f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h += data[0]; 54f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h *= m; 55f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h ^= h >> r; 56f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com }; 57f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 58f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com //---------- 59f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 60f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h *= m; 61f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h ^= h >> 10; 62f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h *= m; 63f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h ^= h >> 17; 64f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 65f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com return h; 66f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com} 67f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 68f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com//----------------------------------------------------------------------------- 69f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// MurmurHash1Aligned, by Austin Appleby 70f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 71f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Same algorithm as MurmurHash1, but only does aligned reads - should be safer 72f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// on certain platforms. 73f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 74f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Performance should be equal to or better than the simple version. 75f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 76f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comunsigned int MurmurHash1Aligned ( const void * key, int len, unsigned int seed ) 77f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com{ 78f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com const unsigned int m = 0xc6a4a793; 79f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com const int r = 16; 80f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 81f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com const unsigned char * data = (const unsigned char *)key; 82f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 83f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com unsigned int h = seed ^ (len * m); 84f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 85f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com int align = (uint64_t)data & 3; 86f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 87f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com if(align && (len >= 4)) 88f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com { 89f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com // Pre-load the temp registers 90f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 91f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com unsigned int t = 0, d = 0; 92f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 93f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com switch(align) 94f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com { 95f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com case 1: t |= data[2] << 16; 96f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com case 2: t |= data[1] << 8; 97f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com case 3: t |= data[0]; 98f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com } 99f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 100f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com t <<= (8 * align); 101f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 102f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com data += 4-align; 103f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com len -= 4-align; 104f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 105f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com int sl = 8 * (4-align); 106f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com int sr = 8 * align; 107f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 108f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com // Mix 109f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 110f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com while(len >= 4) 111f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com { 112f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com d = *(unsigned int *)data; 113f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com t = (t >> sr) | (d << sl); 114f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h += t; 115f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h *= m; 116f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h ^= h >> r; 117f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com t = d; 118f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 119f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com data += 4; 120f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com len -= 4; 121f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com } 122f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 123f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com // Handle leftover data in temp registers 124f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 125f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com int pack = len < align ? len : align; 126f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 127f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com d = 0; 128f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 129f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com switch(pack) 130f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com { 131f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com case 3: d |= data[2] << 16; 132f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com case 2: d |= data[1] << 8; 133f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com case 1: d |= data[0]; 134f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com case 0: h += (t >> sr) | (d << sl); 135f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h *= m; 136f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h ^= h >> r; 137f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com } 138f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 139f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com data += pack; 140f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com len -= pack; 141f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com } 142f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com else 143f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com { 144f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com while(len >= 4) 145f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com { 146f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h += *(unsigned int *)data; 147f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h *= m; 148f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h ^= h >> r; 149f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 150f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com data += 4; 151f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com len -= 4; 152f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com } 153f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com } 154f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 155f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com //---------- 156f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com // Handle tail bytes 157f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 158f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com switch(len) 159f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com { 160f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com case 3: h += data[2] << 16; 161f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com case 2: h += data[1] << 8; 162f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com case 1: h += data[0]; 163f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h *= m; 164f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h ^= h >> r; 165f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com }; 166f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 167f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h *= m; 168f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h ^= h >> 10; 169f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h *= m; 170f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com h ^= h >> 17; 171f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 172f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com return h; 173f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com} 174f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com 175