1f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com//-----------------------------------------------------------------------------
2f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// MurmurHash was written by Austin Appleby, and is placed in the public
3f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// domain. The author hereby disclaims copyright to this source code.
4f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
5f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Note - This code makes a few assumptions about how your machine behaves -
6f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
7f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// 1. We can read a 4-byte value from any address without crashing
8f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// 2. sizeof(int) == 4
9f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
10f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// And it has a few limitations -
11f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
12f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// 1. It will not work incrementally.
13f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// 2. It will not produce the same results on little-endian and big-endian
14f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com//    machines.
15f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
16f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com#include "MurmurHash1.h"
17f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
18f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com//-----------------------------------------------------------------------------
19f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
20f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comuint32_t MurmurHash1 ( const void * key, int len, uint32_t seed )
21f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com{
22f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  const unsigned int m = 0xc6a4a793;
23f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
24f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  const int r = 16;
25f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
26f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  unsigned int h = seed ^ (len * m);
27f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
28f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  //----------
29f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
30f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  const unsigned char * data = (const unsigned char *)key;
31f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
32f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  while(len >= 4)
33f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  {
34f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    unsigned int k = *(unsigned int *)data;
35f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
36f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    h += k;
37f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    h *= m;
38f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    h ^= h >> 16;
39f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
40f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    data += 4;
41f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    len -= 4;
42f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  }
43f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
44f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  //----------
45f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
46f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  switch(len)
47f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  {
48f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  case 3:
49f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    h += data[2] << 16;
50f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  case 2:
51f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    h += data[1] << 8;
52f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  case 1:
53f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    h += data[0];
54f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    h *= m;
55f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    h ^= h >> r;
56f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  };
57f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
58f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  //----------
59f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
60f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  h *= m;
61f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  h ^= h >> 10;
62f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  h *= m;
63f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  h ^= h >> 17;
64f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
65f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  return h;
66f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com}
67f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
68f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com//-----------------------------------------------------------------------------
69f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// MurmurHash1Aligned, by Austin Appleby
70f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
71f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Same algorithm as MurmurHash1, but only does aligned reads - should be safer
72f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// on certain platforms.
73f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
74f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com// Performance should be equal to or better than the simple version.
75f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
76f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.comunsigned int MurmurHash1Aligned ( const void * key, int len, unsigned int seed )
77f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com{
78f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  const unsigned int m = 0xc6a4a793;
79f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  const int r = 16;
80f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
81f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  const unsigned char * data = (const unsigned char *)key;
82f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
83f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  unsigned int h = seed ^ (len * m);
84f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
85f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  int align = (uint64_t)data & 3;
86f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
87f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  if(align && (len >= 4))
88f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  {
89f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    // Pre-load the temp registers
90f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
91f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    unsigned int t = 0, d = 0;
92f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
93f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    switch(align)
94f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    {
95f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com      case 1: t |= data[2] << 16;
96f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com      case 2: t |= data[1] << 8;
97f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com      case 3: t |= data[0];
98f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    }
99f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
100f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    t <<= (8 * align);
101f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
102f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    data += 4-align;
103f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    len -= 4-align;
104f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
105f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    int sl = 8 * (4-align);
106f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    int sr = 8 * align;
107f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
108f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    // Mix
109f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
110f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    while(len >= 4)
111f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    {
112f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com      d = *(unsigned int *)data;
113f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com      t = (t >> sr) | (d << sl);
114f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com      h += t;
115f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com      h *= m;
116f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com      h ^= h >> r;
117f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com      t = d;
118f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
119f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com      data += 4;
120f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com      len -= 4;
121f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    }
122f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
123f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    // Handle leftover data in temp registers
124f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
125f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    int pack = len < align ? len : align;
126f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
127f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    d = 0;
128f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
129f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    switch(pack)
130f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    {
131f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    case 3: d |= data[2] << 16;
132f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    case 2: d |= data[1] << 8;
133f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    case 1: d |= data[0];
134f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    case 0: h += (t >> sr) | (d << sl);
135f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com        h *= m;
136f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com        h ^= h >> r;
137f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    }
138f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
139f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    data += pack;
140f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    len -= pack;
141f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  }
142f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  else
143f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  {
144f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    while(len >= 4)
145f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    {
146f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com      h += *(unsigned int *)data;
147f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com      h *= m;
148f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com      h ^= h >> r;
149f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
150f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com      data += 4;
151f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com      len -= 4;
152f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com    }
153f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  }
154f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
155f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  //----------
156f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  // Handle tail bytes
157f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
158f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  switch(len)
159f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  {
160f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  case 3: h += data[2] << 16;
161f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  case 2: h += data[1] << 8;
162f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  case 1: h += data[0];
163f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com      h *= m;
164f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com      h ^= h >> r;
165f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  };
166f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
167f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  h *= m;
168f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  h ^= h >> 10;
169f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  h *= m;
170f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  h ^= h >> 17;
171f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
172f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com  return h;
173f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com}
174f3b789787b93945c974e2cc517b7dc352b28354etanjent@gmail.com
175