1#include "Platform.h"
2#include "Hashes.h"
3#include "KeysetTest.h"
4#include "SpeedTest.h"
5#include "AvalancheTest.h"
6#include "DifferentialTest.h"
7#include "PMurHash.h"
8
9#include <stdio.h>
10#include <time.h>
11
12//-----------------------------------------------------------------------------
13// Configuration. TODO - move these to command-line flags
14
15bool g_testAll = false;
16
17bool g_testSanity      = false;
18bool g_testSpeed       = false;
19bool g_testDiff        = false;
20bool g_testDiffDist    = false;
21bool g_testAvalanche   = false;
22bool g_testBIC         = false;
23bool g_testCyclic      = false;
24bool g_testTwoBytes    = false;
25bool g_testSparse      = false;
26bool g_testPermutation = false;
27bool g_testWindow      = false;
28bool g_testText        = false;
29bool g_testZeroes      = false;
30bool g_testSeed        = false;
31
32//-----------------------------------------------------------------------------
33// This is the list of all hashes that SMHasher can test.
34
35struct HashInfo
36{
37  pfHash hash;
38  int hashbits;
39  uint32_t verification;
40  const char * name;
41  const char * desc;
42};
43
44HashInfo g_hashes[] =
45{
46  { DoNothingHash,        32, 0x00000000, "donothing32", "Do-Nothing function (only valid for measuring call overhead)" },
47  { DoNothingHash,        64, 0x00000000, "donothing64", "Do-Nothing function (only valid for measuring call overhead)" },
48  { DoNothingHash,       128, 0x00000000, "donothing128", "Do-Nothing function (only valid for measuring call overhead)" },
49
50  { crc32,                32, 0x3719DB20, "crc32",       "CRC-32" },
51
52  { md5_32,               32, 0xC10C356B, "md5_32a",     "MD5, first 32 bits of result" },
53  { sha1_32a,             32, 0xF9376EA7, "sha1_32a",    "SHA1, first 32 bits of result" },
54
55  { FNV,                  32, 0xE3CBBE91, "FNV",         "Fowler-Noll-Vo hash, 32-bit" },
56  { Bernstein,            32, 0xBDB4B640, "bernstein",   "Bernstein, 32-bit" },
57  { lookup3_test,         32, 0x3D83917A, "lookup3",     "Bob Jenkins' lookup3" },
58  { SuperFastHash,        32, 0x980ACD1D, "superfast",   "Paul Hsieh's SuperFastHash" },
59  { MurmurOAAT_test,      32, 0x5363BD98, "MurmurOAAT",  "Murmur one-at-a-time" },
60  { Crap8_test,           32, 0x743E97A1, "Crap8",       "Crap8" },
61
62  { CityHash64_test,      64, 0x25A20825, "City64",      "Google CityHash64WithSeed" },
63  { CityHash128_test,    128, 0x6531F54E, "City128",     "Google CityHash128WithSeed" },
64
65  { SpookyHash32_test,    32, 0x3F798BBB, "Spooky32",    "Bob Jenkins' SpookyHash, 32-bit result" },
66  { SpookyHash64_test,    64, 0xA7F955F1, "Spooky64",    "Bob Jenkins' SpookyHash, 64-bit result" },
67  { SpookyHash128_test,  128, 0x8D263080, "Spooky128",   "Bob Jenkins' SpookyHash, 128-bit result" },
68
69  // MurmurHash2
70
71  { MurmurHash2_test,     32, 0x27864C1E, "Murmur2",     "MurmurHash2 for x86, 32-bit" },
72  { MurmurHash2A_test,    32, 0x7FBD4396, "Murmur2A",    "MurmurHash2A for x86, 32-bit" },
73  { MurmurHash64A_test,   64, 0x1F0D3804, "Murmur2B",    "MurmurHash2 for x64, 64-bit" },
74  { MurmurHash64B_test,   64, 0xDD537C05, "Murmur2C",    "MurmurHash2 for x86, 64-bit" },
75
76  // MurmurHash3
77
78  { MurmurHash3_x86_32,   32, 0xB0F57EE3, "Murmur3A",    "MurmurHash3 for x86, 32-bit" },
79  { MurmurHash3_x86_128, 128, 0xB3ECE62A, "Murmur3C",    "MurmurHash3 for x86, 128-bit" },
80  { MurmurHash3_x64_128, 128, 0x6384BA69, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
81
82  { PMurHash32_test,      32, 0xB0F57EE3, "PMurHash32",  "Shane Day's portable-ized MurmurHash3 for x86, 32-bit." },
83};
84
85HashInfo * findHash ( const char * name )
86{
87  for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
88  {
89    if(_stricmp(name,g_hashes[i].name) == 0) return &g_hashes[i];
90  }
91
92  return NULL;
93}
94
95//-----------------------------------------------------------------------------
96// Self-test on startup - verify that all installed hashes work correctly.
97
98void SelfTest ( void )
99{
100  bool pass = true;
101
102  for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
103  {
104    HashInfo * info = & g_hashes[i];
105
106    pass &= VerificationTest(info->hash,info->hashbits,info->verification,false);
107  }
108
109  if(!pass)
110  {
111    printf("Self-test FAILED!\n");
112
113    for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
114    {
115      HashInfo * info = & g_hashes[i];
116
117      printf("%16s - ",info->name);
118      pass &= VerificationTest(info->hash,info->hashbits,info->verification,true);
119    }
120
121    exit(1);
122  }
123}
124
125//----------------------------------------------------------------------------
126
127template < typename hashtype >
128void test ( hashfunc<hashtype> hash, HashInfo * info )
129{
130  const int hashbits = sizeof(hashtype) * 8;
131
132  printf("-------------------------------------------------------------------------------\n");
133  printf("--- Testing %s (%s)\n\n",info->name,info->desc);
134
135  //-----------------------------------------------------------------------------
136  // Sanity tests
137
138  if(g_testSanity || g_testAll)
139  {
140    printf("[[[ Sanity Tests ]]]\n\n");
141
142    VerificationTest(hash,hashbits,info->verification,true);
143    SanityTest(hash,hashbits);
144    AppendedZeroesTest(hash,hashbits);
145    printf("\n");
146  }
147
148  //-----------------------------------------------------------------------------
149  // Speed tests
150
151  if(g_testSpeed || g_testAll)
152  {
153    printf("[[[ Speed Tests ]]]\n\n");
154
155    BulkSpeedTest(info->hash,info->verification);
156    printf("\n");
157
158    for(int i = 1; i < 32; i++)
159    {
160      double cycles;
161
162      TinySpeedTest(hashfunc<hashtype>(info->hash),sizeof(hashtype),i,info->verification,true,cycles);
163    }
164
165    printf("\n");
166  }
167
168  //-----------------------------------------------------------------------------
169  // Differential tests
170
171  if(g_testDiff || g_testAll)
172  {
173    printf("[[[ Differential Tests ]]]\n\n");
174
175    bool result = true;
176    bool dumpCollisions = false;
177
178    result &= DiffTest< Blob<64>,  hashtype >(hash,5,1000,dumpCollisions);
179    result &= DiffTest< Blob<128>, hashtype >(hash,4,1000,dumpCollisions);
180    result &= DiffTest< Blob<256>, hashtype >(hash,3,1000,dumpCollisions);
181
182    if(!result) printf("*********FAIL*********\n");
183    printf("\n");
184  }
185
186  //-----------------------------------------------------------------------------
187  // Differential-distribution tests
188
189  if(g_testDiffDist /*|| g_testAll*/)
190  {
191    printf("[[[ Differential Distribution Tests ]]]\n\n");
192
193    bool result = true;
194
195    result &= DiffDistTest2<uint64_t,hashtype>(hash);
196
197    printf("\n");
198  }
199
200  //-----------------------------------------------------------------------------
201  // Avalanche tests
202
203  if(g_testAvalanche || g_testAll)
204  {
205    printf("[[[ Avalanche Tests ]]]\n\n");
206
207    bool result = true;
208
209    result &= AvalancheTest< Blob< 32>, hashtype > (hash,300000);
210    result &= AvalancheTest< Blob< 40>, hashtype > (hash,300000);
211    result &= AvalancheTest< Blob< 48>, hashtype > (hash,300000);
212    result &= AvalancheTest< Blob< 56>, hashtype > (hash,300000);
213
214    result &= AvalancheTest< Blob< 64>, hashtype > (hash,300000);
215    result &= AvalancheTest< Blob< 72>, hashtype > (hash,300000);
216    result &= AvalancheTest< Blob< 80>, hashtype > (hash,300000);
217    result &= AvalancheTest< Blob< 88>, hashtype > (hash,300000);
218
219    result &= AvalancheTest< Blob< 96>, hashtype > (hash,300000);
220    result &= AvalancheTest< Blob<104>, hashtype > (hash,300000);
221    result &= AvalancheTest< Blob<112>, hashtype > (hash,300000);
222    result &= AvalancheTest< Blob<120>, hashtype > (hash,300000);
223
224    result &= AvalancheTest< Blob<128>, hashtype > (hash,300000);
225    result &= AvalancheTest< Blob<136>, hashtype > (hash,300000);
226    result &= AvalancheTest< Blob<144>, hashtype > (hash,300000);
227    result &= AvalancheTest< Blob<152>, hashtype > (hash,300000);
228
229    if(!result) printf("*********FAIL*********\n");
230    printf("\n");
231  }
232
233  //-----------------------------------------------------------------------------
234  // Bit Independence Criteria. Interesting, but doesn't tell us much about
235  // collision or distribution.
236
237  if(g_testBIC)
238  {
239    printf("[[[ Bit Independence Criteria ]]]\n\n");
240
241    bool result = true;
242
243    //result &= BicTest<uint64_t,hashtype>(hash,2000000);
244    BicTest3<Blob<88>,hashtype>(hash,2000000);
245
246    if(!result) printf("*********FAIL*********\n");
247    printf("\n");
248  }
249
250  //-----------------------------------------------------------------------------
251  // Keyset 'Cyclic' - keys of the form "abcdabcdabcd..."
252
253  if(g_testCyclic || g_testAll)
254  {
255    printf("[[[ Keyset 'Cyclic' Tests ]]]\n\n");
256
257    bool result = true;
258    bool drawDiagram = false;
259
260    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+0,8,10000000,drawDiagram);
261    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+1,8,10000000,drawDiagram);
262    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+2,8,10000000,drawDiagram);
263    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+3,8,10000000,drawDiagram);
264    result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+4,8,10000000,drawDiagram);
265
266    if(!result) printf("*********FAIL*********\n");
267    printf("\n");
268  }
269
270  //-----------------------------------------------------------------------------
271  // Keyset 'TwoBytes' - all keys up to N bytes containing two non-zero bytes
272
273  // This generates some huge keysets, 128-bit tests will take ~1.3 gigs of RAM.
274
275  if(g_testTwoBytes || g_testAll)
276  {
277    printf("[[[ Keyset 'TwoBytes' Tests ]]]\n\n");
278
279    bool result = true;
280    bool drawDiagram = false;
281
282    for(int i = 4; i <= 20; i += 4)
283    {
284      result &= TwoBytesTest2<hashtype>(hash,i,drawDiagram);
285    }
286
287    if(!result) printf("*********FAIL*********\n");
288    printf("\n");
289  }
290
291  //-----------------------------------------------------------------------------
292  // Keyset 'Sparse' - keys with all bits 0 except a few
293
294  if(g_testSparse || g_testAll)
295  {
296    printf("[[[ Keyset 'Sparse' Tests ]]]\n\n");
297
298    bool result = true;
299    bool drawDiagram = false;
300
301    result &= SparseKeyTest<  32,hashtype>(hash,6,true,true,true,drawDiagram);
302    result &= SparseKeyTest<  40,hashtype>(hash,6,true,true,true,drawDiagram);
303    result &= SparseKeyTest<  48,hashtype>(hash,5,true,true,true,drawDiagram);
304    result &= SparseKeyTest<  56,hashtype>(hash,5,true,true,true,drawDiagram);
305    result &= SparseKeyTest<  64,hashtype>(hash,5,true,true,true,drawDiagram);
306    result &= SparseKeyTest<  96,hashtype>(hash,4,true,true,true,drawDiagram);
307    result &= SparseKeyTest< 256,hashtype>(hash,3,true,true,true,drawDiagram);
308    result &= SparseKeyTest<2048,hashtype>(hash,2,true,true,true,drawDiagram);
309
310    if(!result) printf("*********FAIL*********\n");
311    printf("\n");
312  }
313
314  //-----------------------------------------------------------------------------
315  // Keyset 'Permutation' - all possible combinations of a set of blocks
316
317  if(g_testPermutation || g_testAll)
318  {
319    {
320      // This one breaks lookup3, surprisingly
321
322      printf("[[[ Keyset 'Combination Lowbits' Tests ]]]\n\n");
323
324      bool result = true;
325      bool drawDiagram = false;
326
327      uint32_t blocks[] =
328      {
329        0x00000000,
330
331        0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
332      };
333
334      result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
335
336      if(!result) printf("*********FAIL*********\n");
337      printf("\n");
338    }
339
340    {
341      printf("[[[ Keyset 'Combination Highbits' Tests ]]]\n\n");
342
343      bool result = true;
344      bool drawDiagram = false;
345
346      uint32_t blocks[] =
347      {
348        0x00000000,
349
350        0x20000000, 0x40000000, 0x60000000, 0x80000000, 0xA0000000, 0xC0000000, 0xE0000000
351      };
352
353      result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
354
355      if(!result) printf("*********FAIL*********\n");
356      printf("\n");
357    }
358
359    {
360      printf("[[[ Keyset 'Combination 0x8000000' Tests ]]]\n\n");
361
362      bool result = true;
363      bool drawDiagram = false;
364
365      uint32_t blocks[] =
366      {
367        0x00000000,
368
369        0x80000000,
370      };
371
372      result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
373
374      if(!result) printf("*********FAIL*********\n");
375      printf("\n");
376    }
377
378    {
379      printf("[[[ Keyset 'Combination 0x0000001' Tests ]]]\n\n");
380
381      bool result = true;
382      bool drawDiagram = false;
383
384      uint32_t blocks[] =
385      {
386        0x00000000,
387
388        0x00000001,
389      };
390
391      result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
392
393      if(!result) printf("*********FAIL*********\n");
394      printf("\n");
395    }
396
397    {
398      printf("[[[ Keyset 'Combination Hi-Lo' Tests ]]]\n\n");
399
400      bool result = true;
401      bool drawDiagram = false;
402
403      uint32_t blocks[] =
404      {
405        0x00000000,
406
407        0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
408
409        0x80000000, 0x40000000, 0xC0000000, 0x20000000, 0xA0000000, 0x60000000, 0xE0000000
410      };
411
412      result &= CombinationKeyTest<hashtype>(hash,6,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
413
414      if(!result) printf("*********FAIL*********\n");
415      printf("\n");
416    }
417  }
418
419  //-----------------------------------------------------------------------------
420  // Keyset 'Window'
421
422  // Skip distribution test for these - they're too easy to distribute well,
423  // and it generates a _lot_ of testing
424
425  if(g_testWindow || g_testAll)
426  {
427    printf("[[[ Keyset 'Window' Tests ]]]\n\n");
428
429    bool result = true;
430    bool testCollision = true;
431    bool testDistribution = false;
432    bool drawDiagram = false;
433
434    result &= WindowedKeyTest< Blob<hashbits*2>, hashtype > ( hash, 20, testCollision, testDistribution, drawDiagram );
435
436    if(!result) printf("*********FAIL*********\n");
437    printf("\n");
438  }
439
440  //-----------------------------------------------------------------------------
441  // Keyset 'Text'
442
443  if(g_testText || g_testAll)
444  {
445    printf("[[[ Keyset 'Text' Tests ]]]\n\n");
446
447    bool result = true;
448    bool drawDiagram = false;
449
450    const char * alnum = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
451
452    result &= TextKeyTest( hash, "Foo",    alnum,4, "Bar",    drawDiagram );
453    result &= TextKeyTest( hash, "FooBar", alnum,4, "",       drawDiagram );
454    result &= TextKeyTest( hash, "",       alnum,4, "FooBar", drawDiagram );
455
456    if(!result) printf("*********FAIL*********\n");
457    printf("\n");
458  }
459
460  //-----------------------------------------------------------------------------
461  // Keyset 'Zeroes'
462
463  if(g_testZeroes || g_testAll)
464  {
465    printf("[[[ Keyset 'Zeroes' Tests ]]]\n\n");
466
467    bool result = true;
468    bool drawDiagram = false;
469
470    result &= ZeroKeyTest<hashtype>( hash, drawDiagram );
471
472    if(!result) printf("*********FAIL*********\n");
473    printf("\n");
474  }
475
476  //-----------------------------------------------------------------------------
477  // Keyset 'Seed'
478
479  if(g_testSeed || g_testAll)
480  {
481    printf("[[[ Keyset 'Seed' Tests ]]]\n\n");
482
483    bool result = true;
484    bool drawDiagram = false;
485
486    result &= SeedTest<hashtype>( hash, 1000000, drawDiagram );
487
488    if(!result) printf("*********FAIL*********\n");
489    printf("\n");
490  }
491}
492
493//-----------------------------------------------------------------------------
494
495uint32_t g_inputVCode = 1;
496uint32_t g_outputVCode = 1;
497uint32_t g_resultVCode = 1;
498
499HashInfo * g_hashUnderTest = NULL;
500
501void VerifyHash ( const void * key, int len, uint32_t seed, void * out )
502{
503  g_inputVCode = MurmurOAAT(key,len,g_inputVCode);
504  g_inputVCode = MurmurOAAT(&seed,sizeof(uint32_t),g_inputVCode);
505
506  g_hashUnderTest->hash(key,len,seed,out);
507
508  g_outputVCode = MurmurOAAT(out,g_hashUnderTest->hashbits/8,g_outputVCode);
509}
510
511//-----------------------------------------------------------------------------
512
513void testHash ( const char * name )
514{
515  HashInfo * pInfo = findHash(name);
516
517  if(pInfo == NULL)
518  {
519    printf("Invalid hash '%s' specified\n",name);
520    return;
521  }
522  else
523  {
524    g_hashUnderTest = pInfo;
525
526    if(pInfo->hashbits == 32)
527    {
528      test<uint32_t>( VerifyHash, pInfo );
529    }
530    else if(pInfo->hashbits == 64)
531    {
532      test<uint64_t>( pInfo->hash, pInfo );
533    }
534    else if(pInfo->hashbits == 128)
535    {
536      test<uint128_t>( pInfo->hash, pInfo );
537    }
538    else if(pInfo->hashbits == 256)
539    {
540      test<uint256_t>( pInfo->hash, pInfo );
541    }
542    else
543    {
544      printf("Invalid hash bit width %d for hash '%s'",pInfo->hashbits,pInfo->name);
545    }
546  }
547}
548//-----------------------------------------------------------------------------
549
550int main ( int argc, char ** argv )
551{
552  const char * hashToTest = "murmur3a";
553
554  if(argc < 2)
555  {
556    printf("(No test hash given on command line, testing Murmur3_x86_32.)\n");
557  }
558  else
559  {
560    hashToTest = argv[1];
561  }
562
563  // Code runs on the 3rd CPU by default
564
565  SetAffinity((1 << 2));
566
567  SelfTest();
568
569  int timeBegin = clock();
570
571  g_testAll = true;
572
573  //g_testSanity = true;
574  //g_testSpeed = true;
575  //g_testAvalanche = true;
576  //g_testBIC = true;
577  //g_testCyclic = true;
578  //g_testTwoBytes = true;
579  //g_testDiff = true;
580  //g_testDiffDist = true;
581  //g_testSparse = true;
582  //g_testPermutation = true;
583  //g_testWindow = true;
584  //g_testZeroes = true;
585
586  testHash(hashToTest);
587
588  //----------
589
590  int timeEnd = clock();
591
592  printf("\n");
593  printf("Input vcode 0x%08x, Output vcode 0x%08x, Result vcode 0x%08x\n",g_inputVCode,g_outputVCode,g_resultVCode);
594  printf("Verification value is 0x%08x - Testing took %f seconds\n",g_verify,double(timeEnd-timeBegin)/double(CLOCKS_PER_SEC));
595  printf("-------------------------------------------------------------------------------\n");
596  return 0;
597}
598