1//-----------------------------------------------------------------------------
2// Keyset tests generate various sorts of difficult-to-hash keysets and compare
3// the distribution and collision frequency of the hash results against an
4// ideal random distribution
5
6// The sanity checks are also in this cpp/h
7
8#pragma once
9
10#include "Types.h"
11#include "Stats.h"
12#include "Random.h"   // for rand_p
13
14#include <algorithm>  // for std::swap
15#include <assert.h>
16
17//-----------------------------------------------------------------------------
18// Sanity tests
19
20bool VerificationTest   ( pfHash hash, const int hashbits, uint32_t expected, bool verbose );
21bool SanityTest         ( pfHash hash, const int hashbits );
22void AppendedZeroesTest ( pfHash hash, const int hashbits );
23
24//-----------------------------------------------------------------------------
25// Keyset 'Combination' - all possible combinations of input blocks
26
27template< typename hashtype >
28void CombinationKeygenRecurse ( uint32_t * key, int len, int maxlen,
29                  uint32_t * blocks, int blockcount,
30                pfHash hash, std::vector<hashtype> & hashes )
31{
32  if(len == maxlen) return;
33
34  for(int i = 0; i < blockcount; i++)
35  {
36    key[len] = blocks[i];
37
38    //if(len == maxlen-1)
39    {
40      hashtype h;
41      hash(key,(len+1) * sizeof(uint32_t),0,&h);
42      hashes.push_back(h);
43    }
44
45    //else
46    {
47      CombinationKeygenRecurse(key,len+1,maxlen,blocks,blockcount,hash,hashes);
48    }
49  }
50}
51
52template< typename hashtype >
53bool CombinationKeyTest ( hashfunc<hashtype> hash, int maxlen, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
54{
55  printf("Keyset 'Combination' - up to %d blocks from a set of %d - ",maxlen,blockcount);
56
57  //----------
58
59  std::vector<hashtype> hashes;
60
61  uint32_t * key = new uint32_t[maxlen];
62
63  CombinationKeygenRecurse<hashtype>(key,0,maxlen,blocks,blockcount,hash,hashes);
64
65  delete [] key;
66
67  printf("%d keys\n",(int)hashes.size());
68
69  //----------
70
71  bool result = true;
72
73  result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
74
75  printf("\n");
76
77  return result;
78}
79
80//----------------------------------------------------------------------------
81// Keyset 'Permutation' - given a set of 32-bit blocks, generate keys
82// consisting of all possible permutations of those blocks
83
84template< typename hashtype >
85void PermutationKeygenRecurse ( pfHash hash, uint32_t * blocks, int blockcount, int k, std::vector<hashtype> & hashes )
86{
87  if(k == blockcount-1)
88  {
89    hashtype h;
90
91    hash(blocks,blockcount * sizeof(uint32_t),0,&h);
92
93    hashes.push_back(h);
94
95    return;
96  }
97
98  for(int i = k; i < blockcount; i++)
99  {
100    std::swap(blocks[k],blocks[i]);
101
102    PermutationKeygenRecurse(hash,blocks,blockcount,k+1,hashes);
103
104    std::swap(blocks[k],blocks[i]);
105  }
106}
107
108template< typename hashtype >
109bool PermutationKeyTest ( hashfunc<hashtype> hash, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
110{
111  printf("Keyset 'Permutation' - %d blocks - ",blockcount);
112
113  //----------
114
115  std::vector<hashtype> hashes;
116
117  PermutationKeygenRecurse<hashtype>(hash,blocks,blockcount,0,hashes);
118
119  printf("%d keys\n",(int)hashes.size());
120
121  //----------
122
123  bool result = true;
124
125  result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
126
127  printf("\n");
128
129  return result;
130}
131
132//-----------------------------------------------------------------------------
133// Keyset 'Sparse' - generate all possible N-bit keys with up to K bits set
134
135template < typename keytype, typename hashtype >
136void SparseKeygenRecurse ( pfHash hash, int start, int bitsleft, bool inclusive, keytype & k, std::vector<hashtype> & hashes )
137{
138  const int nbytes = sizeof(keytype);
139  const int nbits = nbytes * 8;
140
141  hashtype h;
142
143  for(int i = start; i < nbits; i++)
144  {
145    flipbit(&k,nbytes,i);
146
147    if(inclusive || (bitsleft == 1))
148    {
149      hash(&k,sizeof(keytype),0,&h);
150      hashes.push_back(h);
151    }
152
153    if(bitsleft > 1)
154    {
155      SparseKeygenRecurse(hash,i+1,bitsleft-1,inclusive,k,hashes);
156    }
157
158    flipbit(&k,nbytes,i);
159  }
160}
161
162//----------
163
164template < int keybits, typename hashtype >
165bool SparseKeyTest ( hashfunc<hashtype> hash, const int setbits, bool inclusive, bool testColl, bool testDist, bool drawDiagram  )
166{
167  printf("Keyset 'Sparse' - %d-bit keys with %s %d bits set - ",keybits, inclusive ? "up to" : "exactly", setbits);
168
169  typedef Blob<keybits> keytype;
170
171  std::vector<hashtype> hashes;
172
173  keytype k;
174  memset(&k,0,sizeof(k));
175
176  if(inclusive)
177  {
178    hashtype h;
179
180    hash(&k,sizeof(keytype),0,&h);
181
182    hashes.push_back(h);
183  }
184
185  SparseKeygenRecurse(hash,0,setbits,inclusive,k,hashes);
186
187  printf("%d keys\n",(int)hashes.size());
188
189  bool result = true;
190
191  result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
192
193  printf("\n");
194
195  return result;
196}
197
198//-----------------------------------------------------------------------------
199// Keyset 'Windows' - for all possible N-bit windows of a K-bit key, generate
200// all possible keys with bits set in that window
201
202template < typename keytype, typename hashtype >
203bool WindowedKeyTest ( hashfunc<hashtype> hash, const int windowbits, bool testCollision, bool testDistribution, bool drawDiagram )
204{
205  const int keybits = sizeof(keytype) * 8;
206  const int keycount = 1 << windowbits;
207
208  std::vector<hashtype> hashes;
209  hashes.resize(keycount);
210
211  bool result = true;
212
213  int testcount = keybits;
214
215  printf("Keyset 'Windowed' - %3d-bit key, %3d-bit window - %d tests, %d keys per test\n",keybits,windowbits,testcount,keycount);
216
217  for(int j = 0; j <= testcount; j++)
218  {
219    int minbit = j;
220
221    keytype key;
222
223    for(int i = 0; i < keycount; i++)
224    {
225      key = i;
226      //key = key << minbit;
227
228      lrot(&key,sizeof(keytype),minbit);
229
230      hash(&key,sizeof(keytype),0,&hashes[i]);
231    }
232
233    printf("Window at %3d - ",j);
234
235    result &= TestHashList(hashes,testCollision,testDistribution,drawDiagram);
236
237    //printf("\n");
238  }
239
240  return result;
241}
242
243//-----------------------------------------------------------------------------
244// Keyset 'Cyclic' - generate keys that consist solely of N repetitions of M
245// bytes.
246
247// (This keyset type is designed to make MurmurHash2 fail)
248
249template < typename hashtype >
250bool CyclicKeyTest ( pfHash hash, int cycleLen, int cycleReps, const int keycount, bool drawDiagram )
251{
252  printf("Keyset 'Cyclic' - %d cycles of %d bytes - %d keys\n",cycleReps,cycleLen,keycount);
253
254  Rand r(483723);
255
256  std::vector<hashtype> hashes;
257  hashes.resize(keycount);
258
259  int keyLen = cycleLen * cycleReps;
260
261  uint8_t * cycle = new uint8_t[cycleLen + 16];
262  uint8_t * key = new uint8_t[keyLen];
263
264  //----------
265
266  for(int i = 0; i < keycount; i++)
267  {
268    r.rand_p(cycle,cycleLen);
269
270    *(uint32_t*)cycle = f3mix(i ^ 0x746a94f1);
271
272    for(int j = 0; j < keyLen; j++)
273    {
274      key[j] = cycle[j % cycleLen];
275    }
276
277    hash(key,keyLen,0,&hashes[i]);
278  }
279
280  //----------
281
282  bool result = true;
283
284  result &= TestHashList(hashes,true,true,drawDiagram);
285  printf("\n");
286
287  delete [] cycle;
288  delete [] key;
289
290  return result;
291}
292
293//-----------------------------------------------------------------------------
294// Keyset 'TwoBytes' - generate all keys up to length N with two non-zero bytes
295
296void TwoBytesKeygen ( int maxlen, KeyCallback & c );
297
298template < typename hashtype >
299bool TwoBytesTest2 ( pfHash hash, int maxlen, bool drawDiagram )
300{
301  std::vector<hashtype> hashes;
302
303  HashCallback<hashtype> c(hash,hashes);
304
305  TwoBytesKeygen(maxlen,c);
306
307  bool result = true;
308
309  result &= TestHashList(hashes,true,true,drawDiagram);
310  printf("\n");
311
312  return result;
313}
314
315//-----------------------------------------------------------------------------
316// Keyset 'Text' - generate all keys of the form "prefix"+"core"+"suffix",
317// where "core" consists of all possible combinations of the given character
318// set of length N.
319
320template < typename hashtype >
321bool TextKeyTest ( hashfunc<hashtype> hash, const char * prefix, const char * coreset, const int corelen, const char * suffix, bool drawDiagram )
322{
323  const int prefixlen = (int)strlen(prefix);
324  const int suffixlen = (int)strlen(suffix);
325  const int corecount = (int)strlen(coreset);
326
327  const int keybytes = prefixlen + corelen + suffixlen;
328  const int keycount = (int)pow(double(corecount),double(corelen));
329
330  printf("Keyset 'Text' - keys of form \"%s[",prefix);
331  for(int i = 0; i < corelen; i++) printf("X");
332  printf("]%s\" - %d keys\n",suffix,keycount);
333
334  uint8_t * key = new uint8_t[keybytes+1];
335
336  key[keybytes] = 0;
337
338  memcpy(key,prefix,prefixlen);
339  memcpy(key+prefixlen+corelen,suffix,suffixlen);
340
341  //----------
342
343  std::vector<hashtype> hashes;
344  hashes.resize(keycount);
345
346  for(int i = 0; i < keycount; i++)
347  {
348    int t = i;
349
350    for(int j = 0; j < corelen; j++)
351    {
352      key[prefixlen+j] = coreset[t % corecount]; t /= corecount;
353    }
354
355    hash(key,keybytes,0,&hashes[i]);
356  }
357
358  //----------
359
360  bool result = true;
361
362  result &= TestHashList(hashes,true,true,drawDiagram);
363
364  printf("\n");
365
366  delete [] key;
367
368  return result;
369}
370
371//-----------------------------------------------------------------------------
372// Keyset 'Zeroes' - keys consisting of all zeroes, differing only in length
373
374// We reuse one block of empty bytes, otherwise the RAM cost is enormous.
375
376template < typename hashtype >
377bool ZeroKeyTest ( pfHash hash, bool drawDiagram )
378{
379  int keycount = 64*1024;
380
381  printf("Keyset 'Zeroes' - %d keys\n",keycount);
382
383  unsigned char * nullblock = new unsigned char[keycount];
384  memset(nullblock,0,keycount);
385
386  //----------
387
388  std::vector<hashtype> hashes;
389
390  hashes.resize(keycount);
391
392  for(int i = 0; i < keycount; i++)
393  {
394    hash(nullblock,i,0,&hashes[i]);
395  }
396
397  bool result = true;
398
399  result &= TestHashList(hashes,true,true,drawDiagram);
400
401  printf("\n");
402
403  delete [] nullblock;
404
405  return result;
406}
407
408//-----------------------------------------------------------------------------
409// Keyset 'Seed' - hash "the quick brown fox..." using different seeds
410
411template < typename hashtype >
412bool SeedTest ( pfHash hash, int keycount, bool drawDiagram )
413{
414  printf("Keyset 'Seed' - %d keys\n",keycount);
415
416  const char * text = "The quick brown fox jumps over the lazy dog";
417  const int len = (int)strlen(text);
418
419  //----------
420
421  std::vector<hashtype> hashes;
422
423  hashes.resize(keycount);
424
425  for(int i = 0; i < keycount; i++)
426  {
427    hash(text,len,i,&hashes[i]);
428  }
429
430  bool result = true;
431
432  result &= TestHashList(hashes,true,true,drawDiagram);
433
434  printf("\n");
435
436  return result;
437}
438
439//-----------------------------------------------------------------------------
440