1//-----------------------------------------------------------------------------
2// Flipping a single bit of a key should cause an "avalanche" of changes in
3// the hash function's output. Ideally, each output bits should flip 50% of
4// the time - if the probability of an output bit flipping is not 50%, that bit
5// is "biased". Too much bias means that patterns applied to the input will
6// cause "echoes" of the patterns in the output, which in turn can cause the
7// hash function to fail to create an even, random distribution of hash values.
8
9
10#pragma once
11
12#include "Types.h"
13#include "Random.h"
14
15#include <vector>
16#include <stdio.h>
17#include <math.h>
18
19// Avalanche fails if a bit is biased by more than 1%
20
21#define AVALANCHE_FAIL 0.01
22
23double maxBias ( std::vector<int> & counts, int reps );
24
25//-----------------------------------------------------------------------------
26
27template < typename keytype, typename hashtype >
28void calcBias ( pfHash hash, std::vector<int> & counts, int reps, Rand & r )
29{
30  const int keybytes = sizeof(keytype);
31  const int hashbytes = sizeof(hashtype);
32
33  const int keybits = keybytes * 8;
34  const int hashbits = hashbytes * 8;
35
36  keytype K;
37  hashtype A,B;
38
39  for(int irep = 0; irep < reps; irep++)
40  {
41    if(irep % (reps/10) == 0) printf(".");
42
43    r.rand_p(&K,keybytes);
44
45    hash(&K,keybytes,0,&A);
46
47    int * cursor = &counts[0];
48
49    for(int iBit = 0; iBit < keybits; iBit++)
50    {
51      flipbit(&K,keybytes,iBit);
52      hash(&K,keybytes,0,&B);
53      flipbit(&K,keybytes,iBit);
54
55      for(int iOut = 0; iOut < hashbits; iOut++)
56      {
57        int bitA = getbit(&A,hashbytes,iOut);
58        int bitB = getbit(&B,hashbytes,iOut);
59
60        (*cursor++) += (bitA ^ bitB);
61      }
62    }
63  }
64}
65
66//-----------------------------------------------------------------------------
67
68template < typename keytype, typename hashtype >
69bool AvalancheTest ( pfHash hash, const int reps )
70{
71  Rand r(48273);
72
73  const int keybytes = sizeof(keytype);
74  const int hashbytes = sizeof(hashtype);
75
76  const int keybits = keybytes * 8;
77  const int hashbits = hashbytes * 8;
78
79  printf("Testing %3d-bit keys -> %3d-bit hashes, %8d reps",keybits,hashbits,reps);
80
81  //----------
82
83  std::vector<int> bins(keybits*hashbits,0);
84
85  calcBias<keytype,hashtype>(hash,bins,reps,r);
86
87  //----------
88
89  bool result = true;
90
91  double b = maxBias(bins,reps);
92
93  printf(" worst bias is %f%%",b * 100.0);
94
95  if(b > AVALANCHE_FAIL)
96  {
97    printf(" !!!!! ");
98    result = false;
99  }
100
101  printf("\n");
102
103  return result;
104}
105
106//----------------------------------------------------------------------------
107// Tests the Bit Independence Criteron. Stricter than Avalanche, but slow and
108// not really all that useful.
109
110template< typename keytype, typename hashtype >
111void BicTest ( pfHash hash, const int keybit, const int reps, double & maxBias, int & maxA, int & maxB, bool verbose )
112{
113  Rand r(11938);
114
115  const int keybytes = sizeof(keytype);
116  const int hashbytes = sizeof(hashtype);
117  const int hashbits = hashbytes * 8;
118
119  std::vector<int> bins(hashbits*hashbits*4,0);
120
121  keytype key;
122  hashtype h1,h2;
123
124  for(int irep = 0; irep < reps; irep++)
125  {
126    if(verbose)
127    {
128      if(irep % (reps/10) == 0) printf(".");
129    }
130
131    r.rand_p(&key,keybytes);
132    hash(&key,keybytes,0,&h1);
133
134    flipbit(key,keybit);
135    hash(&key,keybytes,0,&h2);
136
137    hashtype d = h1 ^ h2;
138
139    for(int out1 = 0; out1 < hashbits; out1++)
140    for(int out2 = 0; out2 < hashbits; out2++)
141    {
142      if(out1 == out2) continue;
143
144      uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
145
146      bins[(out1 * hashbits + out2) * 4 + b]++;
147    }
148  }
149
150  if(verbose) printf("\n");
151
152  maxBias = 0;
153
154  for(int out1 = 0; out1 < hashbits; out1++)
155  {
156    for(int out2 = 0; out2 < hashbits; out2++)
157    {
158      if(out1 == out2)
159      {
160        if(verbose) printf("\\");
161        continue;
162      }
163
164      double bias = 0;
165
166      for(int b = 0; b < 4; b++)
167      {
168        double b2 = double(bins[(out1 * hashbits + out2) * 4 + b]) / double(reps / 2);
169        b2 = fabs(b2 * 2 - 1);
170
171        if(b2 > bias) bias = b2;
172      }
173
174      if(bias > maxBias)
175      {
176        maxBias = bias;
177        maxA = out1;
178        maxB = out2;
179      }
180
181      if(verbose)
182      {
183        if     (bias < 0.01) printf(".");
184        else if(bias < 0.05) printf("o");
185        else if(bias < 0.33) printf("O");
186        else                 printf("X");
187      }
188    }
189
190    if(verbose) printf("\n");
191  }
192}
193
194//----------
195
196template< typename keytype, typename hashtype >
197bool BicTest ( pfHash hash, const int reps )
198{
199  const int keybytes = sizeof(keytype);
200  const int keybits = keybytes * 8;
201
202  double maxBias = 0;
203  int maxK = 0;
204  int maxA = 0;
205  int maxB = 0;
206
207  for(int i = 0; i < keybits; i++)
208  {
209    if(i % (keybits/10) == 0) printf(".");
210
211    double bias;
212    int a,b;
213
214    BicTest<keytype,hashtype>(hash,i,reps,bias,a,b,true);
215
216    if(bias > maxBias)
217    {
218      maxBias = bias;
219      maxK = i;
220      maxA = a;
221      maxB = b;
222    }
223  }
224
225  printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
226
227  // Bit independence is harder to pass than avalanche, so we're a bit more lax here.
228
229  bool result = (maxBias < 0.05);
230
231  return result;
232}
233
234//-----------------------------------------------------------------------------
235// BIC test variant - store all intermediate data in a table, draw diagram
236// afterwards (much faster)
237
238template< typename keytype, typename hashtype >
239void BicTest3 ( pfHash hash, const int reps, bool verbose = true )
240{
241  const int keybytes = sizeof(keytype);
242  const int keybits = keybytes * 8;
243  const int hashbytes = sizeof(hashtype);
244  const int hashbits = hashbytes * 8;
245  const int pagesize = hashbits*hashbits*4;
246
247  Rand r(11938);
248
249  double maxBias = 0;
250  int maxK = 0;
251  int maxA = 0;
252  int maxB = 0;
253
254  keytype key;
255  hashtype h1,h2;
256
257  std::vector<int> bins(keybits*pagesize,0);
258
259  for(int keybit = 0; keybit < keybits; keybit++)
260  {
261    if(keybit % (keybits/10) == 0) printf(".");
262
263    int * page = &bins[keybit*pagesize];
264
265    for(int irep = 0; irep < reps; irep++)
266    {
267      r.rand_p(&key,keybytes);
268      hash(&key,keybytes,0,&h1);
269      flipbit(key,keybit);
270      hash(&key,keybytes,0,&h2);
271
272      hashtype d = h1 ^ h2;
273
274      for(int out1 = 0; out1 < hashbits-1; out1++)
275      for(int out2 = out1+1; out2 < hashbits; out2++)
276      {
277        int * b = &page[(out1*hashbits+out2)*4];
278
279        uint32_t x = getbit(d,out1) | (getbit(d,out2) << 1);
280
281        b[x]++;
282      }
283    }
284  }
285
286  printf("\n");
287
288  for(int out1 = 0; out1 < hashbits-1; out1++)
289  {
290    for(int out2 = out1+1; out2 < hashbits; out2++)
291    {
292      if(verbose) printf("(%3d,%3d) - ",out1,out2);
293
294      for(int keybit = 0; keybit < keybits; keybit++)
295      {
296        int * page = &bins[keybit*pagesize];
297        int * bins = &page[(out1*hashbits+out2)*4];
298
299        double bias = 0;
300
301        for(int b = 0; b < 4; b++)
302        {
303          double b2 = double(bins[b]) / double(reps / 2);
304          b2 = fabs(b2 * 2 - 1);
305
306          if(b2 > bias) bias = b2;
307        }
308
309        if(bias > maxBias)
310        {
311          maxBias = bias;
312          maxK = keybit;
313          maxA = out1;
314          maxB = out2;
315        }
316
317        if(verbose)
318        {
319          if     (bias < 0.01) printf(".");
320          else if(bias < 0.05) printf("o");
321          else if(bias < 0.33) printf("O");
322          else                 printf("X");
323        }
324      }
325
326      // Finished keybit
327
328      if(verbose) printf("\n");
329    }
330
331    if(verbose)
332    {
333      for(int i = 0; i < keybits+12; i++) printf("-");
334      printf("\n");
335    }
336  }
337
338  printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
339}
340
341
342//-----------------------------------------------------------------------------
343// BIC test variant - iterate over output bits, then key bits. No temp storage,
344// but slooooow
345
346template< typename keytype, typename hashtype >
347void BicTest2 ( pfHash hash, const int reps, bool verbose = true )
348{
349  const int keybytes = sizeof(keytype);
350  const int keybits = keybytes * 8;
351  const int hashbytes = sizeof(hashtype);
352  const int hashbits = hashbytes * 8;
353
354  Rand r(11938);
355
356  double maxBias = 0;
357  int maxK = 0;
358  int maxA = 0;
359  int maxB = 0;
360
361  keytype key;
362  hashtype h1,h2;
363
364  for(int out1 = 0; out1 < hashbits-1; out1++)
365  for(int out2 = out1+1; out2 < hashbits; out2++)
366  {
367    if(verbose) printf("(%3d,%3d) - ",out1,out2);
368
369    for(int keybit = 0; keybit < keybits; keybit++)
370    {
371      int bins[4] = { 0, 0, 0, 0 };
372
373      for(int irep = 0; irep < reps; irep++)
374      {
375        r.rand_p(&key,keybytes);
376        hash(&key,keybytes,0,&h1);
377        flipbit(key,keybit);
378        hash(&key,keybytes,0,&h2);
379
380        hashtype d = h1 ^ h2;
381
382        uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
383
384        bins[b]++;
385      }
386
387      double bias = 0;
388
389      for(int b = 0; b < 4; b++)
390      {
391        double b2 = double(bins[b]) / double(reps / 2);
392        b2 = fabs(b2 * 2 - 1);
393
394        if(b2 > bias) bias = b2;
395      }
396
397      if(bias > maxBias)
398      {
399        maxBias = bias;
400        maxK = keybit;
401        maxA = out1;
402        maxB = out2;
403      }
404
405      if(verbose)
406      {
407        if     (bias < 0.05) printf(".");
408        else if(bias < 0.10) printf("o");
409        else if(bias < 0.50) printf("O");
410        else                 printf("X");
411      }
412    }
413
414    // Finished keybit
415
416    if(verbose) printf("\n");
417  }
418
419  printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
420}
421
422//-----------------------------------------------------------------------------
423