1/*
2 * Copyright (C) 2011 The Guava Authors
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.google.common.hash;
18
19import static org.junit.Assert.assertEquals;
20
21import com.google.common.base.Charsets;
22import com.google.common.collect.ImmutableSet;
23import com.google.common.collect.Sets;
24import com.google.common.jdk5backport.Arrays;
25import com.google.common.primitives.Ints;
26import com.google.common.testing.EqualsTester;
27
28import org.junit.Assert;
29
30import java.nio.charset.Charset;
31import java.util.Random;
32import java.util.Set;
33
34/**
35 * Various utilities for testing {@link HashFunction}s.
36 *
37 * @author Dimitris Andreou
38 * @author Kurt Alfred Kluever
39 */
40final class HashTestUtils {
41  private HashTestUtils() {}
42
43  /**
44   * Converts a string, which should contain only ascii-representable characters, to a byte[].
45   */
46  static byte[] ascii(String string) {
47    byte[] bytes = new byte[string.length()];
48    for (int i = 0; i < string.length(); i++) {
49      bytes[i] = (byte) string.charAt(i);
50    }
51    return bytes;
52  }
53
54  interface HashFn {
55    byte[] hash(byte[] input, int seed);
56  }
57
58  static void verifyHashFunction(HashFn hashFunction, int hashbits, int expected) {
59    int hashBytes = hashbits / 8;
60
61    byte[] key = new byte[256];
62    byte[] hashes = new byte[hashBytes * 256];
63
64    // Hash keys of the form {}, {0}, {0,1}, {0,1,2}... up to N=255,using 256-N as the seed
65    for (int i = 0; i < 256; i++) {
66      key[i] = (byte) i;
67      int seed = 256 - i;
68      byte[] hash = hashFunction.hash(Arrays.copyOf(key, i), seed);
69      System.arraycopy(hash, 0, hashes, i * hashBytes, hash.length);
70    }
71
72    // Then hash the result array
73    byte[] result = hashFunction.hash(hashes, 0);
74
75    // interpreted in little-endian order.
76    int verification = Integer.reverseBytes(Ints.fromByteArray(result));
77
78    if (expected != verification) {
79      throw new AssertionError("Expected: " + Integer.toHexString(expected)
80          + " got: " + Integer.toHexString(verification));
81    }
82  }
83
84  static final Funnel<Object> BAD_FUNNEL = new Funnel<Object>() {
85    @Override public void funnel(Object object, PrimitiveSink bytePrimitiveSink) {
86      bytePrimitiveSink.putInt(object.hashCode());
87    }
88  };
89
90  static enum RandomHasherAction {
91    PUT_BOOLEAN() {
92      @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
93        boolean value = random.nextBoolean();
94        for (PrimitiveSink sink : sinks) {
95          sink.putBoolean(value);
96        }
97      }
98    },
99    PUT_BYTE() {
100      @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
101        int value = random.nextInt();
102        for (PrimitiveSink sink : sinks) {
103          sink.putByte((byte) value);
104        }
105      }
106    },
107    PUT_SHORT() {
108      @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
109        short value = (short) random.nextInt();
110        for (PrimitiveSink sink : sinks) {
111          sink.putShort(value);
112        }
113      }
114    },
115    PUT_CHAR() {
116      @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
117        char value = (char) random.nextInt();
118        for (PrimitiveSink sink : sinks) {
119          sink.putChar(value);
120        }
121      }
122    },
123    PUT_INT() {
124      @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
125        int value = random.nextInt();
126        for (PrimitiveSink sink : sinks) {
127          sink.putInt(value);
128        }
129      }
130    },
131    PUT_LONG() {
132      @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
133        long value = random.nextLong();
134        for (PrimitiveSink sink : sinks) {
135          sink.putLong(value);
136        }
137      }
138    },
139    PUT_FLOAT() {
140      @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
141        float value = random.nextFloat();
142        for (PrimitiveSink sink : sinks) {
143          sink.putFloat(value);
144        }
145      }
146    },
147    PUT_DOUBLE() {
148      @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
149        double value = random.nextDouble();
150        for (PrimitiveSink sink : sinks) {
151          sink.putDouble(value);
152        }
153      }
154    },
155    PUT_BYTES() {
156      @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
157        byte[] value = new byte[random.nextInt(128)];
158        random.nextBytes(value);
159        for (PrimitiveSink sink : sinks) {
160          sink.putBytes(value);
161        }
162      }
163    },
164    PUT_BYTES_INT_INT() {
165      @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
166        byte[] value = new byte[random.nextInt(128)];
167        random.nextBytes(value);
168        int off = random.nextInt(value.length + 1);
169        int len = random.nextInt(value.length - off + 1);
170        for (PrimitiveSink sink : sinks) {
171          sink.putBytes(value);
172        }
173      }
174    },
175    PUT_STRING() {
176      @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
177        char[] value = new char[random.nextInt(128)];
178        for (int i = 0; i < value.length; i++) {
179          value[i] = (char) random.nextInt();
180        }
181        String s = new String(value);
182        for (PrimitiveSink sink : sinks) {
183          sink.putUnencodedChars(s);
184        }
185      }
186    },
187    PUT_STRING_LOW_SURROGATE() {
188      @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
189        String s = new String(new char[] { randomLowSurrogate(random) });
190        for (PrimitiveSink sink : sinks) {
191          sink.putUnencodedChars(s);
192        }
193      }
194    },
195    PUT_STRING_HIGH_SURROGATE() {
196      @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
197        String s = new String(new char[] { randomHighSurrogate(random) });
198        for (PrimitiveSink sink : sinks) {
199          sink.putUnencodedChars(s);
200        }
201      }
202    },
203    PUT_STRING_LOW_HIGH_SURROGATE() {
204      @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
205        String s = new String(new char[] {
206            randomLowSurrogate(random), randomHighSurrogate(random)});
207        for (PrimitiveSink sink : sinks) {
208          sink.putUnencodedChars(s);
209        }
210      }
211    },
212    PUT_STRING_HIGH_LOW_SURROGATE() {
213      @Override void performAction(Random random, Iterable<? extends PrimitiveSink> sinks) {
214        String s = new String(new char[] {
215            randomHighSurrogate(random), randomLowSurrogate(random)});
216        for (PrimitiveSink sink : sinks) {
217          sink.putUnencodedChars(s);
218        }
219      }
220    };
221
222    abstract void performAction(Random random, Iterable<? extends PrimitiveSink> sinks);
223
224    private static final RandomHasherAction[] actions = values();
225
226    static RandomHasherAction pickAtRandom(Random random) {
227      return actions[random.nextInt(actions.length)];
228    }
229  }
230
231  /**
232   * Test that the hash function contains no funnels. A funnel is a situation where a set of input
233   * (key) bits 'affects' a strictly smaller set of output bits. Funneling is bad because it can
234   * result in more-than-ideal collisions for a non-uniformly distributed key space. In practice,
235   * most key spaces are ANYTHING BUT uniformly distributed. A bit(i) in the input is said to
236   * 'affect' a bit(j) in the output if two inputs, identical but for bit(i), will differ at output
237   * bit(j) about half the time
238   *
239   * <p>Funneling is pretty simple to detect. The key idea is to find example keys which
240   * unequivocably demonstrate that funneling cannot be occuring. This is done bit-by-bit. For
241   * each input bit(i) and output bit(j), two pairs of keys must be found with all bits identical
242   * except bit(i). One pair must differ in output bit(j), and one pair must not. This proves that
243   * input bit(i) can alter output bit(j).
244   */
245  static void checkNoFunnels(HashFunction function) {
246    Random rand = new Random(0);
247    int keyBits = 32;
248    int hashBits = function.bits();
249
250    // output loop tests input bit
251    for (int i = 0; i < keyBits; i++) {
252      int same = 0x0; // bitset for output bits with same values
253      int diff = 0x0; // bitset for output bits with different values
254      int count = 0;
255      // originally was 2 * Math.log(...), making it try more times to avoid flakiness issues
256      int maxCount = (int) (4 * Math.log(2 * keyBits * hashBits) + 1);
257      while (same != 0xffffffff || diff != 0xffffffff) {
258        int key1 = rand.nextInt();
259        // flip input bit for key2
260        int key2 = key1 ^ (1 << i);
261        // get hashes
262        int hash1 = function.hashInt(key1).asInt();
263        int hash2 = function.hashInt(key2).asInt();
264        // test whether the hash values have same output bits
265        same |= ~(hash1 ^ hash2);
266        // test whether the hash values have different output bits
267        diff |= (hash1 ^ hash2);
268
269        count++;
270        // check whether we've exceeded the probabilistically
271        // likely number of trials to have proven no funneling
272        if (count > maxCount) {
273          Assert.fail("input bit(" + i + ") was found not to affect all " +
274               hashBits + " output bits; The unaffected bits are " +
275               "as follows: " + ~(same & diff) + ". This was " +
276               "determined after " + count + " trials.");
277        }
278      }
279    }
280  }
281
282  /**
283   * Test for avalanche. Avalanche means that output bits differ with roughly 1/2 probability on
284   * different input keys. This test verifies that each possible 1-bit key delta achieves avalanche.
285   *
286   * <p>For more information: http://burtleburtle.net/bob/hash/avalanche.html
287   */
288  static void checkAvalanche(HashFunction function, int trials, double epsilon) {
289    Random rand = new Random(0);
290    int keyBits = 32;
291    int hashBits = function.bits();
292    for (int i = 0; i < keyBits; i++) {
293      int[] same = new int[hashBits];
294      int[] diff = new int[hashBits];
295      // go through trials to compute probability
296      for (int j = 0; j < trials; j++) {
297        int key1 = rand.nextInt();
298        // flip input bit for key2
299        int key2 = key1 ^ (1 << i);
300        // compute hash values
301        int hash1 = function.hashInt(key1).asInt();
302        int hash2 = function.hashInt(key2).asInt();
303        for (int k = 0; k < hashBits; k++) {
304          if ((hash1 & (1 << k)) == (hash2 & (1 << k))) {
305            same[k] += 1;
306          } else {
307            diff[k] += 1;
308          }
309        }
310      }
311      // measure probability and assert it's within margin of error
312      for (int j = 0; j < hashBits; j++) {
313        double prob = (double) diff[j] / (double) (diff[j] + same[j]);
314        Assert.assertEquals(0.50d, prob, epsilon);
315      }
316    }
317  }
318
319  /**
320   * Test for 2-bit characteristics. A characteristic is a delta in the input which is repeated in
321   * the output. For example, if f() is a block cipher and c is a characteristic, then
322   * f(x^c) = f(x)^c with greater than expected probability. The test for funneling is merely a test
323   * for 1-bit characteristics.
324   *
325   * <p>There is more general code provided by Bob Jenkins to test arbitrarily sized characteristics
326   * using the magic of gaussian elimination: http://burtleburtle.net/bob/crypto/findingc.html.
327   */
328  static void checkNo2BitCharacteristics(HashFunction function) {
329    Random rand = new Random(0);
330    int keyBits = 32;
331
332    // get every one of (keyBits choose 2) deltas:
333    for (int i = 0; i < keyBits; i++) {
334      for (int j = 0; j < keyBits; j++) {
335        if (j <= i) continue;
336        int count = 0;
337        int maxCount = 20; // the probability of error here is miniscule
338        boolean diff = false;
339
340        while (!diff) {
341          int delta = (1 << i) | (1 << j);
342          int key1 = rand.nextInt();
343          // apply delta
344          int key2 = key1 ^ delta;
345
346          // get hashes
347          int hash1 = function.hashInt(key1).asInt();
348          int hash2 = function.hashInt(key2).asInt();
349
350          // this 2-bit candidate delta is not a characteristic
351          // if deltas are different
352          if ((hash1 ^ hash2) != delta) {
353            diff = true;
354            continue;
355          }
356
357          // check if we've exceeded the probabilistically
358          // likely number of trials to have proven 2-bit candidate
359          // is not a characteristic
360          count++;
361          if (count > maxCount) {
362            Assert.fail("2-bit delta (" + i + ", " + j + ") is likely a " +
363                 "characteristic for this hash. This was " +
364                 "determined after " + count + " trials");
365          }
366        }
367      }
368    }
369  }
370
371  /**
372   * Test for avalanche with 2-bit deltas. Most probabilities of output bit(j) differing are well
373   * within 50%.
374   */
375  static void check2BitAvalanche(HashFunction function, int trials, double epsilon) {
376    Random rand = new Random(0);
377    int keyBits = 32;
378    int hashBits = function.bits();
379    for (int bit1 = 0; bit1 < keyBits; bit1++) {
380      for (int bit2 = 0; bit2 < keyBits; bit2++) {
381        if (bit2 <= bit1) continue;
382        int delta = (1 << bit1) | (1 << bit2);
383        int[] same = new int[hashBits];
384        int[] diff = new int[hashBits];
385        // go through trials to compute probability
386        for (int j = 0; j < trials; j++) {
387          int key1 = rand.nextInt();
388          // flip input bit for key2
389          int key2 = key1 ^ delta;
390          // compute hash values
391          int hash1 = function.hashInt(key1).asInt();
392          int hash2 = function.hashInt(key2).asInt();
393          for (int k = 0; k < hashBits; k++) {
394            if ((hash1 & (1 << k)) == (hash2 & (1 << k))) {
395              same[k] += 1;
396            } else {
397              diff[k] += 1;
398            }
399          }
400        }
401        // measure probability and assert it's within margin of error
402        for (int j = 0; j < hashBits; j++) {
403          double prob = (double) diff[j] / (double) (diff[j] + same[j]);
404          Assert.assertEquals(0.50d, prob, epsilon);
405        }
406      }
407    }
408  }
409
410  /**
411   * Checks that a Hasher returns the same HashCode when given the same input, and also
412   * that the collision rate looks sane.
413   */
414  static void assertInvariants(HashFunction hashFunction) {
415    int objects = 100;
416    Set<HashCode> hashcodes = Sets.newHashSetWithExpectedSize(objects);
417    for (int i = 0; i < objects; i++) {
418      Object o = new Object();
419      HashCode hashcode1 = hashFunction.hashObject(o, HashTestUtils.BAD_FUNNEL);
420      HashCode hashcode2 = hashFunction.hashObject(o, HashTestUtils.BAD_FUNNEL);
421      Assert.assertEquals(hashcode1, hashcode2); // idempotent
422      Assert.assertEquals(hashFunction.bits(), hashcode1.bits());
423      Assert.assertEquals(hashFunction.bits(), hashcode1.asBytes().length * 8);
424      hashcodes.add(hashcode1);
425    }
426    Assert.assertTrue(hashcodes.size() > objects * 0.95); // quite relaxed test
427
428    assertHashBytesThrowsCorrectExceptions(hashFunction);
429    assertIndependentHashers(hashFunction);
430    assertShortcutsAreEquivalent(hashFunction, 512);
431  }
432
433  static void assertHashBytesThrowsCorrectExceptions(HashFunction hashFunction) {
434    hashFunction.hashBytes(new byte[64], 0, 0);
435
436    try {
437      hashFunction.hashBytes(new byte[128], -1, 128);
438      Assert.fail();
439    } catch (IndexOutOfBoundsException expected) {}
440    try {
441      hashFunction.hashBytes(new byte[128], 64, 256 /* too long len */);
442      Assert.fail();
443    } catch (IndexOutOfBoundsException expected) {}
444    try {
445      hashFunction.hashBytes(new byte[64], 0, -1);
446      Assert.fail();
447    } catch (IndexOutOfBoundsException expected) {}
448  }
449
450  static void assertIndependentHashers(HashFunction hashFunction) {
451    int numActions = 100;
452    // hashcodes from non-overlapping hash computations
453    HashCode expected1 = randomHash(hashFunction, new Random(1L), numActions);
454    HashCode expected2 = randomHash(hashFunction, new Random(2L), numActions);
455
456    // equivalent, but overlapping, computations (should produce the same results as above)
457    Random random1 = new Random(1L);
458    Random random2 = new Random(2L);
459    Hasher hasher1 = hashFunction.newHasher();
460    Hasher hasher2 = hashFunction.newHasher();
461    for (int i = 0; i < numActions; i++) {
462      RandomHasherAction.pickAtRandom(random1).performAction(random1, ImmutableSet.of(hasher1));
463      RandomHasherAction.pickAtRandom(random2).performAction(random2, ImmutableSet.of(hasher2));
464    }
465
466    Assert.assertEquals(expected1, hasher1.hash());
467    Assert.assertEquals(expected2, hasher2.hash());
468  }
469
470  static HashCode randomHash(HashFunction hashFunction, Random random, int numActions) {
471    Hasher hasher = hashFunction.newHasher();
472    for (int i = 0; i < numActions; i++) {
473      RandomHasherAction.pickAtRandom(random).performAction(random, ImmutableSet.of(hasher));
474    }
475    return hasher.hash();
476  }
477
478  private static void assertShortcutsAreEquivalent(HashFunction hashFunction, int trials) {
479    Random random = new Random(42085L);
480    for (int i = 0; i < trials; i++) {
481      assertHashBytesEquivalence(hashFunction, random);
482      assertHashIntEquivalence(hashFunction, random);
483      assertHashLongEquivalence(hashFunction, random);
484      assertHashStringEquivalence(hashFunction, random);
485      assertHashStringWithSurrogatesEquivalence(hashFunction, random);
486    }
487  }
488
489  private static void assertHashBytesEquivalence(HashFunction hashFunction, Random random) {
490    int size = random.nextInt(2048);
491    byte[] bytes = new byte[size];
492    random.nextBytes(bytes);
493    assertEquals(hashFunction.hashBytes(bytes),
494        hashFunction.newHasher(size).putBytes(bytes).hash());
495    int off = random.nextInt(size);
496    int len = random.nextInt(size - off);
497    assertEquals(hashFunction.hashBytes(bytes, off, len),
498        hashFunction.newHasher(size).putBytes(bytes, off, len).hash());
499  }
500
501  private static void assertHashIntEquivalence(HashFunction hashFunction, Random random) {
502    int i = random.nextInt();
503    assertEquals(hashFunction.hashInt(i),
504        hashFunction.newHasher().putInt(i).hash());
505  }
506
507  private static void assertHashLongEquivalence(HashFunction hashFunction, Random random) {
508    long l = random.nextLong();
509    assertEquals(hashFunction.hashLong(l),
510        hashFunction.newHasher().putLong(l).hash());
511  }
512
513  private static final ImmutableSet<Charset> CHARSETS = ImmutableSet.of(
514      Charsets.ISO_8859_1,
515      Charsets.US_ASCII,
516      Charsets.UTF_16,
517      Charsets.UTF_16BE,
518      Charsets.UTF_16LE,
519      Charsets.UTF_8);
520
521  private static void assertHashStringEquivalence(HashFunction hashFunction, Random random) {
522    // Test that only data and data-order is important, not the individual operations.
523    new EqualsTester()
524        .addEqualityGroup(
525            hashFunction.hashUnencodedChars("abc"),
526            hashFunction.newHasher().putUnencodedChars("abc").hash(),
527            hashFunction.newHasher().putUnencodedChars("ab").putUnencodedChars("c").hash(),
528            hashFunction.newHasher().putUnencodedChars("a").putUnencodedChars("bc").hash(),
529            hashFunction.newHasher().putUnencodedChars("a").putUnencodedChars("b")
530                .putUnencodedChars("c").hash(),
531            hashFunction.newHasher().putChar('a').putUnencodedChars("bc").hash(),
532            hashFunction.newHasher().putUnencodedChars("ab").putChar('c').hash(),
533            hashFunction.newHasher().putChar('a').putChar('b').putChar('c').hash())
534        .testEquals();
535
536    int size = random.nextInt(2048);
537    byte[] bytes = new byte[size];
538    random.nextBytes(bytes);
539    String string = new String(bytes);
540    assertEquals(hashFunction.hashUnencodedChars(string),
541        hashFunction.newHasher().putUnencodedChars(string).hash());
542    for (Charset charset : CHARSETS) {
543      assertEquals(hashFunction.hashString(string, charset),
544          hashFunction.newHasher().putString(string, charset).hash());
545    }
546  }
547
548  /**
549   * This verifies that putUnencodedChars(String) and hashUnencodedChars(String) are equivalent,
550   * even for funny strings composed by (possibly unmatched, and mostly illegal) surrogate
551   * characters. (But doesn't test that they do the right thing - just their consistency).
552   */
553  private static void assertHashStringWithSurrogatesEquivalence(
554      HashFunction hashFunction, Random random) {
555    int size = random.nextInt(8) + 1;
556    char[] chars = new char[size];
557    for (int i = 0; i < chars.length; i++) {
558      chars[i] = random.nextBoolean() ? randomLowSurrogate(random) : randomHighSurrogate(random);
559    }
560    String string = new String(chars);
561    assertEquals(hashFunction.hashUnencodedChars(string),
562        hashFunction.newHasher().putUnencodedChars(string).hash());
563  }
564
565  static char randomLowSurrogate(Random random) {
566    return (char) (Character.MIN_LOW_SURROGATE
567        + random.nextInt(Character.MAX_LOW_SURROGATE - Character.MIN_LOW_SURROGATE + 1));
568  }
569
570  static char randomHighSurrogate(Random random) {
571    return (char) (Character.MIN_HIGH_SURROGATE
572        + random.nextInt(Character.MAX_HIGH_SURROGATE - Character.MIN_HIGH_SURROGATE + 1));
573  }
574}
575