1/*
2 * Copyright (C) 2011 The Guava Authors
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.google.common.hash;
18
19import static com.google.common.hash.BloomFilterStrategies.BitArray;
20
21import com.google.common.collect.ImmutableSet;
22import com.google.common.math.LongMath;
23import com.google.common.primitives.Ints;
24import com.google.common.testing.EqualsTester;
25import com.google.common.testing.NullPointerTester;
26import com.google.common.testing.SerializableTester;
27
28import junit.framework.TestCase;
29
30import java.math.RoundingMode;
31import java.util.Random;
32
33import javax.annotation.Nullable;
34
35/**
36 * Tests for SimpleGenericBloomFilter and derived BloomFilter views.
37 *
38 * @author Dimitris Andreou
39 */
40public class BloomFilterTest extends TestCase {
41  public void testLargeBloomFilterDoesntOverflow() {
42    long numBits = Integer.MAX_VALUE;
43    numBits++;
44
45    BitArray bitArray = new BitArray(numBits);
46    assertTrue(
47        "BitArray.bitSize() must return a positive number, but was " + bitArray.bitSize(),
48        bitArray.bitSize() > 0);
49
50    // Ideally we would also test the bitSize() overflow of this BF, but it runs out of heap space
51    // BloomFilter.create(Funnels.unencodedCharsFunnel(), 244412641, 1e-11);
52  }
53
54  public void testCreateAndCheckMitz32BloomFilterWithKnownFalsePositives() {
55    int numInsertions = 1000000;
56    BloomFilter<CharSequence> bf = BloomFilter.create(
57        Funnels.unencodedCharsFunnel(), numInsertions, 0.03,
58        BloomFilterStrategies.MURMUR128_MITZ_32);
59
60    // Insert "numInsertions" even numbers into the BF.
61    for (int i = 0; i < numInsertions * 2; i += 2) {
62      bf.put(Integer.toString(i));
63    }
64
65    // Assert that the BF "might" have all of the even numbers.
66    for (int i = 0; i < numInsertions * 2; i += 2) {
67      assertTrue(bf.mightContain(Integer.toString(i)));
68    }
69
70    // Now we check for known false positives using a set of known false positives.
71    // (These are all of the false positives under 900.)
72    ImmutableSet<Integer> falsePositives = ImmutableSet.of(
73        49, 51, 59, 163, 199, 321, 325, 363, 367, 469, 545, 561, 727, 769, 773, 781);
74    for (int i = 1; i < 900; i += 2) {
75      if (!falsePositives.contains(i)) {
76        assertFalse("BF should not contain " + i, bf.mightContain(Integer.toString(i)));
77      }
78    }
79
80    // Check that there are exactly 29824 false positives for this BF.
81    int knownNumberOfFalsePositives = 29824;
82    int numFpp = 0;
83    for (int i = 1; i < numInsertions * 2; i += 2) {
84      if (bf.mightContain(Integer.toString(i))) {
85        numFpp++;
86      }
87    }
88    assertEquals(knownNumberOfFalsePositives, numFpp);
89    double actualFpp = (double) knownNumberOfFalsePositives / numInsertions;
90    double expectedFpp = bf.expectedFpp();
91    // The normal order of (expected, actual) is reversed here on purpose.
92    assertEquals(actualFpp, expectedFpp, 0.00015);
93  }
94
95  public void testCreateAndCheckBloomFilterWithKnownFalsePositives64() {
96    int numInsertions = 1000000;
97    BloomFilter<CharSequence> bf = BloomFilter.create(
98        Funnels.unencodedCharsFunnel(), numInsertions, 0.03,
99        BloomFilterStrategies.MURMUR128_MITZ_64);
100
101    // Insert "numInsertions" even numbers into the BF.
102    for (int i = 0; i < numInsertions * 2; i += 2) {
103      bf.put(Integer.toString(i));
104    }
105
106    // Assert that the BF "might" have all of the even numbers.
107    for (int i = 0; i < numInsertions * 2; i += 2) {
108      assertTrue(bf.mightContain(Integer.toString(i)));
109    }
110
111    // Now we check for known false positives using a set of known false positives.
112    // (These are all of the false positives under 900.)
113    ImmutableSet<Integer> falsePositives = ImmutableSet.of(
114        15, 25, 287, 319, 381, 399, 421, 465, 529, 697, 767, 857);
115    for (int i = 1; i < 900; i += 2) {
116      if (!falsePositives.contains(i)) {
117        assertFalse("BF should not contain " + i, bf.mightContain(Integer.toString(i)));
118      }
119    }
120
121    // Check that there are exactly 30104 false positives for this BF.
122    int knownNumberOfFalsePositives = 30104;
123    int numFpp = 0;
124    for (int i = 1; i < numInsertions * 2; i += 2) {
125      if (bf.mightContain(Integer.toString(i))) {
126        numFpp++;
127      }
128    }
129    assertEquals(knownNumberOfFalsePositives, numFpp);
130    double actualFpp = (double) knownNumberOfFalsePositives / numInsertions;
131    double expectedFpp = bf.expectedFpp();
132    // The normal order of (expected, actual) is reversed here on purpose.
133    assertEquals(actualFpp, expectedFpp, 0.00033);
134  }
135
136  /**
137   * Sanity checking with many combinations of false positive rates and expected insertions
138   */
139  public void testBasic() {
140    for (double fpr = 0.0000001; fpr < 0.1; fpr *= 10) {
141      for (int expectedInsertions = 1; expectedInsertions <= 10000; expectedInsertions *= 10) {
142        checkSanity(BloomFilter.create(HashTestUtils.BAD_FUNNEL, expectedInsertions, fpr));
143      }
144    }
145  }
146
147  public void testPreconditions() {
148    try {
149      BloomFilter.create(Funnels.unencodedCharsFunnel(), -1);
150      fail();
151    } catch (IllegalArgumentException expected) {}
152    try {
153      BloomFilter.create(Funnels.unencodedCharsFunnel(), -1, 0.03);
154      fail();
155    } catch (IllegalArgumentException expected) {}
156    try {
157      BloomFilter.create(Funnels.unencodedCharsFunnel(), 1, 0.0);
158      fail();
159    } catch (IllegalArgumentException expected) {}
160    try {
161      BloomFilter.create(Funnels.unencodedCharsFunnel(), 1, 1.0);
162      fail();
163    } catch (IllegalArgumentException expected) {}
164  }
165
166  public void testFailureWhenMoreThan255HashFunctionsAreNeeded() {
167    try {
168      int n = 1000;
169      double p = 0.00000000000000000000000000000000000000000000000000000000000000000000000000000001;
170      BloomFilter.create(Funnels.unencodedCharsFunnel(), n, p);
171      fail();
172    } catch (IllegalArgumentException expected) {}
173  }
174
175  public void testNullPointers() {
176    NullPointerTester tester = new NullPointerTester();
177    tester.testAllPublicInstanceMethods(BloomFilter.create(Funnels.unencodedCharsFunnel(), 100));
178    tester.testAllPublicStaticMethods(BloomFilter.class);
179  }
180
181  /**
182   * Tests that we never get an optimal hashes number of zero.
183   */
184  public void testOptimalHashes() {
185    for (int n = 1; n < 1000; n++) {
186      for (int m = 0; m < 1000; m++) {
187        assertTrue(BloomFilter.optimalNumOfHashFunctions(n, m) > 0);
188      }
189    }
190  }
191
192  /**
193   * Tests that we always get a non-negative optimal size.
194   */
195  public void testOptimalSize() {
196    for (int n = 1; n < 1000; n++) {
197      for (double fpp = Double.MIN_VALUE; fpp < 1.0; fpp += 0.001) {
198        assertTrue(BloomFilter.optimalNumOfBits(n, fpp) >= 0);
199      }
200    }
201
202    // some random values
203    Random random = new Random(0);
204    for (int repeats = 0; repeats < 10000; repeats++) {
205      assertTrue(BloomFilter.optimalNumOfBits(random.nextInt(1 << 16), random.nextDouble()) >= 0);
206    }
207
208    // and some crazy values (this used to be capped to Integer.MAX_VALUE, now it can go bigger
209    assertEquals(3327428144502L, BloomFilter.optimalNumOfBits(
210        Integer.MAX_VALUE, Double.MIN_VALUE));
211    try {
212      BloomFilter.create(HashTestUtils.BAD_FUNNEL, Integer.MAX_VALUE, Double.MIN_VALUE);
213      fail("we can't represent such a large BF!");
214    } catch (IllegalArgumentException expected) {
215      assertEquals("Could not create BloomFilter of 3327428144502 bits", expected.getMessage());
216    }
217  }
218
219  private void checkSanity(BloomFilter<Object> bf) {
220    assertFalse(bf.mightContain(new Object()));
221    assertFalse(bf.apply(new Object()));
222    for (int i = 0; i < 100; i++) {
223      Object o = new Object();
224      bf.put(o);
225      assertTrue(bf.mightContain(o));
226      assertTrue(bf.apply(o));
227    }
228  }
229
230  public void testCopy() {
231    BloomFilter<CharSequence> original = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);
232    BloomFilter<CharSequence> copy = original.copy();
233    assertNotSame(original, copy);
234    assertEquals(original, copy);
235  }
236
237  public void testExpectedFpp() {
238    BloomFilter<Object> bf = BloomFilter.create(HashTestUtils.BAD_FUNNEL, 10, 0.03);
239    double fpp = bf.expectedFpp();
240    assertEquals(0.0, fpp);
241    // usually completed in less than 200 iterations
242    while (fpp != 1.0) {
243      boolean changed = bf.put(new Object());
244      double newFpp = bf.expectedFpp();
245      // if changed, the new fpp is strictly higher, otherwise it is the same
246      assertTrue(changed ? newFpp > fpp : newFpp == fpp);
247      fpp = newFpp;
248    }
249  }
250
251  public void testBitSize() {
252    double fpp = 0.03;
253    for (int i = 1; i < 10000; i++) {
254      long numBits = BloomFilter.optimalNumOfBits(i, fpp);
255      int arraySize = Ints.checkedCast(LongMath.divide(numBits, 64, RoundingMode.CEILING));
256      assertEquals(
257          arraySize * Long.SIZE,
258          BloomFilter.create(Funnels.unencodedCharsFunnel(), i, fpp).bitSize());
259    }
260  }
261
262  public void testEquals_empty() {
263    new EqualsTester()
264        .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 100, 0.01))
265        .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 100, 0.02))
266        .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 200, 0.01))
267        .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 200, 0.02))
268        .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 100, 0.01))
269        .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 100, 0.02))
270        .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 200, 0.01))
271        .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 200, 0.02))
272        .testEquals();
273  }
274
275  public void testEquals() {
276    BloomFilter<CharSequence> bf1 = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);
277    bf1.put("1");
278    bf1.put("2");
279
280    BloomFilter<CharSequence> bf2 = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);
281    bf2.put("1");
282    bf2.put("2");
283
284    new EqualsTester()
285        .addEqualityGroup(bf1, bf2)
286        .testEquals();
287
288    bf2.put("3");
289
290    new EqualsTester()
291        .addEqualityGroup(bf1)
292        .addEqualityGroup(bf2)
293        .testEquals();
294  }
295
296  public void testEqualsWithCustomFunnel() {
297    BloomFilter<Long> bf1 = BloomFilter.create(new CustomFunnel(), 100);
298    BloomFilter<Long> bf2 = BloomFilter.create(new CustomFunnel(), 100);
299    assertEquals(bf1, bf2);
300  }
301
302  public void testSerializationWithCustomFunnel() {
303    SerializableTester.reserializeAndAssert(BloomFilter.create(new CustomFunnel(), 100));
304  }
305
306  private static final class CustomFunnel implements Funnel<Long> {
307    @Override
308    public void funnel(Long value, PrimitiveSink into) {
309      into.putLong(value);
310    }
311    @Override
312    public boolean equals(@Nullable Object object) {
313      return (object instanceof CustomFunnel);
314    }
315    @Override
316    public int hashCode() {
317      return 42;
318    }
319  }
320
321  public void testPutReturnValue() {
322    for (int i = 0; i < 10; i++) {
323      BloomFilter<CharSequence> bf = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);
324      for (int j = 0; j < 10; j++) {
325        String value = new Object().toString();
326        boolean mightContain = bf.mightContain(value);
327        boolean put = bf.put(value);
328        assertTrue(mightContain != put);
329      }
330    }
331  }
332
333  public void testPutAll() {
334    int element1 = 1;
335    int element2 = 2;
336
337    BloomFilter<Integer> bf1 = BloomFilter.create(Funnels.integerFunnel(), 100);
338    bf1.put(element1);
339    assertTrue(bf1.mightContain(element1));
340    assertFalse(bf1.mightContain(element2));
341
342    BloomFilter<Integer> bf2 = BloomFilter.create(Funnels.integerFunnel(), 100);
343    bf2.put(element2);
344    assertFalse(bf2.mightContain(element1));
345    assertTrue(bf2.mightContain(element2));
346
347    assertTrue(bf1.isCompatible(bf2));
348    bf1.putAll(bf2);
349    assertTrue(bf1.mightContain(element1));
350    assertTrue(bf1.mightContain(element2));
351    assertFalse(bf2.mightContain(element1));
352    assertTrue(bf2.mightContain(element2));
353  }
354
355  public void testPutAllDifferentSizes() {
356    BloomFilter<Integer> bf1 = BloomFilter.create(Funnels.integerFunnel(), 1);
357    BloomFilter<Integer> bf2 = BloomFilter.create(Funnels.integerFunnel(), 10);
358
359    try {
360      assertFalse(bf1.isCompatible(bf2));
361      bf1.putAll(bf2);
362      fail();
363    } catch (IllegalArgumentException expected) {
364    }
365
366    try {
367      assertFalse(bf2.isCompatible(bf1));
368      bf2.putAll(bf1);
369      fail();
370    } catch (IllegalArgumentException expected) {
371    }
372  }
373
374  public void testPutAllWithSelf() {
375    BloomFilter<Integer> bf1 = BloomFilter.create(Funnels.integerFunnel(), 1);
376    try {
377      assertFalse(bf1.isCompatible(bf1));
378      bf1.putAll(bf1);
379      fail();
380    } catch (IllegalArgumentException expected) {
381    }
382  }
383
384  public void testJavaSerialization() {
385    BloomFilter<byte[]> bf = BloomFilter.create(Funnels.byteArrayFunnel(), 100);
386    for (int i = 0; i < 10; i++) {
387      bf.put(Ints.toByteArray(i));
388    }
389
390    BloomFilter<byte[]> copy = SerializableTester.reserialize(bf);
391    for (int i = 0; i < 10; i++) {
392      assertTrue(copy.mightContain(Ints.toByteArray(i)));
393    }
394    assertEquals(bf.expectedFpp(), copy.expectedFpp());
395
396    SerializableTester.reserializeAndAssert(bf);
397  }
398
399  /**
400   * This test will fail whenever someone updates/reorders the BloomFilterStrategies constants.
401   * Only appending a new constant is allowed.
402   */
403  public void testBloomFilterStrategies() {
404    assertEquals(2, BloomFilterStrategies.values().length);
405    assertEquals(BloomFilterStrategies.MURMUR128_MITZ_32, BloomFilterStrategies.values()[0]);
406    assertEquals(BloomFilterStrategies.MURMUR128_MITZ_64, BloomFilterStrategies.values()[1]);
407  }
408
409  public void testGetDefaultStrategyFromSystemProperty() {
410    // clear the property to test the case when the property is not set (the default)
411    System.clearProperty(BloomFilter.USE_MITZ32_PROPERTY);
412    assertEquals(BloomFilterStrategies.MURMUR128_MITZ_64,
413        BloomFilter.getDefaultStrategyFromSystemProperty());
414
415    System.setProperty(BloomFilter.USE_MITZ32_PROPERTY, "true");
416    assertEquals(BloomFilterStrategies.MURMUR128_MITZ_32,
417        BloomFilter.getDefaultStrategyFromSystemProperty());
418
419    System.setProperty(BloomFilter.USE_MITZ32_PROPERTY, "TRUE");
420    assertEquals(BloomFilterStrategies.MURMUR128_MITZ_32,
421        BloomFilter.getDefaultStrategyFromSystemProperty());
422
423    System.setProperty(BloomFilter.USE_MITZ32_PROPERTY, "false");
424    assertEquals(BloomFilterStrategies.MURMUR128_MITZ_64,
425        BloomFilter.getDefaultStrategyFromSystemProperty());
426  }
427}
428