17dd252788645e940eada959bdde927426e2531c9Paul Duffin/*
27dd252788645e940eada959bdde927426e2531c9Paul Duffin * Copyright (C) 2011 The Guava Authors
37dd252788645e940eada959bdde927426e2531c9Paul Duffin *
47dd252788645e940eada959bdde927426e2531c9Paul Duffin * Licensed under the Apache License, Version 2.0 (the "License");
57dd252788645e940eada959bdde927426e2531c9Paul Duffin * you may not use this file except in compliance with the License.
67dd252788645e940eada959bdde927426e2531c9Paul Duffin * You may obtain a copy of the License at
77dd252788645e940eada959bdde927426e2531c9Paul Duffin *
87dd252788645e940eada959bdde927426e2531c9Paul Duffin * http://www.apache.org/licenses/LICENSE-2.0
97dd252788645e940eada959bdde927426e2531c9Paul Duffin *
107dd252788645e940eada959bdde927426e2531c9Paul Duffin * Unless required by applicable law or agreed to in writing, software
117dd252788645e940eada959bdde927426e2531c9Paul Duffin * distributed under the License is distributed on an "AS IS" BASIS,
127dd252788645e940eada959bdde927426e2531c9Paul Duffin * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
137dd252788645e940eada959bdde927426e2531c9Paul Duffin * See the License for the specific language governing permissions and
147dd252788645e940eada959bdde927426e2531c9Paul Duffin * limitations under the License.
157dd252788645e940eada959bdde927426e2531c9Paul Duffin */
161d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
171d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertpackage com.google.common.hash;
181d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
190888a09821a98ac0680fad765217302858e70fa4Paul Duffinimport static com.google.common.hash.BloomFilterStrategies.BitArray;
200888a09821a98ac0680fad765217302858e70fa4Paul Duffin
217dd252788645e940eada959bdde927426e2531c9Paul Duffinimport com.google.common.collect.ImmutableSet;
220888a09821a98ac0680fad765217302858e70fa4Paul Duffinimport com.google.common.math.LongMath;
231d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertimport com.google.common.primitives.Ints;
247dd252788645e940eada959bdde927426e2531c9Paul Duffinimport com.google.common.testing.EqualsTester;
257dd252788645e940eada959bdde927426e2531c9Paul Duffinimport com.google.common.testing.NullPointerTester;
261d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertimport com.google.common.testing.SerializableTester;
271d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
280888a09821a98ac0680fad765217302858e70fa4Paul Duffinimport junit.framework.TestCase;
290888a09821a98ac0680fad765217302858e70fa4Paul Duffin
300888a09821a98ac0680fad765217302858e70fa4Paul Duffinimport java.math.RoundingMode;
311d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertimport java.util.Random;
321d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
337dd252788645e940eada959bdde927426e2531c9Paul Duffinimport javax.annotation.Nullable;
347dd252788645e940eada959bdde927426e2531c9Paul Duffin
351d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert/**
361d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Tests for SimpleGenericBloomFilter and derived BloomFilter views.
377dd252788645e940eada959bdde927426e2531c9Paul Duffin *
387dd252788645e940eada959bdde927426e2531c9Paul Duffin * @author Dimitris Andreou
391d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert */
401d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertpublic class BloomFilterTest extends TestCase {
410888a09821a98ac0680fad765217302858e70fa4Paul Duffin  public void testLargeBloomFilterDoesntOverflow() {
420888a09821a98ac0680fad765217302858e70fa4Paul Duffin    long numBits = Integer.MAX_VALUE;
430888a09821a98ac0680fad765217302858e70fa4Paul Duffin    numBits++;
447dd252788645e940eada959bdde927426e2531c9Paul Duffin
450888a09821a98ac0680fad765217302858e70fa4Paul Duffin    BitArray bitArray = new BitArray(numBits);
460888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertTrue(
470888a09821a98ac0680fad765217302858e70fa4Paul Duffin        "BitArray.bitSize() must return a positive number, but was " + bitArray.bitSize(),
480888a09821a98ac0680fad765217302858e70fa4Paul Duffin        bitArray.bitSize() > 0);
490888a09821a98ac0680fad765217302858e70fa4Paul Duffin
500888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // Ideally we would also test the bitSize() overflow of this BF, but it runs out of heap space
510888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // BloomFilter.create(Funnels.unencodedCharsFunnel(), 244412641, 1e-11);
520888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
530888a09821a98ac0680fad765217302858e70fa4Paul Duffin
540888a09821a98ac0680fad765217302858e70fa4Paul Duffin  public void testCreateAndCheckMitz32BloomFilterWithKnownFalsePositives() {
557dd252788645e940eada959bdde927426e2531c9Paul Duffin    int numInsertions = 1000000;
560888a09821a98ac0680fad765217302858e70fa4Paul Duffin    BloomFilter<CharSequence> bf = BloomFilter.create(
570888a09821a98ac0680fad765217302858e70fa4Paul Duffin        Funnels.unencodedCharsFunnel(), numInsertions, 0.03,
580888a09821a98ac0680fad765217302858e70fa4Paul Duffin        BloomFilterStrategies.MURMUR128_MITZ_32);
597dd252788645e940eada959bdde927426e2531c9Paul Duffin
607dd252788645e940eada959bdde927426e2531c9Paul Duffin    // Insert "numInsertions" even numbers into the BF.
617dd252788645e940eada959bdde927426e2531c9Paul Duffin    for (int i = 0; i < numInsertions * 2; i += 2) {
627dd252788645e940eada959bdde927426e2531c9Paul Duffin      bf.put(Integer.toString(i));
637dd252788645e940eada959bdde927426e2531c9Paul Duffin    }
647dd252788645e940eada959bdde927426e2531c9Paul Duffin
657dd252788645e940eada959bdde927426e2531c9Paul Duffin    // Assert that the BF "might" have all of the even numbers.
667dd252788645e940eada959bdde927426e2531c9Paul Duffin    for (int i = 0; i < numInsertions * 2; i += 2) {
677dd252788645e940eada959bdde927426e2531c9Paul Duffin      assertTrue(bf.mightContain(Integer.toString(i)));
687dd252788645e940eada959bdde927426e2531c9Paul Duffin    }
697dd252788645e940eada959bdde927426e2531c9Paul Duffin
707dd252788645e940eada959bdde927426e2531c9Paul Duffin    // Now we check for known false positives using a set of known false positives.
717dd252788645e940eada959bdde927426e2531c9Paul Duffin    // (These are all of the false positives under 900.)
727dd252788645e940eada959bdde927426e2531c9Paul Duffin    ImmutableSet<Integer> falsePositives = ImmutableSet.of(
737dd252788645e940eada959bdde927426e2531c9Paul Duffin        49, 51, 59, 163, 199, 321, 325, 363, 367, 469, 545, 561, 727, 769, 773, 781);
747dd252788645e940eada959bdde927426e2531c9Paul Duffin    for (int i = 1; i < 900; i += 2) {
757dd252788645e940eada959bdde927426e2531c9Paul Duffin      if (!falsePositives.contains(i)) {
767dd252788645e940eada959bdde927426e2531c9Paul Duffin        assertFalse("BF should not contain " + i, bf.mightContain(Integer.toString(i)));
777dd252788645e940eada959bdde927426e2531c9Paul Duffin      }
787dd252788645e940eada959bdde927426e2531c9Paul Duffin    }
797dd252788645e940eada959bdde927426e2531c9Paul Duffin
807dd252788645e940eada959bdde927426e2531c9Paul Duffin    // Check that there are exactly 29824 false positives for this BF.
817dd252788645e940eada959bdde927426e2531c9Paul Duffin    int knownNumberOfFalsePositives = 29824;
827dd252788645e940eada959bdde927426e2531c9Paul Duffin    int numFpp = 0;
837dd252788645e940eada959bdde927426e2531c9Paul Duffin    for (int i = 1; i < numInsertions * 2; i += 2) {
847dd252788645e940eada959bdde927426e2531c9Paul Duffin      if (bf.mightContain(Integer.toString(i))) {
857dd252788645e940eada959bdde927426e2531c9Paul Duffin        numFpp++;
867dd252788645e940eada959bdde927426e2531c9Paul Duffin      }
877dd252788645e940eada959bdde927426e2531c9Paul Duffin    }
887dd252788645e940eada959bdde927426e2531c9Paul Duffin    assertEquals(knownNumberOfFalsePositives, numFpp);
897dd252788645e940eada959bdde927426e2531c9Paul Duffin    double actualFpp = (double) knownNumberOfFalsePositives / numInsertions;
907dd252788645e940eada959bdde927426e2531c9Paul Duffin    double expectedFpp = bf.expectedFpp();
917dd252788645e940eada959bdde927426e2531c9Paul Duffin    // The normal order of (expected, actual) is reversed here on purpose.
927dd252788645e940eada959bdde927426e2531c9Paul Duffin    assertEquals(actualFpp, expectedFpp, 0.00015);
937dd252788645e940eada959bdde927426e2531c9Paul Duffin  }
947dd252788645e940eada959bdde927426e2531c9Paul Duffin
950888a09821a98ac0680fad765217302858e70fa4Paul Duffin  public void testCreateAndCheckBloomFilterWithKnownFalsePositives64() {
960888a09821a98ac0680fad765217302858e70fa4Paul Duffin    int numInsertions = 1000000;
970888a09821a98ac0680fad765217302858e70fa4Paul Duffin    BloomFilter<CharSequence> bf = BloomFilter.create(
980888a09821a98ac0680fad765217302858e70fa4Paul Duffin        Funnels.unencodedCharsFunnel(), numInsertions, 0.03,
990888a09821a98ac0680fad765217302858e70fa4Paul Duffin        BloomFilterStrategies.MURMUR128_MITZ_64);
1000888a09821a98ac0680fad765217302858e70fa4Paul Duffin
1010888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // Insert "numInsertions" even numbers into the BF.
1020888a09821a98ac0680fad765217302858e70fa4Paul Duffin    for (int i = 0; i < numInsertions * 2; i += 2) {
1030888a09821a98ac0680fad765217302858e70fa4Paul Duffin      bf.put(Integer.toString(i));
1040888a09821a98ac0680fad765217302858e70fa4Paul Duffin    }
1050888a09821a98ac0680fad765217302858e70fa4Paul Duffin
1060888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // Assert that the BF "might" have all of the even numbers.
1070888a09821a98ac0680fad765217302858e70fa4Paul Duffin    for (int i = 0; i < numInsertions * 2; i += 2) {
1080888a09821a98ac0680fad765217302858e70fa4Paul Duffin      assertTrue(bf.mightContain(Integer.toString(i)));
1090888a09821a98ac0680fad765217302858e70fa4Paul Duffin    }
1100888a09821a98ac0680fad765217302858e70fa4Paul Duffin
1110888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // Now we check for known false positives using a set of known false positives.
1120888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // (These are all of the false positives under 900.)
1130888a09821a98ac0680fad765217302858e70fa4Paul Duffin    ImmutableSet<Integer> falsePositives = ImmutableSet.of(
1140888a09821a98ac0680fad765217302858e70fa4Paul Duffin        15, 25, 287, 319, 381, 399, 421, 465, 529, 697, 767, 857);
1150888a09821a98ac0680fad765217302858e70fa4Paul Duffin    for (int i = 1; i < 900; i += 2) {
1160888a09821a98ac0680fad765217302858e70fa4Paul Duffin      if (!falsePositives.contains(i)) {
1170888a09821a98ac0680fad765217302858e70fa4Paul Duffin        assertFalse("BF should not contain " + i, bf.mightContain(Integer.toString(i)));
1180888a09821a98ac0680fad765217302858e70fa4Paul Duffin      }
1190888a09821a98ac0680fad765217302858e70fa4Paul Duffin    }
1200888a09821a98ac0680fad765217302858e70fa4Paul Duffin
1210888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // Check that there are exactly 30104 false positives for this BF.
1220888a09821a98ac0680fad765217302858e70fa4Paul Duffin    int knownNumberOfFalsePositives = 30104;
1230888a09821a98ac0680fad765217302858e70fa4Paul Duffin    int numFpp = 0;
1240888a09821a98ac0680fad765217302858e70fa4Paul Duffin    for (int i = 1; i < numInsertions * 2; i += 2) {
1250888a09821a98ac0680fad765217302858e70fa4Paul Duffin      if (bf.mightContain(Integer.toString(i))) {
1260888a09821a98ac0680fad765217302858e70fa4Paul Duffin        numFpp++;
1270888a09821a98ac0680fad765217302858e70fa4Paul Duffin      }
1280888a09821a98ac0680fad765217302858e70fa4Paul Duffin    }
1290888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertEquals(knownNumberOfFalsePositives, numFpp);
1300888a09821a98ac0680fad765217302858e70fa4Paul Duffin    double actualFpp = (double) knownNumberOfFalsePositives / numInsertions;
1310888a09821a98ac0680fad765217302858e70fa4Paul Duffin    double expectedFpp = bf.expectedFpp();
1320888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // The normal order of (expected, actual) is reversed here on purpose.
1330888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertEquals(actualFpp, expectedFpp, 0.00033);
1340888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
1350888a09821a98ac0680fad765217302858e70fa4Paul Duffin
1361d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  /**
1371d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Sanity checking with many combinations of false positive rates and expected insertions
1381d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   */
1391d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  public void testBasic() {
1401d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    for (double fpr = 0.0000001; fpr < 0.1; fpr *= 10) {
1411d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      for (int expectedInsertions = 1; expectedInsertions <= 10000; expectedInsertions *= 10) {
1421d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        checkSanity(BloomFilter.create(HashTestUtils.BAD_FUNNEL, expectedInsertions, fpr));
1431d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      }
1441d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    }
1451d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  }
1467dd252788645e940eada959bdde927426e2531c9Paul Duffin
1477dd252788645e940eada959bdde927426e2531c9Paul Duffin  public void testPreconditions() {
1487dd252788645e940eada959bdde927426e2531c9Paul Duffin    try {
1490888a09821a98ac0680fad765217302858e70fa4Paul Duffin      BloomFilter.create(Funnels.unencodedCharsFunnel(), -1);
1507dd252788645e940eada959bdde927426e2531c9Paul Duffin      fail();
1517dd252788645e940eada959bdde927426e2531c9Paul Duffin    } catch (IllegalArgumentException expected) {}
1527dd252788645e940eada959bdde927426e2531c9Paul Duffin    try {
1530888a09821a98ac0680fad765217302858e70fa4Paul Duffin      BloomFilter.create(Funnels.unencodedCharsFunnel(), -1, 0.03);
1547dd252788645e940eada959bdde927426e2531c9Paul Duffin      fail();
1557dd252788645e940eada959bdde927426e2531c9Paul Duffin    } catch (IllegalArgumentException expected) {}
1567dd252788645e940eada959bdde927426e2531c9Paul Duffin    try {
1570888a09821a98ac0680fad765217302858e70fa4Paul Duffin      BloomFilter.create(Funnels.unencodedCharsFunnel(), 1, 0.0);
1587dd252788645e940eada959bdde927426e2531c9Paul Duffin      fail();
1597dd252788645e940eada959bdde927426e2531c9Paul Duffin    } catch (IllegalArgumentException expected) {}
1607dd252788645e940eada959bdde927426e2531c9Paul Duffin    try {
1610888a09821a98ac0680fad765217302858e70fa4Paul Duffin      BloomFilter.create(Funnels.unencodedCharsFunnel(), 1, 1.0);
1627dd252788645e940eada959bdde927426e2531c9Paul Duffin      fail();
1637dd252788645e940eada959bdde927426e2531c9Paul Duffin    } catch (IllegalArgumentException expected) {}
1647dd252788645e940eada959bdde927426e2531c9Paul Duffin  }
1657dd252788645e940eada959bdde927426e2531c9Paul Duffin
1667dd252788645e940eada959bdde927426e2531c9Paul Duffin  public void testFailureWhenMoreThan255HashFunctionsAreNeeded() {
1677dd252788645e940eada959bdde927426e2531c9Paul Duffin    try {
1687dd252788645e940eada959bdde927426e2531c9Paul Duffin      int n = 1000;
1697dd252788645e940eada959bdde927426e2531c9Paul Duffin      double p = 0.00000000000000000000000000000000000000000000000000000000000000000000000000000001;
1700888a09821a98ac0680fad765217302858e70fa4Paul Duffin      BloomFilter.create(Funnels.unencodedCharsFunnel(), n, p);
1717dd252788645e940eada959bdde927426e2531c9Paul Duffin      fail();
1727dd252788645e940eada959bdde927426e2531c9Paul Duffin    } catch (IllegalArgumentException expected) {}
1737dd252788645e940eada959bdde927426e2531c9Paul Duffin  }
1747dd252788645e940eada959bdde927426e2531c9Paul Duffin
1757dd252788645e940eada959bdde927426e2531c9Paul Duffin  public void testNullPointers() {
1767dd252788645e940eada959bdde927426e2531c9Paul Duffin    NullPointerTester tester = new NullPointerTester();
1770888a09821a98ac0680fad765217302858e70fa4Paul Duffin    tester.testAllPublicInstanceMethods(BloomFilter.create(Funnels.unencodedCharsFunnel(), 100));
1787dd252788645e940eada959bdde927426e2531c9Paul Duffin    tester.testAllPublicStaticMethods(BloomFilter.class);
1797dd252788645e940eada959bdde927426e2531c9Paul Duffin  }
1807dd252788645e940eada959bdde927426e2531c9Paul Duffin
1811d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  /**
1827dd252788645e940eada959bdde927426e2531c9Paul Duffin   * Tests that we never get an optimal hashes number of zero.
1831d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   */
1841d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  public void testOptimalHashes() {
1851d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    for (int n = 1; n < 1000; n++) {
1861d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      for (int m = 0; m < 1000; m++) {
1871d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        assertTrue(BloomFilter.optimalNumOfHashFunctions(n, m) > 0);
1881d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      }
1891d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    }
1901d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  }
1917dd252788645e940eada959bdde927426e2531c9Paul Duffin
1921d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  /**
1937dd252788645e940eada959bdde927426e2531c9Paul Duffin   * Tests that we always get a non-negative optimal size.
1941d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   */
1951d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  public void testOptimalSize() {
1961d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    for (int n = 1; n < 1000; n++) {
1971d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      for (double fpp = Double.MIN_VALUE; fpp < 1.0; fpp += 0.001) {
1981d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        assertTrue(BloomFilter.optimalNumOfBits(n, fpp) >= 0);
1991d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      }
2001d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    }
2017dd252788645e940eada959bdde927426e2531c9Paul Duffin
2021d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    // some random values
2037dd252788645e940eada959bdde927426e2531c9Paul Duffin    Random random = new Random(0);
2041d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    for (int repeats = 0; repeats < 10000; repeats++) {
2051d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      assertTrue(BloomFilter.optimalNumOfBits(random.nextInt(1 << 16), random.nextDouble()) >= 0);
2061d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    }
2077dd252788645e940eada959bdde927426e2531c9Paul Duffin
2087dd252788645e940eada959bdde927426e2531c9Paul Duffin    // and some crazy values (this used to be capped to Integer.MAX_VALUE, now it can go bigger
2097dd252788645e940eada959bdde927426e2531c9Paul Duffin    assertEquals(3327428144502L, BloomFilter.optimalNumOfBits(
2101d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        Integer.MAX_VALUE, Double.MIN_VALUE));
2117dd252788645e940eada959bdde927426e2531c9Paul Duffin    try {
2127dd252788645e940eada959bdde927426e2531c9Paul Duffin      BloomFilter.create(HashTestUtils.BAD_FUNNEL, Integer.MAX_VALUE, Double.MIN_VALUE);
2137dd252788645e940eada959bdde927426e2531c9Paul Duffin      fail("we can't represent such a large BF!");
2147dd252788645e940eada959bdde927426e2531c9Paul Duffin    } catch (IllegalArgumentException expected) {
2157dd252788645e940eada959bdde927426e2531c9Paul Duffin      assertEquals("Could not create BloomFilter of 3327428144502 bits", expected.getMessage());
2167dd252788645e940eada959bdde927426e2531c9Paul Duffin    }
2171d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  }
2187dd252788645e940eada959bdde927426e2531c9Paul Duffin
2191d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  private void checkSanity(BloomFilter<Object> bf) {
2201d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    assertFalse(bf.mightContain(new Object()));
2217dd252788645e940eada959bdde927426e2531c9Paul Duffin    assertFalse(bf.apply(new Object()));
2221d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    for (int i = 0; i < 100; i++) {
2231d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      Object o = new Object();
2241d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      bf.put(o);
2251d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      assertTrue(bf.mightContain(o));
2267dd252788645e940eada959bdde927426e2531c9Paul Duffin      assertTrue(bf.apply(o));
2271d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    }
2281d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  }
2297dd252788645e940eada959bdde927426e2531c9Paul Duffin
2307dd252788645e940eada959bdde927426e2531c9Paul Duffin  public void testCopy() {
2310888a09821a98ac0680fad765217302858e70fa4Paul Duffin    BloomFilter<CharSequence> original = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);
2327dd252788645e940eada959bdde927426e2531c9Paul Duffin    BloomFilter<CharSequence> copy = original.copy();
2337dd252788645e940eada959bdde927426e2531c9Paul Duffin    assertNotSame(original, copy);
2347dd252788645e940eada959bdde927426e2531c9Paul Duffin    assertEquals(original, copy);
2357dd252788645e940eada959bdde927426e2531c9Paul Duffin  }
2367dd252788645e940eada959bdde927426e2531c9Paul Duffin
2377dd252788645e940eada959bdde927426e2531c9Paul Duffin  public void testExpectedFpp() {
2387dd252788645e940eada959bdde927426e2531c9Paul Duffin    BloomFilter<Object> bf = BloomFilter.create(HashTestUtils.BAD_FUNNEL, 10, 0.03);
2397dd252788645e940eada959bdde927426e2531c9Paul Duffin    double fpp = bf.expectedFpp();
2407dd252788645e940eada959bdde927426e2531c9Paul Duffin    assertEquals(0.0, fpp);
2417dd252788645e940eada959bdde927426e2531c9Paul Duffin    // usually completed in less than 200 iterations
2427dd252788645e940eada959bdde927426e2531c9Paul Duffin    while (fpp != 1.0) {
2437dd252788645e940eada959bdde927426e2531c9Paul Duffin      boolean changed = bf.put(new Object());
2447dd252788645e940eada959bdde927426e2531c9Paul Duffin      double newFpp = bf.expectedFpp();
2457dd252788645e940eada959bdde927426e2531c9Paul Duffin      // if changed, the new fpp is strictly higher, otherwise it is the same
2467dd252788645e940eada959bdde927426e2531c9Paul Duffin      assertTrue(changed ? newFpp > fpp : newFpp == fpp);
2477dd252788645e940eada959bdde927426e2531c9Paul Duffin      fpp = newFpp;
2487dd252788645e940eada959bdde927426e2531c9Paul Duffin    }
2497dd252788645e940eada959bdde927426e2531c9Paul Duffin  }
2507dd252788645e940eada959bdde927426e2531c9Paul Duffin
2510888a09821a98ac0680fad765217302858e70fa4Paul Duffin  public void testBitSize() {
2520888a09821a98ac0680fad765217302858e70fa4Paul Duffin    double fpp = 0.03;
2530888a09821a98ac0680fad765217302858e70fa4Paul Duffin    for (int i = 1; i < 10000; i++) {
2540888a09821a98ac0680fad765217302858e70fa4Paul Duffin      long numBits = BloomFilter.optimalNumOfBits(i, fpp);
2550888a09821a98ac0680fad765217302858e70fa4Paul Duffin      int arraySize = Ints.checkedCast(LongMath.divide(numBits, 64, RoundingMode.CEILING));
2560888a09821a98ac0680fad765217302858e70fa4Paul Duffin      assertEquals(
2570888a09821a98ac0680fad765217302858e70fa4Paul Duffin          arraySize * Long.SIZE,
2580888a09821a98ac0680fad765217302858e70fa4Paul Duffin          BloomFilter.create(Funnels.unencodedCharsFunnel(), i, fpp).bitSize());
2590888a09821a98ac0680fad765217302858e70fa4Paul Duffin    }
2600888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
2610888a09821a98ac0680fad765217302858e70fa4Paul Duffin
2627dd252788645e940eada959bdde927426e2531c9Paul Duffin  public void testEquals_empty() {
2637dd252788645e940eada959bdde927426e2531c9Paul Duffin    new EqualsTester()
2647dd252788645e940eada959bdde927426e2531c9Paul Duffin        .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 100, 0.01))
2657dd252788645e940eada959bdde927426e2531c9Paul Duffin        .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 100, 0.02))
2667dd252788645e940eada959bdde927426e2531c9Paul Duffin        .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 200, 0.01))
2677dd252788645e940eada959bdde927426e2531c9Paul Duffin        .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 200, 0.02))
2680888a09821a98ac0680fad765217302858e70fa4Paul Duffin        .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 100, 0.01))
2690888a09821a98ac0680fad765217302858e70fa4Paul Duffin        .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 100, 0.02))
2700888a09821a98ac0680fad765217302858e70fa4Paul Duffin        .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 200, 0.01))
2710888a09821a98ac0680fad765217302858e70fa4Paul Duffin        .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 200, 0.02))
2727dd252788645e940eada959bdde927426e2531c9Paul Duffin        .testEquals();
2737dd252788645e940eada959bdde927426e2531c9Paul Duffin  }
2747dd252788645e940eada959bdde927426e2531c9Paul Duffin
2757dd252788645e940eada959bdde927426e2531c9Paul Duffin  public void testEquals() {
2760888a09821a98ac0680fad765217302858e70fa4Paul Duffin    BloomFilter<CharSequence> bf1 = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);
2777dd252788645e940eada959bdde927426e2531c9Paul Duffin    bf1.put("1");
2787dd252788645e940eada959bdde927426e2531c9Paul Duffin    bf1.put("2");
2797dd252788645e940eada959bdde927426e2531c9Paul Duffin
2800888a09821a98ac0680fad765217302858e70fa4Paul Duffin    BloomFilter<CharSequence> bf2 = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);
2817dd252788645e940eada959bdde927426e2531c9Paul Duffin    bf2.put("1");
2827dd252788645e940eada959bdde927426e2531c9Paul Duffin    bf2.put("2");
2837dd252788645e940eada959bdde927426e2531c9Paul Duffin
2847dd252788645e940eada959bdde927426e2531c9Paul Duffin    new EqualsTester()
2857dd252788645e940eada959bdde927426e2531c9Paul Duffin        .addEqualityGroup(bf1, bf2)
2867dd252788645e940eada959bdde927426e2531c9Paul Duffin        .testEquals();
2877dd252788645e940eada959bdde927426e2531c9Paul Duffin
2887dd252788645e940eada959bdde927426e2531c9Paul Duffin    bf2.put("3");
2897dd252788645e940eada959bdde927426e2531c9Paul Duffin
2907dd252788645e940eada959bdde927426e2531c9Paul Duffin    new EqualsTester()
2917dd252788645e940eada959bdde927426e2531c9Paul Duffin        .addEqualityGroup(bf1)
2927dd252788645e940eada959bdde927426e2531c9Paul Duffin        .addEqualityGroup(bf2)
2937dd252788645e940eada959bdde927426e2531c9Paul Duffin        .testEquals();
2947dd252788645e940eada959bdde927426e2531c9Paul Duffin  }
2957dd252788645e940eada959bdde927426e2531c9Paul Duffin
2967dd252788645e940eada959bdde927426e2531c9Paul Duffin  public void testEqualsWithCustomFunnel() {
2977dd252788645e940eada959bdde927426e2531c9Paul Duffin    BloomFilter<Long> bf1 = BloomFilter.create(new CustomFunnel(), 100);
2987dd252788645e940eada959bdde927426e2531c9Paul Duffin    BloomFilter<Long> bf2 = BloomFilter.create(new CustomFunnel(), 100);
2997dd252788645e940eada959bdde927426e2531c9Paul Duffin    assertEquals(bf1, bf2);
3007dd252788645e940eada959bdde927426e2531c9Paul Duffin  }
3017dd252788645e940eada959bdde927426e2531c9Paul Duffin
3027dd252788645e940eada959bdde927426e2531c9Paul Duffin  public void testSerializationWithCustomFunnel() {
3037dd252788645e940eada959bdde927426e2531c9Paul Duffin    SerializableTester.reserializeAndAssert(BloomFilter.create(new CustomFunnel(), 100));
3047dd252788645e940eada959bdde927426e2531c9Paul Duffin  }
3057dd252788645e940eada959bdde927426e2531c9Paul Duffin
3067dd252788645e940eada959bdde927426e2531c9Paul Duffin  private static final class CustomFunnel implements Funnel<Long> {
3077dd252788645e940eada959bdde927426e2531c9Paul Duffin    @Override
3087dd252788645e940eada959bdde927426e2531c9Paul Duffin    public void funnel(Long value, PrimitiveSink into) {
3097dd252788645e940eada959bdde927426e2531c9Paul Duffin      into.putLong(value);
3107dd252788645e940eada959bdde927426e2531c9Paul Duffin    }
3117dd252788645e940eada959bdde927426e2531c9Paul Duffin    @Override
3127dd252788645e940eada959bdde927426e2531c9Paul Duffin    public boolean equals(@Nullable Object object) {
3137dd252788645e940eada959bdde927426e2531c9Paul Duffin      return (object instanceof CustomFunnel);
3147dd252788645e940eada959bdde927426e2531c9Paul Duffin    }
3157dd252788645e940eada959bdde927426e2531c9Paul Duffin    @Override
3167dd252788645e940eada959bdde927426e2531c9Paul Duffin    public int hashCode() {
3177dd252788645e940eada959bdde927426e2531c9Paul Duffin      return 42;
3187dd252788645e940eada959bdde927426e2531c9Paul Duffin    }
3197dd252788645e940eada959bdde927426e2531c9Paul Duffin  }
3207dd252788645e940eada959bdde927426e2531c9Paul Duffin
3217dd252788645e940eada959bdde927426e2531c9Paul Duffin  public void testPutReturnValue() {
3227dd252788645e940eada959bdde927426e2531c9Paul Duffin    for (int i = 0; i < 10; i++) {
3230888a09821a98ac0680fad765217302858e70fa4Paul Duffin      BloomFilter<CharSequence> bf = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100);
3247dd252788645e940eada959bdde927426e2531c9Paul Duffin      for (int j = 0; j < 10; j++) {
3257dd252788645e940eada959bdde927426e2531c9Paul Duffin        String value = new Object().toString();
3267dd252788645e940eada959bdde927426e2531c9Paul Duffin        boolean mightContain = bf.mightContain(value);
3277dd252788645e940eada959bdde927426e2531c9Paul Duffin        boolean put = bf.put(value);
3287dd252788645e940eada959bdde927426e2531c9Paul Duffin        assertTrue(mightContain != put);
3297dd252788645e940eada959bdde927426e2531c9Paul Duffin      }
3307dd252788645e940eada959bdde927426e2531c9Paul Duffin    }
3317dd252788645e940eada959bdde927426e2531c9Paul Duffin  }
3327dd252788645e940eada959bdde927426e2531c9Paul Duffin
3330888a09821a98ac0680fad765217302858e70fa4Paul Duffin  public void testPutAll() {
3340888a09821a98ac0680fad765217302858e70fa4Paul Duffin    int element1 = 1;
3350888a09821a98ac0680fad765217302858e70fa4Paul Duffin    int element2 = 2;
3360888a09821a98ac0680fad765217302858e70fa4Paul Duffin
3370888a09821a98ac0680fad765217302858e70fa4Paul Duffin    BloomFilter<Integer> bf1 = BloomFilter.create(Funnels.integerFunnel(), 100);
3380888a09821a98ac0680fad765217302858e70fa4Paul Duffin    bf1.put(element1);
3390888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertTrue(bf1.mightContain(element1));
3400888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertFalse(bf1.mightContain(element2));
3410888a09821a98ac0680fad765217302858e70fa4Paul Duffin
3420888a09821a98ac0680fad765217302858e70fa4Paul Duffin    BloomFilter<Integer> bf2 = BloomFilter.create(Funnels.integerFunnel(), 100);
3430888a09821a98ac0680fad765217302858e70fa4Paul Duffin    bf2.put(element2);
3440888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertFalse(bf2.mightContain(element1));
3450888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertTrue(bf2.mightContain(element2));
3460888a09821a98ac0680fad765217302858e70fa4Paul Duffin
3470888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertTrue(bf1.isCompatible(bf2));
3480888a09821a98ac0680fad765217302858e70fa4Paul Duffin    bf1.putAll(bf2);
3490888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertTrue(bf1.mightContain(element1));
3500888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertTrue(bf1.mightContain(element2));
3510888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertFalse(bf2.mightContain(element1));
3520888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertTrue(bf2.mightContain(element2));
3530888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
3540888a09821a98ac0680fad765217302858e70fa4Paul Duffin
3550888a09821a98ac0680fad765217302858e70fa4Paul Duffin  public void testPutAllDifferentSizes() {
3560888a09821a98ac0680fad765217302858e70fa4Paul Duffin    BloomFilter<Integer> bf1 = BloomFilter.create(Funnels.integerFunnel(), 1);
3570888a09821a98ac0680fad765217302858e70fa4Paul Duffin    BloomFilter<Integer> bf2 = BloomFilter.create(Funnels.integerFunnel(), 10);
3580888a09821a98ac0680fad765217302858e70fa4Paul Duffin
3590888a09821a98ac0680fad765217302858e70fa4Paul Duffin    try {
3600888a09821a98ac0680fad765217302858e70fa4Paul Duffin      assertFalse(bf1.isCompatible(bf2));
3610888a09821a98ac0680fad765217302858e70fa4Paul Duffin      bf1.putAll(bf2);
3620888a09821a98ac0680fad765217302858e70fa4Paul Duffin      fail();
3630888a09821a98ac0680fad765217302858e70fa4Paul Duffin    } catch (IllegalArgumentException expected) {
3640888a09821a98ac0680fad765217302858e70fa4Paul Duffin    }
3650888a09821a98ac0680fad765217302858e70fa4Paul Duffin
3660888a09821a98ac0680fad765217302858e70fa4Paul Duffin    try {
3670888a09821a98ac0680fad765217302858e70fa4Paul Duffin      assertFalse(bf2.isCompatible(bf1));
3680888a09821a98ac0680fad765217302858e70fa4Paul Duffin      bf2.putAll(bf1);
3690888a09821a98ac0680fad765217302858e70fa4Paul Duffin      fail();
3700888a09821a98ac0680fad765217302858e70fa4Paul Duffin    } catch (IllegalArgumentException expected) {
3710888a09821a98ac0680fad765217302858e70fa4Paul Duffin    }
3720888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
3730888a09821a98ac0680fad765217302858e70fa4Paul Duffin
3740888a09821a98ac0680fad765217302858e70fa4Paul Duffin  public void testPutAllWithSelf() {
3750888a09821a98ac0680fad765217302858e70fa4Paul Duffin    BloomFilter<Integer> bf1 = BloomFilter.create(Funnels.integerFunnel(), 1);
3760888a09821a98ac0680fad765217302858e70fa4Paul Duffin    try {
3770888a09821a98ac0680fad765217302858e70fa4Paul Duffin      assertFalse(bf1.isCompatible(bf1));
3780888a09821a98ac0680fad765217302858e70fa4Paul Duffin      bf1.putAll(bf1);
3790888a09821a98ac0680fad765217302858e70fa4Paul Duffin      fail();
3800888a09821a98ac0680fad765217302858e70fa4Paul Duffin    } catch (IllegalArgumentException expected) {
3810888a09821a98ac0680fad765217302858e70fa4Paul Duffin    }
3820888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
3830888a09821a98ac0680fad765217302858e70fa4Paul Duffin
3847dd252788645e940eada959bdde927426e2531c9Paul Duffin  public void testJavaSerialization() {
3851d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    BloomFilter<byte[]> bf = BloomFilter.create(Funnels.byteArrayFunnel(), 100);
3861d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    for (int i = 0; i < 10; i++) {
3871d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      bf.put(Ints.toByteArray(i));
3881d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    }
3897dd252788645e940eada959bdde927426e2531c9Paul Duffin
3907dd252788645e940eada959bdde927426e2531c9Paul Duffin    BloomFilter<byte[]> copy = SerializableTester.reserialize(bf);
3911d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    for (int i = 0; i < 10; i++) {
3927dd252788645e940eada959bdde927426e2531c9Paul Duffin      assertTrue(copy.mightContain(Ints.toByteArray(i)));
3931d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    }
3947dd252788645e940eada959bdde927426e2531c9Paul Duffin    assertEquals(bf.expectedFpp(), copy.expectedFpp());
3957dd252788645e940eada959bdde927426e2531c9Paul Duffin
3967dd252788645e940eada959bdde927426e2531c9Paul Duffin    SerializableTester.reserializeAndAssert(bf);
3977dd252788645e940eada959bdde927426e2531c9Paul Duffin  }
3987dd252788645e940eada959bdde927426e2531c9Paul Duffin
3997dd252788645e940eada959bdde927426e2531c9Paul Duffin  /**
4007dd252788645e940eada959bdde927426e2531c9Paul Duffin   * This test will fail whenever someone updates/reorders the BloomFilterStrategies constants.
4017dd252788645e940eada959bdde927426e2531c9Paul Duffin   * Only appending a new constant is allowed.
4027dd252788645e940eada959bdde927426e2531c9Paul Duffin   */
4037dd252788645e940eada959bdde927426e2531c9Paul Duffin  public void testBloomFilterStrategies() {
4040888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertEquals(2, BloomFilterStrategies.values().length);
4057dd252788645e940eada959bdde927426e2531c9Paul Duffin    assertEquals(BloomFilterStrategies.MURMUR128_MITZ_32, BloomFilterStrategies.values()[0]);
4060888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertEquals(BloomFilterStrategies.MURMUR128_MITZ_64, BloomFilterStrategies.values()[1]);
4070888a09821a98ac0680fad765217302858e70fa4Paul Duffin  }
4080888a09821a98ac0680fad765217302858e70fa4Paul Duffin
4090888a09821a98ac0680fad765217302858e70fa4Paul Duffin  public void testGetDefaultStrategyFromSystemProperty() {
4100888a09821a98ac0680fad765217302858e70fa4Paul Duffin    // clear the property to test the case when the property is not set (the default)
4110888a09821a98ac0680fad765217302858e70fa4Paul Duffin    System.clearProperty(BloomFilter.USE_MITZ32_PROPERTY);
4120888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertEquals(BloomFilterStrategies.MURMUR128_MITZ_64,
4130888a09821a98ac0680fad765217302858e70fa4Paul Duffin        BloomFilter.getDefaultStrategyFromSystemProperty());
4140888a09821a98ac0680fad765217302858e70fa4Paul Duffin
4150888a09821a98ac0680fad765217302858e70fa4Paul Duffin    System.setProperty(BloomFilter.USE_MITZ32_PROPERTY, "true");
4160888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertEquals(BloomFilterStrategies.MURMUR128_MITZ_32,
4170888a09821a98ac0680fad765217302858e70fa4Paul Duffin        BloomFilter.getDefaultStrategyFromSystemProperty());
4180888a09821a98ac0680fad765217302858e70fa4Paul Duffin
4190888a09821a98ac0680fad765217302858e70fa4Paul Duffin    System.setProperty(BloomFilter.USE_MITZ32_PROPERTY, "TRUE");
4200888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertEquals(BloomFilterStrategies.MURMUR128_MITZ_32,
4210888a09821a98ac0680fad765217302858e70fa4Paul Duffin        BloomFilter.getDefaultStrategyFromSystemProperty());
4220888a09821a98ac0680fad765217302858e70fa4Paul Duffin
4230888a09821a98ac0680fad765217302858e70fa4Paul Duffin    System.setProperty(BloomFilter.USE_MITZ32_PROPERTY, "false");
4240888a09821a98ac0680fad765217302858e70fa4Paul Duffin    assertEquals(BloomFilterStrategies.MURMUR128_MITZ_64,
4250888a09821a98ac0680fad765217302858e70fa4Paul Duffin        BloomFilter.getDefaultStrategyFromSystemProperty());
4261d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  }
4271d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert}
428