1/* 2 * Copyright (C) 2011 The Guava Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package com.google.common.hash; 18 19import static com.google.common.hash.BloomFilterStrategies.BitArray; 20 21import com.google.common.collect.ImmutableSet; 22import com.google.common.math.LongMath; 23import com.google.common.primitives.Ints; 24import com.google.common.testing.EqualsTester; 25import com.google.common.testing.NullPointerTester; 26import com.google.common.testing.SerializableTester; 27 28import junit.framework.TestCase; 29 30import java.math.RoundingMode; 31import java.util.Random; 32 33import javax.annotation.Nullable; 34 35/** 36 * Tests for SimpleGenericBloomFilter and derived BloomFilter views. 37 * 38 * @author Dimitris Andreou 39 */ 40public class BloomFilterTest extends TestCase { 41 public void testLargeBloomFilterDoesntOverflow() { 42 long numBits = Integer.MAX_VALUE; 43 numBits++; 44 45 BitArray bitArray = new BitArray(numBits); 46 assertTrue( 47 "BitArray.bitSize() must return a positive number, but was " + bitArray.bitSize(), 48 bitArray.bitSize() > 0); 49 50 // Ideally we would also test the bitSize() overflow of this BF, but it runs out of heap space 51 // BloomFilter.create(Funnels.unencodedCharsFunnel(), 244412641, 1e-11); 52 } 53 54 public void testCreateAndCheckMitz32BloomFilterWithKnownFalsePositives() { 55 int numInsertions = 1000000; 56 BloomFilter<CharSequence> bf = BloomFilter.create( 57 Funnels.unencodedCharsFunnel(), numInsertions, 0.03, 58 BloomFilterStrategies.MURMUR128_MITZ_32); 59 60 // Insert "numInsertions" even numbers into the BF. 61 for (int i = 0; i < numInsertions * 2; i += 2) { 62 bf.put(Integer.toString(i)); 63 } 64 65 // Assert that the BF "might" have all of the even numbers. 66 for (int i = 0; i < numInsertions * 2; i += 2) { 67 assertTrue(bf.mightContain(Integer.toString(i))); 68 } 69 70 // Now we check for known false positives using a set of known false positives. 71 // (These are all of the false positives under 900.) 72 ImmutableSet<Integer> falsePositives = ImmutableSet.of( 73 49, 51, 59, 163, 199, 321, 325, 363, 367, 469, 545, 561, 727, 769, 773, 781); 74 for (int i = 1; i < 900; i += 2) { 75 if (!falsePositives.contains(i)) { 76 assertFalse("BF should not contain " + i, bf.mightContain(Integer.toString(i))); 77 } 78 } 79 80 // Check that there are exactly 29824 false positives for this BF. 81 int knownNumberOfFalsePositives = 29824; 82 int numFpp = 0; 83 for (int i = 1; i < numInsertions * 2; i += 2) { 84 if (bf.mightContain(Integer.toString(i))) { 85 numFpp++; 86 } 87 } 88 assertEquals(knownNumberOfFalsePositives, numFpp); 89 double actualFpp = (double) knownNumberOfFalsePositives / numInsertions; 90 double expectedFpp = bf.expectedFpp(); 91 // The normal order of (expected, actual) is reversed here on purpose. 92 assertEquals(actualFpp, expectedFpp, 0.00015); 93 } 94 95 public void testCreateAndCheckBloomFilterWithKnownFalsePositives64() { 96 int numInsertions = 1000000; 97 BloomFilter<CharSequence> bf = BloomFilter.create( 98 Funnels.unencodedCharsFunnel(), numInsertions, 0.03, 99 BloomFilterStrategies.MURMUR128_MITZ_64); 100 101 // Insert "numInsertions" even numbers into the BF. 102 for (int i = 0; i < numInsertions * 2; i += 2) { 103 bf.put(Integer.toString(i)); 104 } 105 106 // Assert that the BF "might" have all of the even numbers. 107 for (int i = 0; i < numInsertions * 2; i += 2) { 108 assertTrue(bf.mightContain(Integer.toString(i))); 109 } 110 111 // Now we check for known false positives using a set of known false positives. 112 // (These are all of the false positives under 900.) 113 ImmutableSet<Integer> falsePositives = ImmutableSet.of( 114 15, 25, 287, 319, 381, 399, 421, 465, 529, 697, 767, 857); 115 for (int i = 1; i < 900; i += 2) { 116 if (!falsePositives.contains(i)) { 117 assertFalse("BF should not contain " + i, bf.mightContain(Integer.toString(i))); 118 } 119 } 120 121 // Check that there are exactly 30104 false positives for this BF. 122 int knownNumberOfFalsePositives = 30104; 123 int numFpp = 0; 124 for (int i = 1; i < numInsertions * 2; i += 2) { 125 if (bf.mightContain(Integer.toString(i))) { 126 numFpp++; 127 } 128 } 129 assertEquals(knownNumberOfFalsePositives, numFpp); 130 double actualFpp = (double) knownNumberOfFalsePositives / numInsertions; 131 double expectedFpp = bf.expectedFpp(); 132 // The normal order of (expected, actual) is reversed here on purpose. 133 assertEquals(actualFpp, expectedFpp, 0.00033); 134 } 135 136 /** 137 * Sanity checking with many combinations of false positive rates and expected insertions 138 */ 139 public void testBasic() { 140 for (double fpr = 0.0000001; fpr < 0.1; fpr *= 10) { 141 for (int expectedInsertions = 1; expectedInsertions <= 10000; expectedInsertions *= 10) { 142 checkSanity(BloomFilter.create(HashTestUtils.BAD_FUNNEL, expectedInsertions, fpr)); 143 } 144 } 145 } 146 147 public void testPreconditions() { 148 try { 149 BloomFilter.create(Funnels.unencodedCharsFunnel(), -1); 150 fail(); 151 } catch (IllegalArgumentException expected) {} 152 try { 153 BloomFilter.create(Funnels.unencodedCharsFunnel(), -1, 0.03); 154 fail(); 155 } catch (IllegalArgumentException expected) {} 156 try { 157 BloomFilter.create(Funnels.unencodedCharsFunnel(), 1, 0.0); 158 fail(); 159 } catch (IllegalArgumentException expected) {} 160 try { 161 BloomFilter.create(Funnels.unencodedCharsFunnel(), 1, 1.0); 162 fail(); 163 } catch (IllegalArgumentException expected) {} 164 } 165 166 public void testFailureWhenMoreThan255HashFunctionsAreNeeded() { 167 try { 168 int n = 1000; 169 double p = 0.00000000000000000000000000000000000000000000000000000000000000000000000000000001; 170 BloomFilter.create(Funnels.unencodedCharsFunnel(), n, p); 171 fail(); 172 } catch (IllegalArgumentException expected) {} 173 } 174 175 public void testNullPointers() { 176 NullPointerTester tester = new NullPointerTester(); 177 tester.testAllPublicInstanceMethods(BloomFilter.create(Funnels.unencodedCharsFunnel(), 100)); 178 tester.testAllPublicStaticMethods(BloomFilter.class); 179 } 180 181 /** 182 * Tests that we never get an optimal hashes number of zero. 183 */ 184 public void testOptimalHashes() { 185 for (int n = 1; n < 1000; n++) { 186 for (int m = 0; m < 1000; m++) { 187 assertTrue(BloomFilter.optimalNumOfHashFunctions(n, m) > 0); 188 } 189 } 190 } 191 192 /** 193 * Tests that we always get a non-negative optimal size. 194 */ 195 public void testOptimalSize() { 196 for (int n = 1; n < 1000; n++) { 197 for (double fpp = Double.MIN_VALUE; fpp < 1.0; fpp += 0.001) { 198 assertTrue(BloomFilter.optimalNumOfBits(n, fpp) >= 0); 199 } 200 } 201 202 // some random values 203 Random random = new Random(0); 204 for (int repeats = 0; repeats < 10000; repeats++) { 205 assertTrue(BloomFilter.optimalNumOfBits(random.nextInt(1 << 16), random.nextDouble()) >= 0); 206 } 207 208 // and some crazy values (this used to be capped to Integer.MAX_VALUE, now it can go bigger 209 assertEquals(3327428144502L, BloomFilter.optimalNumOfBits( 210 Integer.MAX_VALUE, Double.MIN_VALUE)); 211 try { 212 BloomFilter.create(HashTestUtils.BAD_FUNNEL, Integer.MAX_VALUE, Double.MIN_VALUE); 213 fail("we can't represent such a large BF!"); 214 } catch (IllegalArgumentException expected) { 215 assertEquals("Could not create BloomFilter of 3327428144502 bits", expected.getMessage()); 216 } 217 } 218 219 private void checkSanity(BloomFilter<Object> bf) { 220 assertFalse(bf.mightContain(new Object())); 221 assertFalse(bf.apply(new Object())); 222 for (int i = 0; i < 100; i++) { 223 Object o = new Object(); 224 bf.put(o); 225 assertTrue(bf.mightContain(o)); 226 assertTrue(bf.apply(o)); 227 } 228 } 229 230 public void testCopy() { 231 BloomFilter<CharSequence> original = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100); 232 BloomFilter<CharSequence> copy = original.copy(); 233 assertNotSame(original, copy); 234 assertEquals(original, copy); 235 } 236 237 public void testExpectedFpp() { 238 BloomFilter<Object> bf = BloomFilter.create(HashTestUtils.BAD_FUNNEL, 10, 0.03); 239 double fpp = bf.expectedFpp(); 240 assertEquals(0.0, fpp); 241 // usually completed in less than 200 iterations 242 while (fpp != 1.0) { 243 boolean changed = bf.put(new Object()); 244 double newFpp = bf.expectedFpp(); 245 // if changed, the new fpp is strictly higher, otherwise it is the same 246 assertTrue(changed ? newFpp > fpp : newFpp == fpp); 247 fpp = newFpp; 248 } 249 } 250 251 public void testBitSize() { 252 double fpp = 0.03; 253 for (int i = 1; i < 10000; i++) { 254 long numBits = BloomFilter.optimalNumOfBits(i, fpp); 255 int arraySize = Ints.checkedCast(LongMath.divide(numBits, 64, RoundingMode.CEILING)); 256 assertEquals( 257 arraySize * Long.SIZE, 258 BloomFilter.create(Funnels.unencodedCharsFunnel(), i, fpp).bitSize()); 259 } 260 } 261 262 public void testEquals_empty() { 263 new EqualsTester() 264 .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 100, 0.01)) 265 .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 100, 0.02)) 266 .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 200, 0.01)) 267 .addEqualityGroup(BloomFilter.create(Funnels.byteArrayFunnel(), 200, 0.02)) 268 .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 100, 0.01)) 269 .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 100, 0.02)) 270 .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 200, 0.01)) 271 .addEqualityGroup(BloomFilter.create(Funnels.unencodedCharsFunnel(), 200, 0.02)) 272 .testEquals(); 273 } 274 275 public void testEquals() { 276 BloomFilter<CharSequence> bf1 = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100); 277 bf1.put("1"); 278 bf1.put("2"); 279 280 BloomFilter<CharSequence> bf2 = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100); 281 bf2.put("1"); 282 bf2.put("2"); 283 284 new EqualsTester() 285 .addEqualityGroup(bf1, bf2) 286 .testEquals(); 287 288 bf2.put("3"); 289 290 new EqualsTester() 291 .addEqualityGroup(bf1) 292 .addEqualityGroup(bf2) 293 .testEquals(); 294 } 295 296 public void testEqualsWithCustomFunnel() { 297 BloomFilter<Long> bf1 = BloomFilter.create(new CustomFunnel(), 100); 298 BloomFilter<Long> bf2 = BloomFilter.create(new CustomFunnel(), 100); 299 assertEquals(bf1, bf2); 300 } 301 302 public void testSerializationWithCustomFunnel() { 303 SerializableTester.reserializeAndAssert(BloomFilter.create(new CustomFunnel(), 100)); 304 } 305 306 private static final class CustomFunnel implements Funnel<Long> { 307 @Override 308 public void funnel(Long value, PrimitiveSink into) { 309 into.putLong(value); 310 } 311 @Override 312 public boolean equals(@Nullable Object object) { 313 return (object instanceof CustomFunnel); 314 } 315 @Override 316 public int hashCode() { 317 return 42; 318 } 319 } 320 321 public void testPutReturnValue() { 322 for (int i = 0; i < 10; i++) { 323 BloomFilter<CharSequence> bf = BloomFilter.create(Funnels.unencodedCharsFunnel(), 100); 324 for (int j = 0; j < 10; j++) { 325 String value = new Object().toString(); 326 boolean mightContain = bf.mightContain(value); 327 boolean put = bf.put(value); 328 assertTrue(mightContain != put); 329 } 330 } 331 } 332 333 public void testPutAll() { 334 int element1 = 1; 335 int element2 = 2; 336 337 BloomFilter<Integer> bf1 = BloomFilter.create(Funnels.integerFunnel(), 100); 338 bf1.put(element1); 339 assertTrue(bf1.mightContain(element1)); 340 assertFalse(bf1.mightContain(element2)); 341 342 BloomFilter<Integer> bf2 = BloomFilter.create(Funnels.integerFunnel(), 100); 343 bf2.put(element2); 344 assertFalse(bf2.mightContain(element1)); 345 assertTrue(bf2.mightContain(element2)); 346 347 assertTrue(bf1.isCompatible(bf2)); 348 bf1.putAll(bf2); 349 assertTrue(bf1.mightContain(element1)); 350 assertTrue(bf1.mightContain(element2)); 351 assertFalse(bf2.mightContain(element1)); 352 assertTrue(bf2.mightContain(element2)); 353 } 354 355 public void testPutAllDifferentSizes() { 356 BloomFilter<Integer> bf1 = BloomFilter.create(Funnels.integerFunnel(), 1); 357 BloomFilter<Integer> bf2 = BloomFilter.create(Funnels.integerFunnel(), 10); 358 359 try { 360 assertFalse(bf1.isCompatible(bf2)); 361 bf1.putAll(bf2); 362 fail(); 363 } catch (IllegalArgumentException expected) { 364 } 365 366 try { 367 assertFalse(bf2.isCompatible(bf1)); 368 bf2.putAll(bf1); 369 fail(); 370 } catch (IllegalArgumentException expected) { 371 } 372 } 373 374 public void testPutAllWithSelf() { 375 BloomFilter<Integer> bf1 = BloomFilter.create(Funnels.integerFunnel(), 1); 376 try { 377 assertFalse(bf1.isCompatible(bf1)); 378 bf1.putAll(bf1); 379 fail(); 380 } catch (IllegalArgumentException expected) { 381 } 382 } 383 384 public void testJavaSerialization() { 385 BloomFilter<byte[]> bf = BloomFilter.create(Funnels.byteArrayFunnel(), 100); 386 for (int i = 0; i < 10; i++) { 387 bf.put(Ints.toByteArray(i)); 388 } 389 390 BloomFilter<byte[]> copy = SerializableTester.reserialize(bf); 391 for (int i = 0; i < 10; i++) { 392 assertTrue(copy.mightContain(Ints.toByteArray(i))); 393 } 394 assertEquals(bf.expectedFpp(), copy.expectedFpp()); 395 396 SerializableTester.reserializeAndAssert(bf); 397 } 398 399 /** 400 * This test will fail whenever someone updates/reorders the BloomFilterStrategies constants. 401 * Only appending a new constant is allowed. 402 */ 403 public void testBloomFilterStrategies() { 404 assertEquals(2, BloomFilterStrategies.values().length); 405 assertEquals(BloomFilterStrategies.MURMUR128_MITZ_32, BloomFilterStrategies.values()[0]); 406 assertEquals(BloomFilterStrategies.MURMUR128_MITZ_64, BloomFilterStrategies.values()[1]); 407 } 408 409 public void testGetDefaultStrategyFromSystemProperty() { 410 // clear the property to test the case when the property is not set (the default) 411 System.clearProperty(BloomFilter.USE_MITZ32_PROPERTY); 412 assertEquals(BloomFilterStrategies.MURMUR128_MITZ_64, 413 BloomFilter.getDefaultStrategyFromSystemProperty()); 414 415 System.setProperty(BloomFilter.USE_MITZ32_PROPERTY, "true"); 416 assertEquals(BloomFilterStrategies.MURMUR128_MITZ_32, 417 BloomFilter.getDefaultStrategyFromSystemProperty()); 418 419 System.setProperty(BloomFilter.USE_MITZ32_PROPERTY, "TRUE"); 420 assertEquals(BloomFilterStrategies.MURMUR128_MITZ_32, 421 BloomFilter.getDefaultStrategyFromSystemProperty()); 422 423 System.setProperty(BloomFilter.USE_MITZ32_PROPERTY, "false"); 424 assertEquals(BloomFilterStrategies.MURMUR128_MITZ_64, 425 BloomFilter.getDefaultStrategyFromSystemProperty()); 426 } 427} 428