1// Copyright 2011 Google Inc. All Rights Reserved. 2 3package com.google.common.hash; 4 5import static com.google.common.base.Preconditions.checkArgument; 6 7import com.google.common.math.IntMath; 8 9import java.math.RoundingMode; 10 11/** 12 * Collections of strategies of generating the {@code k * log(M)} bits required for an element to 13 * be mapped to a {@link BloomFilter} of {@code M} bits and {@code k} hash functions. These 14 * strategies are part of the serialized form of the Bloom filters that use them, thus they must be 15 * preserved as is (no updates allowed, only introduction of new versions). 16 * 17 * @author andreou@google.com (Dimitris Andreou) 18 */ 19enum BloomFilterStrategies implements BloomFilter.Strategy { 20 /** 21 * See "Less Hashing, Same Performance: Building a Better Bloom Filter" by Adam Kirsch and 22 * Michael Mitzenmacher. The paper argues that this trick doesn't significantly deteriorate the 23 * performance of a Bloom filter (yet only needs two 32bit hash functions). 24 */ 25 MURMUR128_MITZ_32() { 26 @Override public <T> void put(T object, Funnel<? super T> funnel, 27 int numHashFunctions, BitArray bits) { 28 // TODO(user): when the murmur's shortcuts are implemented, update this code 29 long hash64 = Hashing.murmur3_128().newHasher().putObject(object, funnel).hash().asLong(); 30 int hash1 = (int) hash64; 31 int hash2 = (int) (hash64 >>> 32); 32 for (int i = 1; i <= numHashFunctions; i++) { 33 int nextHash = hash1 + i * hash2; 34 if (nextHash < 0) { 35 nextHash = ~nextHash; 36 } 37 // up to here, the code is identical with the next method 38 bits.set(nextHash % bits.size()); 39 } 40 } 41 42 @Override public <T> boolean mightContain(T object, Funnel<? super T> funnel, 43 int numHashFunctions, BitArray bits) { 44 long hash64 = Hashing.murmur3_128().newHasher().putObject(object, funnel).hash().asLong(); 45 int hash1 = (int) hash64; 46 int hash2 = (int) (hash64 >>> 32); 47 for (int i = 1; i <= numHashFunctions; i++) { 48 int nextHash = hash1 + i * hash2; 49 if (nextHash < 0) { 50 nextHash = ~nextHash; 51 } 52 // up to here, the code is identical with the previous method 53 if (!bits.get(nextHash % bits.size())) { 54 return false; 55 } 56 } 57 return true; 58 } 59 }; 60 61 static class BitArray { 62 final long[] data; 63 64 BitArray(int bits) { 65 this(new long[IntMath.divide(bits, 64, RoundingMode.CEILING)]); 66 } 67 68 // Used by serialization 69 BitArray(long[] data) { 70 checkArgument(data.length > 0, "data length is zero!"); 71 this.data = data; 72 } 73 74 void set(int index) { 75 data[index >> 6] |= (1L << index); 76 } 77 78 boolean get(int index) { 79 return (data[index >> 6] & (1L << index)) != 0; 80 } 81 82 /** Number of bits */ 83 int size() { 84 return data.length * Long.SIZE; 85 } 86 } 87} 88