11d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert// Copyright 2011 Google Inc. All Rights Reserved. 21d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 31d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertpackage com.google.common.hash; 41d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 51d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertimport static com.google.common.base.Preconditions.checkArgument; 61d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 71d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertimport com.google.common.math.IntMath; 81d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 91d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertimport java.math.RoundingMode; 101d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 111d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert/** 121d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Collections of strategies of generating the {@code k * log(M)} bits required for an element to 131d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * be mapped to a {@link BloomFilter} of {@code M} bits and {@code k} hash functions. These 141d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * strategies are part of the serialized form of the Bloom filters that use them, thus they must be 151d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * preserved as is (no updates allowed, only introduction of new versions). 161d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * 171d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * @author andreou@google.com (Dimitris Andreou) 181d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert */ 191d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertenum BloomFilterStrategies implements BloomFilter.Strategy { 201d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert /** 211d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * See "Less Hashing, Same Performance: Building a Better Bloom Filter" by Adam Kirsch and 221d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Michael Mitzenmacher. The paper argues that this trick doesn't significantly deteriorate the 231d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * performance of a Bloom filter (yet only needs two 32bit hash functions). 241d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert */ 251d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert MURMUR128_MITZ_32() { 261d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert @Override public <T> void put(T object, Funnel<? super T> funnel, 271d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert int numHashFunctions, BitArray bits) { 281d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert // TODO(user): when the murmur's shortcuts are implemented, update this code 291d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert long hash64 = Hashing.murmur3_128().newHasher().putObject(object, funnel).hash().asLong(); 301d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert int hash1 = (int) hash64; 311d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert int hash2 = (int) (hash64 >>> 32); 321d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert for (int i = 1; i <= numHashFunctions; i++) { 331d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert int nextHash = hash1 + i * hash2; 341d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert if (nextHash < 0) { 351d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert nextHash = ~nextHash; 361d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert } 371d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert // up to here, the code is identical with the next method 381d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert bits.set(nextHash % bits.size()); 391d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert } 401d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert } 411d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 421d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert @Override public <T> boolean mightContain(T object, Funnel<? super T> funnel, 431d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert int numHashFunctions, BitArray bits) { 441d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert long hash64 = Hashing.murmur3_128().newHasher().putObject(object, funnel).hash().asLong(); 451d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert int hash1 = (int) hash64; 461d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert int hash2 = (int) (hash64 >>> 32); 471d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert for (int i = 1; i <= numHashFunctions; i++) { 481d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert int nextHash = hash1 + i * hash2; 491d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert if (nextHash < 0) { 501d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert nextHash = ~nextHash; 511d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert } 521d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert // up to here, the code is identical with the previous method 531d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert if (!bits.get(nextHash % bits.size())) { 541d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert return false; 551d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert } 561d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert } 571d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert return true; 581d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert } 591d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert }; 601d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 611d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert static class BitArray { 621d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert final long[] data; 631d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 641d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert BitArray(int bits) { 651d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert this(new long[IntMath.divide(bits, 64, RoundingMode.CEILING)]); 661d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert } 671d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 681d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert // Used by serialization 691d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert BitArray(long[] data) { 701d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert checkArgument(data.length > 0, "data length is zero!"); 711d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert this.data = data; 721d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert } 731d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 741d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert void set(int index) { 751d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert data[index >> 6] |= (1L << index); 761d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert } 771d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 781d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert boolean get(int index) { 791d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert return (data[index >> 6] & (1L << index)) != 0; 801d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert } 811d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 821d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert /** Number of bits */ 831d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert int size() { 841d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert return data.length * Long.SIZE; 851d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert } 861d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert } 871d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert} 88