11d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert// Copyright 2011 Google Inc. All Rights Reserved.
21d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
31d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertpackage com.google.common.hash;
41d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
51d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertimport static com.google.common.base.Preconditions.checkArgument;
61d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
71d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertimport com.google.common.math.IntMath;
81d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
91d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertimport java.math.RoundingMode;
101d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
111d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert/**
121d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Collections of strategies of generating the {@code k * log(M)} bits required for an element to
131d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * be mapped to a {@link BloomFilter} of {@code M} bits and {@code k} hash functions. These
141d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * strategies are part of the serialized form of the Bloom filters that use them, thus they must be
151d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * preserved as is (no updates allowed, only introduction of new versions).
161d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert *
171d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * @author andreou@google.com (Dimitris Andreou)
181d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert */
191d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertenum BloomFilterStrategies implements BloomFilter.Strategy {
201d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  /**
211d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * See "Less Hashing, Same Performance: Building a Better Bloom Filter" by Adam Kirsch and
221d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * Michael Mitzenmacher. The paper argues that this trick doesn't significantly deteriorate the
231d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   * performance of a Bloom filter (yet only needs two 32bit hash functions).
241d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert   */
251d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  MURMUR128_MITZ_32() {
261d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    @Override public <T> void put(T object, Funnel<? super T> funnel,
271d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        int numHashFunctions, BitArray bits) {
281d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      // TODO(user): when the murmur's shortcuts are implemented, update this code
291d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      long hash64 = Hashing.murmur3_128().newHasher().putObject(object, funnel).hash().asLong();
301d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      int hash1 = (int) hash64;
311d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      int hash2 = (int) (hash64 >>> 32);
321d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      for (int i = 1; i <= numHashFunctions; i++) {
331d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        int nextHash = hash1 + i * hash2;
341d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        if (nextHash < 0) {
351d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          nextHash = ~nextHash;
361d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
371d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        // up to here, the code is identical with the next method
381d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        bits.set(nextHash % bits.size());
391d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      }
401d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    }
411d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
421d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    @Override public <T> boolean mightContain(T object, Funnel<? super T> funnel,
431d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        int numHashFunctions, BitArray bits) {
441d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      long hash64 = Hashing.murmur3_128().newHasher().putObject(object, funnel).hash().asLong();
451d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      int hash1 = (int) hash64;
461d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      int hash2 = (int) (hash64 >>> 32);
471d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      for (int i = 1; i <= numHashFunctions; i++) {
481d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        int nextHash = hash1 + i * hash2;
491d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        if (nextHash < 0) {
501d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          nextHash = ~nextHash;
511d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
521d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        // up to here, the code is identical with the previous method
531d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        if (!bits.get(nextHash % bits.size())) {
541d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert          return false;
551d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert        }
561d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      }
571d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      return true;
581d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    }
591d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  };
601d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
611d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  static class BitArray {
621d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    final long[] data;
631d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
641d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    BitArray(int bits) {
651d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      this(new long[IntMath.divide(bits, 64, RoundingMode.CEILING)]);
661d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    }
671d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
681d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    // Used by serialization
691d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    BitArray(long[] data) {
701d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      checkArgument(data.length > 0, "data length is zero!");
711d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      this.data = data;
721d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    }
731d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
741d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    void set(int index) {
751d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      data[index >> 6] |= (1L << index);
761d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    }
771d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
781d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    boolean get(int index) {
791d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      return (data[index >> 6] & (1L << index)) != 0;
801d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    }
811d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert
821d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    /** Number of bits */
831d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    int size() {
841d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert      return data.length * Long.SIZE;
851d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert    }
861d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert  }
871d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert}
88