1// Copyright 2011 Google Inc. All Rights Reserved.
2
3package com.google.common.hash;
4
5import static com.google.common.base.Preconditions.checkArgument;
6
7import com.google.common.math.IntMath;
8
9import java.math.RoundingMode;
10
11/**
12 * Collections of strategies of generating the {@code k * log(M)} bits required for an element to
13 * be mapped to a {@link BloomFilter} of {@code M} bits and {@code k} hash functions. These
14 * strategies are part of the serialized form of the Bloom filters that use them, thus they must be
15 * preserved as is (no updates allowed, only introduction of new versions).
16 *
17 * @author andreou@google.com (Dimitris Andreou)
18 */
19enum BloomFilterStrategies implements BloomFilter.Strategy {
20  /**
21   * See "Less Hashing, Same Performance: Building a Better Bloom Filter" by Adam Kirsch and
22   * Michael Mitzenmacher. The paper argues that this trick doesn't significantly deteriorate the
23   * performance of a Bloom filter (yet only needs two 32bit hash functions).
24   */
25  MURMUR128_MITZ_32() {
26    @Override public <T> void put(T object, Funnel<? super T> funnel,
27        int numHashFunctions, BitArray bits) {
28      // TODO(user): when the murmur's shortcuts are implemented, update this code
29      long hash64 = Hashing.murmur3_128().newHasher().putObject(object, funnel).hash().asLong();
30      int hash1 = (int) hash64;
31      int hash2 = (int) (hash64 >>> 32);
32      for (int i = 1; i <= numHashFunctions; i++) {
33        int nextHash = hash1 + i * hash2;
34        if (nextHash < 0) {
35          nextHash = ~nextHash;
36        }
37        // up to here, the code is identical with the next method
38        bits.set(nextHash % bits.size());
39      }
40    }
41
42    @Override public <T> boolean mightContain(T object, Funnel<? super T> funnel,
43        int numHashFunctions, BitArray bits) {
44      long hash64 = Hashing.murmur3_128().newHasher().putObject(object, funnel).hash().asLong();
45      int hash1 = (int) hash64;
46      int hash2 = (int) (hash64 >>> 32);
47      for (int i = 1; i <= numHashFunctions; i++) {
48        int nextHash = hash1 + i * hash2;
49        if (nextHash < 0) {
50          nextHash = ~nextHash;
51        }
52        // up to here, the code is identical with the previous method
53        if (!bits.get(nextHash % bits.size())) {
54          return false;
55        }
56      }
57      return true;
58    }
59  };
60
61  static class BitArray {
62    final long[] data;
63
64    BitArray(int bits) {
65      this(new long[IntMath.divide(bits, 64, RoundingMode.CEILING)]);
66    }
67
68    // Used by serialization
69    BitArray(long[] data) {
70      checkArgument(data.length > 0, "data length is zero!");
71      this.data = data;
72    }
73
74    void set(int index) {
75      data[index >> 6] |= (1L << index);
76    }
77
78    boolean get(int index) {
79      return (data[index >> 6] & (1L << index)) != 0;
80    }
81
82    /** Number of bits */
83    int size() {
84      return data.length * Long.SIZE;
85    }
86  }
87}
88