1/*
2 * Copyright (C) 2011 The Guava Authors
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5 * in compliance with the License. You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software distributed under the License
10 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11 * or implied. See the License for the specific language governing permissions and limitations under
12 * the License.
13 */
14
15package com.google.common.hash;
16
17import static com.google.common.base.Preconditions.checkArgument;
18
19import com.google.common.annotations.Beta;
20import com.google.common.primitives.UnsignedInts;
21
22import java.nio.ByteBuffer;
23import java.security.MessageDigest;
24import java.util.Iterator;
25
26/**
27 * Static methods to obtain {@link HashFunction} instances, and other static
28 * hashing-related utilities.
29 *
30 * @author Kevin Bourrillion
31 * @author Dimitris Andreou
32 * @author Kurt Alfred Kluever
33 * @since 11.0
34 */
35@Beta
36public final class Hashing {
37  private Hashing() {}
38
39  /**
40   * Returns a general-purpose, <b>non-cryptographic-strength</b>, streaming hash function that
41   * produces hash codes of length at least {@code minimumBits}. Users without specific
42   * compatibility requirements and who do not persist the hash codes are encouraged to
43   * choose this hash function.
44   *
45   * <p><b>Warning: the implementation is unspecified and is subject to change.</b>
46   *
47   * @throws IllegalArgumentException if {@code minimumBits} is not positive
48   */
49  public static HashFunction goodFastHash(int minimumBits) {
50    int bits = checkPositiveAndMakeMultipleOf32(minimumBits);
51
52    if (bits == 32) {
53      return murmur3_32();
54    } else if (bits <= 128) {
55      return murmur3_128();
56    } else {
57      // Join some 128-bit murmur3s
58      int hashFunctionsNeeded = (bits + 127) / 128;
59      HashFunction[] hashFunctions = new HashFunction[hashFunctionsNeeded];
60      for (int i = 0; i < hashFunctionsNeeded; i++) {
61        hashFunctions[i] = murmur3_128(i * 1500450271 /* a prime; shouldn't matter */);
62      }
63      return new ConcatenatedHashFunction(hashFunctions);
64    }
65  }
66
67  /**
68   * Returns a hash function implementing the
69   * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">32-bit murmur3
70   * algorithm</a> (little-endian variant), using the given seed value.
71   */
72  public static HashFunction murmur3_32(int seed) {
73    return new Murmur3_32HashFunction(seed);
74  }
75
76  /**
77   * Returns a hash function implementing the
78   * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">32-bit murmur3
79   * algorithm</a> (little-endian variant), using a seed value of zero.
80   */
81  public static HashFunction murmur3_32() {
82    return MURMUR3_32;
83  }
84
85  private static final Murmur3_32HashFunction MURMUR3_32 = new Murmur3_32HashFunction(0);
86
87  /**
88   * Returns a hash function implementing the
89   * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">
90   * 128-bit murmur3 algorithm, x64 variant</a> (little-endian variant), using the given seed
91   * value.
92   */
93  public static HashFunction murmur3_128(int seed) {
94    return new Murmur3_128HashFunction(seed);
95  }
96
97  /**
98   * Returns a hash function implementing the
99   * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">
100   * 128-bit murmur3 algorithm, x64 variant</a>  (little-endian variant), using a seed value
101   * of zero.
102   */
103  public static HashFunction murmur3_128() {
104    return MURMUR3_128;
105  }
106
107  private static final Murmur3_128HashFunction MURMUR3_128 = new Murmur3_128HashFunction(0);
108
109  /**
110   * Returns a hash function implementing the MD5 hash algorithm by delegating to the MD5
111   * {@link MessageDigest}.
112   */
113  public static HashFunction md5() {
114    return MD5;
115  }
116
117  private static final HashFunction MD5 = new MessageDigestHashFunction("MD5");
118
119  /**
120   * Returns a hash function implementing the SHA-1 algorithm by delegating to the SHA-1
121   * {@link MessageDigest}.
122   */
123  public static HashFunction sha1() {
124    return SHA_1;
125  }
126
127  private static final HashFunction SHA_1 = new MessageDigestHashFunction("SHA-1");
128
129  /**
130   * Returns a hash function implementing the SHA-256 algorithm by delegating to the SHA-256
131   * {@link MessageDigest}.
132   */
133  public static HashFunction sha256() {
134    return SHA_256;
135  }
136
137  private static final HashFunction SHA_256 = new MessageDigestHashFunction("SHA-256");
138
139  /**
140   * Returns a hash function implementing the SHA-512 algorithm by delegating to the SHA-512
141   * {@link MessageDigest}.
142   */
143  public static HashFunction sha512() {
144    return SHA_512;
145  }
146
147  private static final HashFunction SHA_512 = new MessageDigestHashFunction("SHA-512");
148
149  /**
150   * If {@code hashCode} has enough bits, returns {@code hashCode.asLong()}, otherwise
151   * returns a {@code long} value with {@code hashCode.asInt()} as the least-significant
152   * four bytes and {@code 0x00} as each of the most-significant four bytes.
153   */
154  public static long padToLong(HashCode hashCode) {
155    return (hashCode.bits() < 64) ? UnsignedInts.toLong(hashCode.asInt()) : hashCode.asLong();
156  }
157
158  /**
159   * Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in a uniform
160   * manner that minimizes the need for remapping as {@code buckets} grows. That is,
161   * {@code consistentHash(h, n)} equals:
162   *
163   * <ul>
164   * <li>{@code n - 1}, with approximate probability {@code 1/n}
165   * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n})
166   * </ul>
167   *
168   * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">wikipedia
169   * article on consistent hashing</a> for more information.
170   */
171  public static int consistentHash(HashCode hashCode, int buckets) {
172    return consistentHash(padToLong(hashCode), buckets);
173  }
174
175  /**
176   * Assigns to {@code input} a "bucket" in the range {@code [0, buckets)}, in a uniform
177   * manner that minimizes the need for remapping as {@code buckets} grows. That is,
178   * {@code consistentHash(h, n)} equals:
179   *
180   * <ul>
181   * <li>{@code n - 1}, with approximate probability {@code 1/n}
182   * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n})
183   * </ul>
184   *
185   * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">wikipedia
186   * article on consistent hashing</a> for more information.
187   */
188  public static int consistentHash(long input, int buckets) {
189    checkArgument(buckets > 0, "buckets must be positive: %s", buckets);
190    long h = input;
191    int candidate = 0;
192    int next;
193
194    // Jump from bucket to bucket until we go out of range
195    while (true) {
196      // See http://en.wikipedia.org/wiki/Linear_congruential_generator
197      // These values for a and m come from the C++ version of this function.
198      h = 2862933555777941757L * h + 1;
199      double inv = 0x1.0p31 / ((int) (h >>> 33) + 1);
200      next = (int) ((candidate + 1) * inv);
201
202      if (next >= 0 && next < buckets) {
203        candidate = next;
204      } else {
205        return candidate;
206      }
207    }
208  }
209
210  /**
211   * Returns a hash code, having the same bit length as each of the input hash codes,
212   * that combines the information of these hash codes in an ordered fashion. That
213   * is, whenever two equal hash codes are produced by two calls to this method, it
214   * is <i>as likely as possible</i> that each was computed from the <i>same</i>
215   * input hash codes in the <i>same</i> order.
216   *
217   * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes
218   *     do not all have the same bit length
219   */
220  public static HashCode combineOrdered(Iterable<HashCode> hashCodes) {
221    Iterator<HashCode> iterator = hashCodes.iterator();
222    checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine.");
223    int bits = iterator.next().bits();
224    byte[] resultBytes = new byte[bits / 8];
225    for (HashCode hashCode : hashCodes) {
226      byte[] nextBytes = hashCode.asBytes();
227      checkArgument(nextBytes.length == resultBytes.length,
228          "All hashcodes must have the same bit length.");
229      for (int i = 0; i < nextBytes.length; i++) {
230        resultBytes[i] = (byte) (resultBytes[i] * 37 ^ nextBytes[i]);
231      }
232    }
233    return HashCodes.fromBytes(resultBytes);
234  }
235
236  /**
237   * Returns a hash code, having the same bit length as each of the input hash codes,
238   * that combines the information of these hash codes in an unordered fashion. That
239   * is, whenever two equal hash codes are produced by two calls to this method, it
240   * is <i>as likely as possible</i> that each was computed from the <i>same</i>
241   * input hash codes in <i>some</i> order.
242   *
243   * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes
244   *     do not all have the same bit length
245   */
246  public static HashCode combineUnordered(Iterable<HashCode> hashCodes) {
247    Iterator<HashCode> iterator = hashCodes.iterator();
248    checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine.");
249    byte[] resultBytes = new byte[iterator.next().bits() / 8];
250    for (HashCode hashCode : hashCodes) {
251      byte[] nextBytes = hashCode.asBytes();
252      checkArgument(nextBytes.length == resultBytes.length,
253          "All hashcodes must have the same bit length.");
254      for (int i = 0; i < nextBytes.length; i++) {
255        resultBytes[i] += nextBytes[i];
256      }
257    }
258    return HashCodes.fromBytes(resultBytes);
259  }
260
261  /**
262   * Checks that the passed argument is positive, and ceils it to a multiple of 32.
263   */
264  static int checkPositiveAndMakeMultipleOf32(int bits) {
265    checkArgument(bits > 0, "Number of bits must be positive");
266    return (bits + 31) & ~31;
267  }
268
269  // TODO(kevinb): probably expose this via a Hashing method at some point?
270  private static class ConcatenatedHashFunction extends AbstractCompositeHashFunction {
271    final int bits;
272
273    ConcatenatedHashFunction(HashFunction[] functions) {
274      super(functions);
275      int bitSum = 0;
276      for (HashFunction f : this.functions) {
277        bitSum += f.bits();
278      }
279      this.bits = bitSum;
280    }
281
282    @Override
283    HashCode makeHash(Hasher[] hashers) {
284      byte[] bytes = new byte[bits / 8];
285      ByteBuffer buffer = ByteBuffer.wrap(bytes);
286      for (Hasher hasher : hashers) {
287        buffer.put(hasher.hash().asBytes());
288      }
289      return HashCodes.fromBytes(bytes);
290    }
291
292    @Override
293    public int bits() {
294      return bits;
295    }
296  }
297}
298