/*
 * Copyright (C) 2011 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.common.hash;

import static com.google.common.base.Preconditions.checkArgument;

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.Charset;

/**
 * Skeleton implementation of {@link HashFunction}. Provides default implementations which
 * invoke the appropriate method on {@link #newHasher()}, then return the result of
 * {@link Hasher#hash}.
 *
 * <p>Invocations of {@link #newHasher(int)} also delegate to {@linkplain #newHasher()}, ignoring
 * the expected input size parameter.
 *
 * @author Kevin Bourrillion
 */
abstract class AbstractStreamingHashFunction implements HashFunction {
  @Override public HashCode hashString(CharSequence input) {
    return newHasher().putString(input).hash();
  }

  @Override public HashCode hashString(CharSequence input, Charset charset) {
    return newHasher().putString(input, charset).hash();
  }

  @Override public HashCode hashLong(long input) {
    return newHasher().putLong(input).hash();
  }

  @Override public HashCode hashBytes(byte[] input) {
    return newHasher().putBytes(input).hash();
  }

  @Override public HashCode hashBytes(byte[] input, int off, int len) {
    return newHasher().putBytes(input, off, len).hash();
  }

  @Override public Hasher newHasher(int expectedInputSize) {
    // Use the statically imported checkArgument (consistent with the rest of this file) and
    // include the offending value in the failure message.
    checkArgument(
        expectedInputSize >= 0, "expectedInputSize must be >= 0 but was %s", expectedInputSize);
    return newHasher();
  }

  /**
   * A convenience base class for implementors of {@code Hasher}; handles accumulating data
   * until an entire "chunk" (of implementation-dependent length) is ready to be hashed.
   *
   * @author kevinb@google.com (Kevin Bourrillion)
   * @author andreou@google.com (Dimitris Andreou)
   */
  // TODO(kevinb): this class still needs some design-and-document-for-inheritance love
  protected abstract static class AbstractStreamingHasher extends AbstractHasher {
    /** Buffer via which we pass data to the hash algorithm (the implementor). */
    private final ByteBuffer buffer;

    /** Number of bytes to be filled before process() invocation(s). */
    private final int bufferSize;

    /** Number of bytes processed per process() invocation. */
    private final int chunkSize;

    /**
     * Constructor for use by subclasses. This hasher instance will process chunks of the specified
     * size.
     *
     * @param chunkSize the number of bytes available per {@link #process(ByteBuffer)} invocation;
     *        must be at least 4
     */
    protected AbstractStreamingHasher(int chunkSize) {
      this(chunkSize, chunkSize);
    }

    /**
     * Constructor for use by subclasses. This hasher instance will process chunks of the specified
     * size, using an internal buffer of {@code bufferSize} size, which must be a multiple of
     * {@code chunkSize}.
     *
     * @param chunkSize the number of bytes available per {@link #process(ByteBuffer)} invocation;
     *        must be at least 4
     * @param bufferSize the size of the internal buffer. Must be a multiple of chunkSize
     */
    protected AbstractStreamingHasher(int chunkSize, int bufferSize) {
      // TODO(kevinb): check more preconditions (as bufferSize >= chunkSize) if this is ever public
      // Enforce the documented "at least 4" contract (putInt/processRemaining rely on it).
      checkArgument(chunkSize >= 4, "chunkSize must be at least 4, but was %s", chunkSize);
      checkArgument(bufferSize % chunkSize == 0,
          "bufferSize (%s) must be a multiple of chunkSize (%s)", bufferSize, chunkSize);

      // TODO(user): benchmark performance difference with longer buffer
      // The extra 7 bytes leave room to write any single primitive (up to a long) even when the
      // logical buffer is full; munchIfFull() drains full chunks before the next write.
      this.buffer = ByteBuffer
          .allocate(bufferSize + 7) // always space for a single primitive
          .order(ByteOrder.LITTLE_ENDIAN);
      this.bufferSize = bufferSize;
      this.chunkSize = chunkSize;
    }

    /**
     * Processes the available bytes of the buffer (at most {@code chunk} bytes).
     */
    protected abstract void process(ByteBuffer bb);

    /**
     * This is invoked for the last bytes of the input, which are not enough to
     * fill a whole chunk. The passed {@code ByteBuffer} is guaranteed to be
     * non-empty.
     *
     * <p>This implementation simply pads with zeros and delegates to
     * {@link #process(ByteBuffer)}.
     */
    protected void processRemaining(ByteBuffer bb) {
      bb.position(bb.limit()); // move at the end
      bb.limit(chunkSize + 7); // get ready to pad with longs
      while (bb.position() < chunkSize) {
        bb.putLong(0);
      }
      bb.limit(chunkSize);
      bb.flip();
      process(bb);
    }

    @Override
    public final Hasher putBytes(byte[] bytes) {
      return putBytes(bytes, 0, bytes.length);
    }

    @Override
    public final Hasher putBytes(byte[] bytes, int off, int len) {
      return putBytes(ByteBuffer.wrap(bytes, off, len).order(ByteOrder.LITTLE_ENDIAN));
    }

    // Note: redundant 'final' modifier removed — the method is private.
    private Hasher putBytes(ByteBuffer readBuffer) {
      // If we have room for all of it, this is easy
      if (readBuffer.remaining() <= buffer.remaining()) {
        buffer.put(readBuffer);
        munchIfFull();
        return this;
      }

      // First add just enough to fill buffer size, and munch that
      int bytesToCopy = bufferSize - buffer.position();
      for (int i = 0; i < bytesToCopy; i++) {
        buffer.put(readBuffer.get());
      }
      munch(); // buffer becomes empty here, since chunkSize divides bufferSize

      // Now process directly from the rest of the input buffer
      while (readBuffer.remaining() >= chunkSize) {
        process(readBuffer);
      }

      // Finally stick the remainder back in our usual buffer
      buffer.put(readBuffer);
      return this;
    }

    @Override
    public final Hasher putString(CharSequence charSequence) {
      // Hashes the sequence char-by-char, i.e. as little-endian UTF-16 code units.
      for (int i = 0; i < charSequence.length(); i++) {
        putChar(charSequence.charAt(i));
      }
      return this;
    }

    @Override
    public final Hasher putByte(byte b) {
      buffer.put(b);
      munchIfFull();
      return this;
    }

    @Override
    public final Hasher putShort(short s) {
      buffer.putShort(s);
      munchIfFull();
      return this;
    }

    @Override
    public final Hasher putChar(char c) {
      buffer.putChar(c);
      munchIfFull();
      return this;
    }

    @Override
    public final Hasher putInt(int i) {
      buffer.putInt(i);
      munchIfFull();
      return this;
    }

    @Override
    public final Hasher putLong(long l) {
      buffer.putLong(l);
      munchIfFull();
      return this;
    }

    @Override
    public final <T> Hasher putObject(T instance, Funnel<? super T> funnel) {
      funnel.funnel(instance, this);
      return this;
    }

    @Override
    public final HashCode hash() {
      munch();
      buffer.flip();
      if (buffer.remaining() > 0) {
        processRemaining(buffer);
      }
      return makeHash();
    }

    /** Computes the final hash from the algorithm's accumulated state. */
    abstract HashCode makeHash();

    // Process pent-up data in chunks
    private void munchIfFull() {
      if (buffer.remaining() < 8) {
        // buffer is full; not enough room for a primitive. We have at least one full chunk.
        munch();
      }
    }

    private void munch() {
      buffer.flip();
      while (buffer.remaining() >= chunkSize) {
        // we could limit the buffer to ensure process() does not read more than
        // chunkSize number of bytes, but we trust the implementations
        process(buffer);
      }
      buffer.compact(); // preserve any remaining data that do not make a full chunk
    }
  }
}