1c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Copyright 2007, 2008 Google Inc. 2c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Authors: Jeff Dean, Sanjay Ghemawat, Lincoln Smith 3c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// 4c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Licensed under the Apache License, Version 2.0 (the "License"); 5c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// you may not use this file except in compliance with the License. 6c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// You may obtain a copy of the License at 7c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// 8c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// http://www.apache.org/licenses/LICENSE-2.0 9c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// 10c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Unless required by applicable law or agreed to in writing, software 11c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// distributed under the License is distributed on an "AS IS" BASIS, 12c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// See the License for the specific language governing permissions and 14c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// limitations under the License. 15c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 16c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#ifndef OPEN_VCDIFF_ROLLING_HASH_H_ 17c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#define OPEN_VCDIFF_ROLLING_HASH_H_ 18c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 19c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <config.h> 20c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <stdint.h> // uint32_t 21c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "logging.h" 22c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 23c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace open_vcdiff { 24c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 25c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Rabin-Karp hasher module -- this is a faster version with different 26c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// constants, so it's not quite Rabin-Karp fingerprinting, but its behavior is 27c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// close enough for most applications. 28c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 29c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Definitions common to all hash window sizes. 30c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass RollingHashUtil { 31c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott public: 32c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Multiplier for incremental hashing. The compiler should be smart enough to 33c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // convert (val * kMult) into ((val << 8) + val). 34c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott static const uint32_t kMult = 257; 35c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 36c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // All hashes are returned modulo "kBase". Current implementation requires 37c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // kBase <= 2^32/kMult to avoid overflow. Also, kBase must be a power of two 38c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // so that we can compute modulus efficiently. 39c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott static const uint32_t kBase = (1 << 23); 40c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 41c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Returns operand % kBase, assuming that kBase is a power of two. 42c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott static inline uint32_t ModBase(uint32_t operand) { 43c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return operand & (kBase - 1); 44c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 45c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 46c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Given an unsigned integer "operand", returns an unsigned integer "result" 47c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // such that 48c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // result < kBase 49c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // and 50c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // ModBase(operand + result) == 0 51c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott static inline uint32_t FindModBaseInverse(uint32_t operand) { 52c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // The subtraction (0 - operand) produces an unsigned underflow for any 53c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // operand except 0. The underflow results in a (very large) unsigned 54c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // number. Binary subtraction is used instead of unary negation because 55c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // some compilers (e.g. Visual Studio 7+) produce a warning if an unsigned 56c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // value is negated. 57c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // 58c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // The C++ mod operation (operand % kBase) may produce different results for 59c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // different compilers if operand is negative. That is not a problem in 60c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // this case, since all numbers used are unsigned, and ModBase does its work 61c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // using bitwise arithmetic rather than the % operator. 62c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return ModBase(uint32_t(0) - operand); 63c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 64c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 65c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Here's the heart of the hash algorithm. Start with a partial_hash value of 66c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // 0, and run this HashStep once against each byte in the data window to be 67c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // hashed. The result will be the hash value for the entire data window. The 68c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Hash() function, below, does exactly this, albeit with some refinements. 69c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott static inline uint32_t HashStep(uint32_t partial_hash, 70c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott unsigned char next_byte) { 71c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return ModBase((partial_hash * kMult) + next_byte); 72c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 73c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 74c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Use this function to start computing a new hash value based on the first 75c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // two bytes in the window. It is equivalent to calling 76c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // HashStep(HashStep(0, ptr[0]), ptr[1]) 77c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // but takes advantage of the fact that the maximum value of 78c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // (ptr[0] * kMult) + ptr[1] is not large enough to exceed kBase, thus 79c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // avoiding an unnecessary ModBase operation. 80c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott static inline uint32_t HashFirstTwoBytes(const char* ptr) { 81c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return (static_cast<unsigned char>(ptr[0]) * kMult) 82c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott + static_cast<unsigned char>(ptr[1]); 83c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 84c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott private: 85c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Making these private avoids copy constructor and assignment operator. 86c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // No objects of this type should be constructed. 87c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott RollingHashUtil(); 88c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott RollingHashUtil(const RollingHashUtil&); // NOLINT 89c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott void operator=(const RollingHashUtil&); 90c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}; 91c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 92c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// window_size must be >= 2. 93c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<int window_size> 94c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass RollingHash { 95c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott public: 96c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Perform global initialization that is required in order to instantiate a 97c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // RollingHash. This function *must* be called (preferably on startup) by any 98c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // program that uses a RollingHash. It is harmless to call this function more 99c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // than once. It is not thread-safe, but calling it from two different 100c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // threads at the same time can only cause a memory leak, not incorrect 101c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // behavior. Make sure to call it before spawning any threads that could use 102c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // RollingHash. The function returns true if initialization succeeds, or 103c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // false if initialization fails, in which case the caller should not proceed 104c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // to construct any objects of type RollingHash. 105c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott static bool Init(); 106c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 107c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Initialize hasher to maintain a window of the specified size. You need an 108c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // instance of this type to use UpdateHash(), but Hash() does not depend on 109c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // remove_table_, so it is static. 110c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott RollingHash() { 111c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (!remove_table_) { 112c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott LOG(DFATAL) << "RollingHash object instantiated" 113c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott " before calling RollingHash::Init()" << LOG_ENDL; 114c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 115c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 116c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 117c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Compute a hash of the window "ptr[0, window_size - 1]". 118c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott static uint32_t Hash(const char* ptr) { 119c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott uint32_t h = RollingHashUtil::HashFirstTwoBytes(ptr); 120c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott for (int i = 2; i < window_size; ++i) { 121c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott h = RollingHashUtil::HashStep(h, ptr[i]); 122c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 123c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return h; 124c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 125c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 126c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Update a hash by removing the oldest byte and adding a new byte. 127c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // 128c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // UpdateHash takes the hash value of buffer[0] ... buffer[window_size -1] 129c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // along with the value of buffer[0] (the "old_first_byte" argument) 130c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // and the value of buffer[window_size] (the "new_last_byte" argument). 131c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // It quickly computes the hash value of buffer[1] ... buffer[window_size] 132c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // without having to run Hash() on the entire window. 133c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // 134c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // The larger the window, the more advantage comes from using UpdateHash() 135c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // (which runs in time independent of window_size) instead of Hash(). 136c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Each time window_size doubles, the time to execute Hash() also doubles, 137c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // while the time to execute UpdateHash() remains constant. Empirical tests 138c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // have borne out this statement. 139c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott uint32_t UpdateHash(uint32_t old_hash, 140c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char old_first_byte, 141c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char new_last_byte) const { 142c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott uint32_t partial_hash = RemoveFirstByteFromHash(old_hash, old_first_byte); 143c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return RollingHashUtil::HashStep(partial_hash, new_last_byte); 144c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 145c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 146c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott protected: 147c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Given a full hash value for buffer[0] ... buffer[window_size -1], plus the 148c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // value of the first byte buffer[0], this function returns a *partial* hash 149c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // value for buffer[1] ... buffer[window_size -1]. See the comments in 150c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Init(), below, for a description of how the contents of remove_table_ are 151c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // computed. 152c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott static uint32_t RemoveFirstByteFromHash(uint32_t full_hash, 153c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott unsigned char first_byte) { 154c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return RollingHashUtil::ModBase(full_hash + remove_table_[first_byte]); 155c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 156c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 157c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott private: 158c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // We keep a table that maps from any byte "b" to 159c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // (- b * pow(kMult, window_size - 1)) % kBase 160c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott static const uint32_t* remove_table_; 161c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}; 162c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 163c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// For each window_size, fill a 256-entry table such that 164c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// the hash value of buffer[0] ... buffer[window_size - 1] 165c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// + remove_table_[buffer[0]] 166c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// == the hash value of buffer[1] ... buffer[window_size - 1] 167c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// See the comments in Init(), below, for a description of how the contents of 168c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// remove_table_ are computed. 169c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<int window_size> 170c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottconst uint32_t* RollingHash<window_size>::remove_table_ = NULL; 171c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 172c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Init() checks to make sure that the static object remove_table_ has been 173c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// initialized; if not, it does the considerable work of populating it. Once 174c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// it's ready, the table can be used for any number of RollingHash objects of 175c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// the same window_size. 176c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// 177c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<int window_size> 178c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottbool RollingHash<window_size>::Init() { 179c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (window_size < 2) { 180c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott LOG(ERROR) << "RollingHash window size " << window_size 181c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott << " is too small" << LOG_ENDL; 182c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return false; 183c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 184c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (remove_table_ == NULL) { 185c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // The new object is placed into a local pointer instead of directly into 186c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // remove_table_, for two reasons: 187c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // 1. remove_table_ is a pointer to const. The table is populated using 188c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // the non-const local pointer and then assigned to the global const 189c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // pointer once it's ready. 190c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // 2. No other thread will ever see remove_table_ pointing to a 191c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // partially-initialized table. If two threads happen to call Init() 192c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // at the same time, two tables with the same contents may be created 193c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // (causing a memory leak), but the results will be consistent 194c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // no matter which of the two tables is used. 195c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott uint32_t* new_remove_table = new uint32_t[256]; 196c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Compute multiplier. Concisely, it is: 197c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // pow(kMult, (window_size - 1)) % kBase, 198c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // but we compute the power in integer form. 199c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott uint32_t multiplier = 1; 200c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott for (int i = 0; i < window_size - 1; ++i) { 201c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott multiplier = 202c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott RollingHashUtil::ModBase(multiplier * RollingHashUtil::kMult); 203c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 204c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // For each character removed_byte, compute 205c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // remove_table_[removed_byte] == 206c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // (- (removed_byte * pow(kMult, (window_size - 1)))) % kBase 207c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // where the power operator "pow" is taken in integer form. 208c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // 209c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // If you take a hash value fp representing the hash of 210c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // buffer[0] ... buffer[window_size - 1] 211c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // and add the value of remove_table_[buffer[0]] to it, the result will be 212c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // a partial hash value for 213c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // buffer[1] ... buffer[window_size - 1] 214c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // that is to say, it no longer includes buffer[0]. 215c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // 216c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // The following byte at buffer[window_size] can then be merged with this 217c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // partial hash value to arrive quickly at the hash value for a window that 218c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // has advanced by one byte, to 219c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // buffer[1] ... buffer[window_size] 220c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // In fact, that is precisely what happens in UpdateHash, above. 221c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott uint32_t byte_times_multiplier = 0; 222c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott for (int removed_byte = 0; removed_byte < 256; ++removed_byte) { 223c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott new_remove_table[removed_byte] = 224c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott RollingHashUtil::FindModBaseInverse(byte_times_multiplier); 225c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Iteratively adding the multiplier in this loop is equivalent to 226c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // computing (removed_byte * multiplier), and is faster 227c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott byte_times_multiplier = 228c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott RollingHashUtil::ModBase(byte_times_multiplier + multiplier); 229c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 230c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott remove_table_ = new_remove_table; 231c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 232c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return true; 233c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 234c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 235c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} // namespace open_vcdiff 236c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 237c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#endif // OPEN_VCDIFF_ROLLING_HASH_H_ 238