1311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Copyright 2007, 2008 Google Inc.
2311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Authors: Jeff Dean, Sanjay Ghemawat, Lincoln Smith
3311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
4311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Licensed under the Apache License, Version 2.0 (the "License");
5311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// you may not use this file except in compliance with the License.
6311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// You may obtain a copy of the License at
7311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
8311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//      http://www.apache.org/licenses/LICENSE-2.0
9311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
10311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Unless required by applicable law or agreed to in writing, software
11311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// distributed under the License is distributed on an "AS IS" BASIS,
12311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// See the License for the specific language governing permissions and
14311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// limitations under the License.
15311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
16311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#ifndef OPEN_VCDIFF_ROLLING_HASH_H_
17311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#define OPEN_VCDIFF_ROLLING_HASH_H_
18311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
19311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#include <config.h>
20311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#include <stdint.h>  // uint32_t
21732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com#include "compile_assert.h"
22311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#include "logging.h"
23311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
24311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffnamespace open_vcdiff {
25311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
26311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Rabin-Karp hasher module -- this is a faster version with different
27311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// constants, so it's not quite Rabin-Karp fingerprinting, but its behavior is
28311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// close enough for most applications.
29311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
30311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Definitions common to all hash window sizes.
31311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffclass RollingHashUtil {
32311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff public:
33311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Multiplier for incremental hashing.  The compiler should be smart enough to
34311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // convert (val * kMult) into ((val << 8) + val).
35311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  static const uint32_t kMult = 257;
36311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
37311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // All hashes are returned modulo "kBase".  Current implementation requires
38311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // kBase <= 2^32/kMult to avoid overflow.  Also, kBase must be a power of two
39311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // so that we can compute modulus efficiently.
40311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  static const uint32_t kBase = (1 << 23);
41311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
42311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Returns operand % kBase, assuming that kBase is a power of two.
43311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  static inline uint32_t ModBase(uint32_t operand) {
44311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return operand & (kBase - 1);
45311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
46311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
47311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Given an unsigned integer "operand", returns an unsigned integer "result"
48311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // such that
49311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  //     result < kBase
50311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // and
51311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  //     ModBase(operand + result) == 0
52311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  static inline uint32_t FindModBaseInverse(uint32_t operand) {
53311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // The subtraction (0 - operand) produces an unsigned underflow for any
54311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // operand except 0.  The underflow results in a (very large) unsigned
55311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // number.  Binary subtraction is used instead of unary negation because
56311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // some compilers (e.g. Visual Studio 7+) produce a warning if an unsigned
57311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // value is negated.
58311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    //
59311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // The C++ mod operation (operand % kBase) may produce different results for
60311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // different compilers if operand is negative.  That is not a problem in
61311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // this case, since all numbers used are unsigned, and ModBase does its work
62311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // using bitwise arithmetic rather than the % operator.
63311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return ModBase(uint32_t(0) - operand);
64311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
65311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
66311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Here's the heart of the hash algorithm.  Start with a partial_hash value of
67311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // 0, and run this HashStep once against each byte in the data window to be
68311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // hashed.  The result will be the hash value for the entire data window.  The
69311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Hash() function, below, does exactly this, albeit with some refinements.
70311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  static inline uint32_t HashStep(uint32_t partial_hash,
71311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                  unsigned char next_byte) {
72311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return ModBase((partial_hash * kMult) + next_byte);
73311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
74311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
75311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Use this function to start computing a new hash value based on the first
76311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // two bytes in the window.  It is equivalent to calling
77311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  //     HashStep(HashStep(0, ptr[0]), ptr[1])
78311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // but takes advantage of the fact that the maximum value of
79311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // (ptr[0] * kMult) + ptr[1] is not large enough to exceed kBase, thus
80311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // avoiding an unnecessary ModBase operation.
81311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  static inline uint32_t HashFirstTwoBytes(const char* ptr) {
82311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return (static_cast<unsigned char>(ptr[0]) * kMult)
83311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff        + static_cast<unsigned char>(ptr[1]);
84311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
85311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff private:
86311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Making these private avoids copy constructor and assignment operator.
87311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // No objects of this type should be constructed.
88311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  RollingHashUtil();
89311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  RollingHashUtil(const RollingHashUtil&);  // NOLINT
90311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  void operator=(const RollingHashUtil&);
91311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff};
92311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
93311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// window_size must be >= 2.
94311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdifftemplate<int window_size>
95311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffclass RollingHash {
96311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff public:
97311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Perform global initialization that is required in order to instantiate a
98311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // RollingHash.  This function *must* be called (preferably on startup) by any
99311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // program that uses a RollingHash.  It is harmless to call this function more
100311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // than once.  It is not thread-safe, but calling it from two different
101311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // threads at the same time can only cause a memory leak, not incorrect
102311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // behavior.  Make sure to call it before spawning any threads that could use
103732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com  // RollingHash.
104732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com  static void Init();
105311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
106311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Initialize hasher to maintain a window of the specified size.  You need an
107311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // instance of this type to use UpdateHash(), but Hash() does not depend on
108311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // remove_table_, so it is static.
109311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  RollingHash() {
110311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    if (!remove_table_) {
111732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com      VCD_DFATAL << "RollingHash object instantiated"
112732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com                    " before calling RollingHash::Init()" << VCD_ENDL;
113311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    }
114311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
115311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
116311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Compute a hash of the window "ptr[0, window_size - 1]".
117311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  static uint32_t Hash(const char* ptr) {
118311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    uint32_t h = RollingHashUtil::HashFirstTwoBytes(ptr);
119311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    for (int i = 2; i < window_size; ++i) {
120311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      h = RollingHashUtil::HashStep(h, ptr[i]);
121311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    }
122311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return h;
123311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
124311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
125311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Update a hash by removing the oldest byte and adding a new byte.
126311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  //
127311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // UpdateHash takes the hash value of buffer[0] ... buffer[window_size -1]
128311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // along with the value of buffer[0] (the "old_first_byte" argument)
129311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // and the value of buffer[window_size] (the "new_last_byte" argument).
130311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // It quickly computes the hash value of buffer[1] ... buffer[window_size]
131311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // without having to run Hash() on the entire window.
132311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  //
133311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // The larger the window, the more advantage comes from using UpdateHash()
134311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // (which runs in time independent of window_size) instead of Hash().
135311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Each time window_size doubles, the time to execute Hash() also doubles,
136311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // while the time to execute UpdateHash() remains constant.  Empirical tests
137311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // have borne out this statement.
138311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  uint32_t UpdateHash(uint32_t old_hash,
139311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                      const char old_first_byte,
140311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                      const char new_last_byte) const {
141311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    uint32_t partial_hash = RemoveFirstByteFromHash(old_hash, old_first_byte);
142311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return RollingHashUtil::HashStep(partial_hash, new_last_byte);
143311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
144311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
145311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff protected:
146311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Given a full hash value for buffer[0] ... buffer[window_size -1], plus the
147311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // value of the first byte buffer[0], this function returns a *partial* hash
148311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // value for buffer[1] ... buffer[window_size -1].  See the comments in
149311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Init(), below, for a description of how the contents of remove_table_ are
150311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // computed.
151311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  static uint32_t RemoveFirstByteFromHash(uint32_t full_hash,
152311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                          unsigned char first_byte) {
153311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return RollingHashUtil::ModBase(full_hash + remove_table_[first_byte]);
154311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
155311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
156311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff private:
157311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // We keep a table that maps from any byte "b" to
158311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  //    (- b * pow(kMult, window_size - 1)) % kBase
159311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  static const uint32_t* remove_table_;
160311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff};
161311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
162311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// For each window_size, fill a 256-entry table such that
163311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//        the hash value of buffer[0] ... buffer[window_size - 1]
164311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//      + remove_table_[buffer[0]]
165311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//     == the hash value of buffer[1] ... buffer[window_size - 1]
166311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// See the comments in Init(), below, for a description of how the contents of
167311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// remove_table_ are computed.
168311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdifftemplate<int window_size>
169311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffconst uint32_t* RollingHash<window_size>::remove_table_ = NULL;
170311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
171311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Init() checks to make sure that the static object remove_table_ has been
172311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// initialized; if not, it does the considerable work of populating it.  Once
173311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// it's ready, the table can be used for any number of RollingHash objects of
174311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// the same window_size.
175311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
176311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdifftemplate<int window_size>
177732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.comvoid RollingHash<window_size>::Init() {
178732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com  VCD_COMPILE_ASSERT(window_size >= 2,
179732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com                     RollingHash_window_size_must_be_at_least_2);
180311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (remove_table_ == NULL) {
181311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // The new object is placed into a local pointer instead of directly into
182311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // remove_table_, for two reasons:
183311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    //   1. remove_table_ is a pointer to const.  The table is populated using
184311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    //      the non-const local pointer and then assigned to the global const
185311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    //      pointer once it's ready.
186311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    //   2. No other thread will ever see remove_table_ pointing to a
187311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    //      partially-initialized table.  If two threads happen to call Init()
188311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    //      at the same time, two tables with the same contents may be created
189311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    //      (causing a memory leak), but the results will be consistent
190311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    //      no matter which of the two tables is used.
191311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    uint32_t* new_remove_table = new uint32_t[256];
192311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // Compute multiplier.  Concisely, it is:
193311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    //     pow(kMult, (window_size - 1)) % kBase,
194311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // but we compute the power in integer form.
195311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    uint32_t multiplier = 1;
196311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    for (int i = 0; i < window_size - 1; ++i) {
197311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      multiplier =
198311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff          RollingHashUtil::ModBase(multiplier * RollingHashUtil::kMult);
199311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    }
200311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // For each character removed_byte, compute
201311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    //     remove_table_[removed_byte] ==
202311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    //         (- (removed_byte * pow(kMult, (window_size - 1)))) % kBase
203311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // where the power operator "pow" is taken in integer form.
204311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    //
205311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // If you take a hash value fp representing the hash of
206311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    //     buffer[0] ... buffer[window_size - 1]
207311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // and add the value of remove_table_[buffer[0]] to it, the result will be
208311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // a partial hash value for
209311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    //     buffer[1] ... buffer[window_size - 1]
210311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // that is to say, it no longer includes buffer[0].
211311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    //
212311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // The following byte at buffer[window_size] can then be merged with this
213311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // partial hash value to arrive quickly at the hash value for a window that
214311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // has advanced by one byte, to
215311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    //     buffer[1] ... buffer[window_size]
216311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // In fact, that is precisely what happens in UpdateHash, above.
217311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    uint32_t byte_times_multiplier = 0;
218311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    for (int removed_byte = 0; removed_byte < 256; ++removed_byte) {
219311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      new_remove_table[removed_byte] =
220311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff          RollingHashUtil::FindModBaseInverse(byte_times_multiplier);
221311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      // Iteratively adding the multiplier in this loop is equivalent to
222311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      // computing (removed_byte * multiplier), and is faster
223311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      byte_times_multiplier =
224311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff          RollingHashUtil::ModBase(byte_times_multiplier + multiplier);
225311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    }
226311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    remove_table_ = new_remove_table;
227311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
228311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
229311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
230311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}  // namespace open_vcdiff
231311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
232311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#endif  // OPEN_VCDIFF_ROLLING_HASH_H_
233