1311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Copyright 2006, 2008 Google Inc.
2311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Authors: Chandra Chereddi, Lincoln Smith
3311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
4311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Licensed under the Apache License, Version 2.0 (the "License");
5311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// you may not use this file except in compliance with the License.
6311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// You may obtain a copy of the License at
7311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
8311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//      http://www.apache.org/licenses/LICENSE-2.0
9311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
10311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Unless required by applicable law or agreed to in writing, software
11311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// distributed under the License is distributed on an "AS IS" BASIS,
12311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// See the License for the specific language governing permissions and
14311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// limitations under the License.
15311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
16311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#include <config.h>
17311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#include "blockhash.h"
18311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#include <stdint.h>  // uint32_t
1928db8079f707ebdf43ce62cdfd96eb39c8f889e0openvcdiff#include <string.h>  // memcpy, memcmp
20732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com#include <algorithm>  // std::min
21732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com#include "compile_assert.h"
22311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#include "logging.h"
23311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#include "rolling_hash.h"
24311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
25311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffnamespace open_vcdiff {
26311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
27311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdifftypedef unsigned long uword_t;  // a machine word                         NOLINT
28311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
29311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffBlockHash::BlockHash(const char* source_data,
30311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                     size_t source_size,
31311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                     int starting_offset)
32311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    : source_data_(source_data),
33311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      source_size_(source_size),
34311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      hash_table_mask_(0),
35311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      starting_offset_(starting_offset),
36311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      last_block_added_(-1) {
37311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
38311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
39311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffBlockHash::~BlockHash() { }
40311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
41311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// kBlockSize must be at least 2 to be meaningful.  Since it's a compile-time
42311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// constant, check its value at compile time rather than wasting CPU cycles
43311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// on runtime checks.
44732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.comVCD_COMPILE_ASSERT(BlockHash::kBlockSize >= 2, kBlockSize_must_be_at_least_2);
45311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
46311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// kBlockSize is required to be a power of 2 because multiplication
47311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// (n * kBlockSize), division (n / kBlockSize) and MOD (n % kBlockSize)
48311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// are commonly-used operations.  If kBlockSize is a compile-time
49311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// constant and a power of 2, the compiler can convert these three operations
50311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// into bit-shift (>> or <<) and bitwise-AND (&) operations, which are much
51311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// more efficient than executing full integer multiply, divide, or remainder
52311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// instructions.
53732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.comVCD_COMPILE_ASSERT((BlockHash::kBlockSize & (BlockHash::kBlockSize - 1)) == 0,
54732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com                   kBlockSize_must_be_a_power_of_2);
55311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
56311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffbool BlockHash::Init(bool populate_hash_table) {
57311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (!hash_table_.empty() ||
58311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      !next_block_table_.empty() ||
59311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      !last_block_table_.empty()) {
60732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com    VCD_DFATAL << "Init() called twice for same BlockHash object" << VCD_ENDL;
61311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return false;
62311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
63311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  const size_t table_size = CalcTableSize(source_size_);
64311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (table_size == 0) {
65732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com    VCD_DFATAL << "Error finding table size for source size " << source_size_
66732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com               << VCD_ENDL;
67311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return false;
68311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
69311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Since table_size is a power of 2, (table_size - 1) is a bit mask
70311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // containing all the bits below table_size.
71311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  hash_table_mask_ = static_cast<uint32_t>(table_size - 1);
72311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  hash_table_.resize(table_size, -1);
73311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  next_block_table_.resize(GetNumberOfBlocks(), -1);
74311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  last_block_table_.resize(GetNumberOfBlocks(), -1);
75311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (populate_hash_table) {
76311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    AddAllBlocks();
77311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
78311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  return true;
79311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
80311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
81311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffconst BlockHash* BlockHash::CreateDictionaryHash(const char* dictionary_data,
82311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                                 size_t dictionary_size) {
83311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  BlockHash* new_dictionary_hash = new BlockHash(dictionary_data,
84311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                                 dictionary_size,
85311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                                 0);
86311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (!new_dictionary_hash->Init(/* populate_hash_table = */ true)) {
87311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    delete new_dictionary_hash;
88311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return NULL;
89311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  } else {
90311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return new_dictionary_hash;
91311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
92311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
93311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
94311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffBlockHash* BlockHash::CreateTargetHash(const char* target_data,
95311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                       size_t target_size,
96311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                       size_t dictionary_size) {
97311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  BlockHash* new_target_hash = new BlockHash(target_data,
98311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                             target_size,
99311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                             static_cast<int>(dictionary_size));
100311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (!new_target_hash->Init(/* populate_hash_table = */ false)) {
101311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    delete new_target_hash;
102311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return NULL;
103311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  } else {
104311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return new_target_hash;
105311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
106311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
107311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
108311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Returns zero if an error occurs.
10983bbde0df33922d8dc6fa737cfb306d9caae13b1openvcdiffsize_t BlockHash::CalcTableSize(const size_t dictionary_size) {
110311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Overallocate the hash table by making it the same size (in bytes)
111311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // as the source data.  This is a trade-off between space and time:
112311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // the empty entries in the hash table will reduce the
113311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // probability of a hash collision to (sizeof(int) / kblockSize),
114311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // and so save time comparing false matches.
115311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  const size_t min_size = (dictionary_size / sizeof(int)) + 1;  // NOLINT
116311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  size_t table_size = 1;
117311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Find the smallest power of 2 that is >= min_size, and assign
118311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // that value to table_size.
119311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  while (table_size < min_size) {
120311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    table_size <<= 1;
121311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // Guard against an infinite loop
122311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    if (table_size <= 0) {
123732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com      VCD_DFATAL << "Internal error: CalcTableSize(dictionary_size = "
124732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com                 << dictionary_size
125732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com                 << "): resulting table_size " << table_size
126732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com                 << " is zero or negative" << VCD_ENDL;
127311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      return 0;
128311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    }
129311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
130311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Check size sanity
131311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if ((table_size & (table_size - 1)) != 0) {
132732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com    VCD_DFATAL << "Internal error: CalcTableSize(dictionary_size = "
133732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com               << dictionary_size
134732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com               << "): resulting table_size " << table_size
135732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com               << " is not a power of 2" << VCD_ENDL;
136311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return 0;
137311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
138311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // The loop above tries to find the smallest power of 2 that is >= min_size.
139311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // That value must lie somewhere between min_size and (min_size * 2),
140311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // except for the case (dictionary_size == 0, table_size == 1).
141311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if ((dictionary_size > 0) && (table_size > (min_size * 2))) {
142732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com    VCD_DFATAL << "Internal error: CalcTableSize(dictionary_size = "
143732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com               << dictionary_size
144732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com               << "): resulting table_size " << table_size
145732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com               << " is too large" << VCD_ENDL;
146311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return 0;
147311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
148311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  return table_size;
149311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
150311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
151311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// If the hash value is already available from the rolling hash,
152311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// call this function to save time.
153311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffvoid BlockHash::AddBlock(uint32_t hash_value) {
154311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (hash_table_.empty()) {
155732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com    VCD_DFATAL << "BlockHash::AddBlock() called before BlockHash::Init()"
156732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com               << VCD_ENDL;
157311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return;
158311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
159311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // The initial value of last_block_added_ is -1.
160311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  int block_number = last_block_added_ + 1;
161311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  const int total_blocks =
162311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      static_cast<int>(source_size_ / kBlockSize);  // round down
163311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (block_number >= total_blocks) {
164732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com    VCD_DFATAL << "BlockHash::AddBlock() called"
165732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com                  " with block number " << block_number
166732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com               << " that is past last block " << (total_blocks - 1)
167732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com               << VCD_ENDL;
168311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return;
169311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
170311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (next_block_table_[block_number] != -1) {
171732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com    VCD_DFATAL << "Internal error in BlockHash::AddBlock(): "
172732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com                  "block number = " << block_number
173732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com               << ", next block should be -1 but is "
174732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com               << next_block_table_[block_number] << VCD_ENDL;
175311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return;
176311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
177311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  const uint32_t hash_table_index = GetHashTableIndex(hash_value);
178311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  const int first_matching_block = hash_table_[hash_table_index];
179311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (first_matching_block < 0) {
180311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // This is the first entry with this hash value
181311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    hash_table_[hash_table_index] = block_number;
182311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    last_block_table_[block_number] = block_number;
183311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  } else {
184311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // Add this entry at the end of the chain of matching blocks
185311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    const int last_matching_block = last_block_table_[first_matching_block];
186311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    if (next_block_table_[last_matching_block] != -1) {
187732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com      VCD_DFATAL << "Internal error in BlockHash::AddBlock(): "
188732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com                    "first matching block = " << first_matching_block
189732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com                 << ", last matching block = " << last_matching_block
190732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com                 << ", next block should be -1 but is "
191732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com                 << next_block_table_[last_matching_block] << VCD_ENDL;
192311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      return;
193311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    }
194311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    next_block_table_[last_matching_block] = block_number;
195311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    last_block_table_[first_matching_block] = block_number;
196311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
197311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  last_block_added_ = block_number;
198311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
199311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
200311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffvoid BlockHash::AddAllBlocks() {
201311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  AddAllBlocksThroughIndex(static_cast<int>(source_size_));
202311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
203311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
204311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffvoid BlockHash::AddAllBlocksThroughIndex(int end_index) {
205311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (end_index > static_cast<int>(source_size_)) {
206732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com    VCD_DFATAL << "BlockHash::AddAllBlocksThroughIndex() called"
207732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com                  " with index " << end_index
208732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com               << " higher than end index  " << source_size_ << VCD_ENDL;
209311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return;
210311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
211311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  const int last_index_added = last_block_added_ * kBlockSize;
212311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (end_index <= last_index_added) {
213732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com    VCD_DFATAL << "BlockHash::AddAllBlocksThroughIndex() called"
214732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com                  " with index " << end_index
215732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com               << " <= last index added ( " << last_index_added
216732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com               << ")" << VCD_ENDL;
217311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return;
218311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
219311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  int end_limit = end_index;
220311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Don't allow reading any indices at or past source_size_.
221311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // The Hash function extends (kBlockSize - 1) bytes past the index,
222311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // so leave a margin of that size.
223311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  int last_legal_hash_index = static_cast<int>(source_size() - kBlockSize);
224311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (end_limit > last_legal_hash_index) {
225311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    end_limit = last_legal_hash_index + 1;
226311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
227311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  const char* block_ptr = source_data() + NextIndexToAdd();
228311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  const char* const end_ptr = source_data() + end_limit;
229311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  while (block_ptr < end_ptr) {
230311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    AddBlock(RollingHash<kBlockSize>::Hash(block_ptr));
231311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    block_ptr += kBlockSize;
232311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
233311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
234311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
235732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.comVCD_COMPILE_ASSERT((BlockHash::kBlockSize % sizeof(uword_t)) == 0,
236732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com                   kBlockSize_must_be_a_multiple_of_machine_word_size);
237311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
238311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// A recursive template to compare a fixed number
239311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// of (possibly unaligned) machine words starting
240311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// at addresses block1 and block2.  Returns true or false
241311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// depending on whether an exact match was found.
242311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdifftemplate<int number_of_words>
243311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffinline bool CompareWholeWordValues(const char* block1,
244311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                   const char* block2) {
245311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  return CompareWholeWordValues<1>(block1, block2) &&
246311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff         CompareWholeWordValues<number_of_words - 1>(block1 + sizeof(uword_t),
247311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                                     block2 + sizeof(uword_t));
248311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
249311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
250311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// The base of the recursive template: compare one pair of machine words.
251311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdifftemplate<>
252311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffinline bool CompareWholeWordValues<1>(const char* word1,
253311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                      const char* word2) {
254311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  uword_t aligned_word1, aligned_word2;
255311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  memcpy(&aligned_word1, word1, sizeof(aligned_word1));
256311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  memcpy(&aligned_word2, word2, sizeof(aligned_word2));
257311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  return aligned_word1 == aligned_word2;
258311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
259311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
260311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// A block must be composed of an integral number of machine words
261311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// (uword_t values.)  This function takes advantage of that fact
262311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// by comparing the blocks as series of (possibly unaligned) word values.
263311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// A word-sized comparison can be performed as a single
264311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// machine instruction.  Comparing words instead of bytes means that,
265311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// on a 64-bit platform, this function will use 8 times fewer test-and-branch
266311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// instructions than a byte-by-byte comparison.  Even with the extra
267311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// cost of the calls to memcpy, this method is still at least twice as fast
268311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// as memcmp (measured using gcc on a 64-bit platform, with a block size
269311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// of 32.)  For blocks with identical contents (a common case), this method
270311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// is over six times faster than memcmp.
271311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffinline bool BlockCompareWordsInline(const char* block1, const char* block2) {
272311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  static const size_t kWordsPerBlock = BlockHash::kBlockSize / sizeof(uword_t);
273311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  return CompareWholeWordValues<kWordsPerBlock>(block1, block2);
274311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
275311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
276311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffbool BlockHash::BlockCompareWords(const char* block1, const char* block2) {
277311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  return BlockCompareWordsInline(block1, block2);
278311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
279311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
280311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffinline bool BlockContentsMatchInline(const char* block1, const char* block2) {
281311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Optimize for mismatch in first byte.  Since this function is called only
282311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // when the hash values of the two blocks match, it is very likely that either
283311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // the blocks are identical, or else the first byte does not match.
284311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (*block1 != *block2) {
285311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return false;
286311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
287311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#ifdef VCDIFF_USE_BLOCK_COMPARE_WORDS
288311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  return BlockCompareWordsInline(block1, block2);
289311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#else  // !VCDIFF_USE_BLOCK_COMPARE_WORDS
290311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  return memcmp(block1, block2, BlockHash::kBlockSize) == 0;
291311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#endif  // VCDIFF_USE_BLOCK_COMPARE_WORDS
292311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
293311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
294311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffbool BlockHash::BlockContentsMatch(const char* block1, const char* block2) {
295311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  return BlockContentsMatchInline(block1, block2);
296311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
297311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
298311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffinline int BlockHash::SkipNonMatchingBlocks(int block_number,
299311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                            const char* block_ptr) const {
300311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  int probes = 0;
301311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  while ((block_number >= 0) &&
302311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff         !BlockContentsMatchInline(block_ptr,
303311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                   &source_data_[block_number * kBlockSize])) {
304311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    if (++probes > kMaxProbes) {
305311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      return -1;  // Avoid too much chaining
306311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    }
307311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    block_number = next_block_table_[block_number];
308311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
309311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  return block_number;
310311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
311311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
312311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Init() must have been called and returned true before using
313311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// FirstMatchingBlock or NextMatchingBlock.  No check is performed
314311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// for this condition; the code will crash if this condition is violated.
315311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffinline int BlockHash::FirstMatchingBlockInline(uint32_t hash_value,
316311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                               const char* block_ptr) const {
317311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  return SkipNonMatchingBlocks(hash_table_[GetHashTableIndex(hash_value)],
318311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                               block_ptr);
319311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
320311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
321311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffint BlockHash::FirstMatchingBlock(uint32_t hash_value,
322311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                  const char* block_ptr) const {
323311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  return FirstMatchingBlockInline(hash_value, block_ptr);
324311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
325311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
326311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffint BlockHash::NextMatchingBlock(int block_number,
327311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                 const char* block_ptr) const {
328311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (static_cast<size_t>(block_number) >= GetNumberOfBlocks()) {
329732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com    VCD_DFATAL << "NextMatchingBlock called for invalid block number "
330732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com               << block_number << VCD_ENDL;
331311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return -1;
332311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
333311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  return SkipNonMatchingBlocks(next_block_table_[block_number], block_ptr);
334311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
335311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
336311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Keep a count of the number of matches found.  This will throttle the
337311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// number of iterations in FindBestMatch.  For example, if the entire
338311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// dictionary is made up of spaces (' ') and the search string is also
339311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// made up of spaces, there will be one match for each block in the
340311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// dictionary.
341311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffinline bool BlockHash::TooManyMatches(int* match_counter) {
342311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  ++(*match_counter);
343311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  return (*match_counter) > kMaxMatchesToCheck;
344311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
345311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
346311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Returns the number of bytes to the left of source_match_start
347311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// that match the corresponding bytes to the left of target_match_start.
348311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Will not examine more than max_bytes bytes, which is to say that
349311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// the return value will be in the range [0, max_bytes] inclusive.
350311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffint BlockHash::MatchingBytesToLeft(const char* source_match_start,
351311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                   const char* target_match_start,
352311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                   int max_bytes) {
353311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  const char* source_ptr = source_match_start;
354311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  const char* target_ptr = target_match_start;
355311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  int bytes_found = 0;
356311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  while (bytes_found < max_bytes) {
357311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    --source_ptr;
358311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    --target_ptr;
359311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    if (*source_ptr != *target_ptr) {
360311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      break;
361311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    }
362311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    ++bytes_found;
363311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
364311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  return bytes_found;
365311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
366311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
367311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Returns the number of bytes starting at source_match_end
368311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// that match the corresponding bytes starting at target_match_end.
369311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Will not examine more than max_bytes bytes, which is to say that
370311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// the return value will be in the range [0, max_bytes] inclusive.
371311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffint BlockHash::MatchingBytesToRight(const char* source_match_end,
372311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                    const char* target_match_end,
373311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                    int max_bytes) {
374311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  const char* source_ptr = source_match_end;
375311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  const char* target_ptr = target_match_end;
376311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  int bytes_found = 0;
377311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  while ((bytes_found < max_bytes) && (*source_ptr == *target_ptr)) {
378311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    ++bytes_found;
379311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    ++source_ptr;
380311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    ++target_ptr;
381311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
382311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  return bytes_found;
383311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
384311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
385311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// No NULL checks are performed on the pointer arguments.  The caller
386311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// must guarantee that none of the arguments is NULL, or a crash will occur.
387311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
388311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// The vast majority of calls to FindBestMatch enter the loop *zero* times,
389311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// which is to say that most candidate blocks find no matches in the dictionary.
390311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// The important sections for optimization are therefore the code outside the
391311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// loop and the code within the loop conditions.  Keep this to a minimum.
392311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffvoid BlockHash::FindBestMatch(uint32_t hash_value,
393311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                              const char* target_candidate_start,
394311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                              const char* target_start,
395311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                              size_t target_size,
396311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                              Match* best_match) const {
397311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  int match_counter = 0;
398311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  for (int block_number = FirstMatchingBlockInline(hash_value,
399311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                                   target_candidate_start);
400311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff       (block_number >= 0) && !TooManyMatches(&match_counter);
401311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff       block_number = NextMatchingBlock(block_number, target_candidate_start)) {
402311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    int source_match_offset = block_number * kBlockSize;
403311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    const int source_match_end = source_match_offset + kBlockSize;
404311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
405311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    int target_match_offset =
406311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff        static_cast<int>(target_candidate_start - target_start);
407311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    const int target_match_end = target_match_offset + kBlockSize;
408311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
409311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    size_t match_size = kBlockSize;
410311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    {
411311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      // Extend match start towards beginning of unencoded data
412311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      const int limit_bytes_to_left = std::min(source_match_offset,
413311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                               target_match_offset);
414311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      const int matching_bytes_to_left =
415311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff          MatchingBytesToLeft(source_data_ + source_match_offset,
416311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                              target_start + target_match_offset,
417311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                              limit_bytes_to_left);
418311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      source_match_offset -= matching_bytes_to_left;
419311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      target_match_offset -= matching_bytes_to_left;
420311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      match_size += matching_bytes_to_left;
421311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    }
422311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    {
423311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      // Extend match end towards end of unencoded data
424311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      const size_t source_bytes_to_right = source_size_ - source_match_end;
425311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      const size_t target_bytes_to_right = target_size - target_match_end;
426311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      const size_t limit_bytes_to_right = std::min(source_bytes_to_right,
427311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                                   target_bytes_to_right);
428311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      match_size +=
429311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff          MatchingBytesToRight(source_data_ + source_match_end,
430311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                               target_start + target_match_end,
431311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                               static_cast<int>(limit_bytes_to_right));
432311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    }
433311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // Update in/out parameter if the best match found was better
434311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // than any match already stored in *best_match.
435311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    best_match->ReplaceIfBetterMatch(match_size,
436311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                     source_match_offset + starting_offset_,
437311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                     target_match_offset);
438311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
439311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
440311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
441311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}  // namespace open_vcdiff
442