1311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Copyright 2006, 2008 Google Inc. 2311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Authors: Chandra Chereddi, Lincoln Smith 3311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 4311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Licensed under the Apache License, Version 2.0 (the "License"); 5311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// you may not use this file except in compliance with the License. 6311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// You may obtain a copy of the License at 7311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 8311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// http://www.apache.org/licenses/LICENSE-2.0 9311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 10311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Unless required by applicable law or agreed to in writing, software 11311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// distributed under the License is distributed on an "AS IS" BASIS, 12311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// See the License for the specific language governing permissions and 14311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// limitations under the License. 15311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 16311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#include <config.h> 17311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#include "blockhash.h" 18311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#include <stdint.h> // uint32_t 1928db8079f707ebdf43ce62cdfd96eb39c8f889e0openvcdiff#include <string.h> // memcpy, memcmp 20732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com#include <algorithm> // std::min 21732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com#include "compile_assert.h" 22311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#include "logging.h" 23311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#include "rolling_hash.h" 24311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 25311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffnamespace open_vcdiff { 26311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 27311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdifftypedef unsigned long uword_t; // a machine word NOLINT 28311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 29311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffBlockHash::BlockHash(const char* source_data, 30311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff size_t source_size, 31311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff int starting_offset) 32311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff : source_data_(source_data), 33311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff source_size_(source_size), 34311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff hash_table_mask_(0), 35311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff starting_offset_(starting_offset), 36311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff last_block_added_(-1) { 37311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 38311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 39311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffBlockHash::~BlockHash() { } 40311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 41311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// kBlockSize must be at least 2 to be meaningful. Since it's a compile-time 42311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// constant, check its value at compile time rather than wasting CPU cycles 43311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// on runtime checks. 44732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.comVCD_COMPILE_ASSERT(BlockHash::kBlockSize >= 2, kBlockSize_must_be_at_least_2); 45311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 46311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// kBlockSize is required to be a power of 2 because multiplication 47311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// (n * kBlockSize), division (n / kBlockSize) and MOD (n % kBlockSize) 48311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// are commonly-used operations. If kBlockSize is a compile-time 49311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// constant and a power of 2, the compiler can convert these three operations 50311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// into bit-shift (>> or <<) and bitwise-AND (&) operations, which are much 51311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// more efficient than executing full integer multiply, divide, or remainder 52311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// instructions. 53732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.comVCD_COMPILE_ASSERT((BlockHash::kBlockSize & (BlockHash::kBlockSize - 1)) == 0, 54732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com kBlockSize_must_be_a_power_of_2); 55311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 56311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffbool BlockHash::Init(bool populate_hash_table) { 57311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (!hash_table_.empty() || 58311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff !next_block_table_.empty() || 59311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff !last_block_table_.empty()) { 60732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com VCD_DFATAL << "Init() called twice for same BlockHash object" << VCD_ENDL; 61311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return false; 62311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 63311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const size_t table_size = CalcTableSize(source_size_); 64311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (table_size == 0) { 65732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com VCD_DFATAL << "Error finding table size for source size " << source_size_ 66732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << VCD_ENDL; 67311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return false; 68311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 69311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // Since table_size is a power of 2, (table_size - 1) is a bit mask 70311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // containing all the bits below table_size. 71311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff hash_table_mask_ = static_cast<uint32_t>(table_size - 1); 72311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff hash_table_.resize(table_size, -1); 73311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff next_block_table_.resize(GetNumberOfBlocks(), -1); 74311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff last_block_table_.resize(GetNumberOfBlocks(), -1); 75311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (populate_hash_table) { 76311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff AddAllBlocks(); 77311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 78311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return true; 79311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 80311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 81311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffconst BlockHash* BlockHash::CreateDictionaryHash(const char* dictionary_data, 82311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff size_t dictionary_size) { 83311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff BlockHash* new_dictionary_hash = new BlockHash(dictionary_data, 84311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff dictionary_size, 85311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 0); 86311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (!new_dictionary_hash->Init(/* populate_hash_table = */ true)) { 87311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff delete new_dictionary_hash; 88311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return NULL; 89311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } else { 90311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return new_dictionary_hash; 91311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 92311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 93311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 94311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffBlockHash* BlockHash::CreateTargetHash(const char* target_data, 95311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff size_t target_size, 96311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff size_t dictionary_size) { 97311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff BlockHash* new_target_hash = new BlockHash(target_data, 98311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff target_size, 99311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff static_cast<int>(dictionary_size)); 100311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (!new_target_hash->Init(/* populate_hash_table = */ false)) { 101311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff delete new_target_hash; 102311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return NULL; 103311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } else { 104311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return new_target_hash; 105311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 106311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 107311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 108311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Returns zero if an error occurs. 10983bbde0df33922d8dc6fa737cfb306d9caae13b1openvcdiffsize_t BlockHash::CalcTableSize(const size_t dictionary_size) { 110311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // Overallocate the hash table by making it the same size (in bytes) 111311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // as the source data. This is a trade-off between space and time: 112311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // the empty entries in the hash table will reduce the 113311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // probability of a hash collision to (sizeof(int) / kblockSize), 114311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // and so save time comparing false matches. 115311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const size_t min_size = (dictionary_size / sizeof(int)) + 1; // NOLINT 116311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff size_t table_size = 1; 117311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // Find the smallest power of 2 that is >= min_size, and assign 118311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // that value to table_size. 119311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff while (table_size < min_size) { 120311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff table_size <<= 1; 121311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // Guard against an infinite loop 122311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (table_size <= 0) { 123732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com VCD_DFATAL << "Internal error: CalcTableSize(dictionary_size = " 124732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << dictionary_size 125732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << "): resulting table_size " << table_size 126732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << " is zero or negative" << VCD_ENDL; 127311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return 0; 128311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 129311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 130311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // Check size sanity 131311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if ((table_size & (table_size - 1)) != 0) { 132732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com VCD_DFATAL << "Internal error: CalcTableSize(dictionary_size = " 133732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << dictionary_size 134732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << "): resulting table_size " << table_size 135732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << " is not a power of 2" << VCD_ENDL; 136311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return 0; 137311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 138311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // The loop above tries to find the smallest power of 2 that is >= min_size. 139311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // That value must lie somewhere between min_size and (min_size * 2), 140311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // except for the case (dictionary_size == 0, table_size == 1). 141311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if ((dictionary_size > 0) && (table_size > (min_size * 2))) { 142732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com VCD_DFATAL << "Internal error: CalcTableSize(dictionary_size = " 143732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << dictionary_size 144732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << "): resulting table_size " << table_size 145732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << " is too large" << VCD_ENDL; 146311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return 0; 147311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 148311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return table_size; 149311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 150311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 151311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// If the hash value is already available from the rolling hash, 152311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// call this function to save time. 153311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffvoid BlockHash::AddBlock(uint32_t hash_value) { 154311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (hash_table_.empty()) { 155732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com VCD_DFATAL << "BlockHash::AddBlock() called before BlockHash::Init()" 156732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << VCD_ENDL; 157311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return; 158311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 159311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // The initial value of last_block_added_ is -1. 160311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff int block_number = last_block_added_ + 1; 161311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const int total_blocks = 162311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff static_cast<int>(source_size_ / kBlockSize); // round down 163311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (block_number >= total_blocks) { 164732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com VCD_DFATAL << "BlockHash::AddBlock() called" 165732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com " with block number " << block_number 166732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << " that is past last block " << (total_blocks - 1) 167732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << VCD_ENDL; 168311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return; 169311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 170311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (next_block_table_[block_number] != -1) { 171732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com VCD_DFATAL << "Internal error in BlockHash::AddBlock(): " 172732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com "block number = " << block_number 173732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << ", next block should be -1 but is " 174732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << next_block_table_[block_number] << VCD_ENDL; 175311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return; 176311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 177311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const uint32_t hash_table_index = GetHashTableIndex(hash_value); 178311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const int first_matching_block = hash_table_[hash_table_index]; 179311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (first_matching_block < 0) { 180311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // This is the first entry with this hash value 181311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff hash_table_[hash_table_index] = block_number; 182311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff last_block_table_[block_number] = block_number; 183311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } else { 184311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // Add this entry at the end of the chain of matching blocks 185311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const int last_matching_block = last_block_table_[first_matching_block]; 186311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (next_block_table_[last_matching_block] != -1) { 187732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com VCD_DFATAL << "Internal error in BlockHash::AddBlock(): " 188732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com "first matching block = " << first_matching_block 189732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << ", last matching block = " << last_matching_block 190732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << ", next block should be -1 but is " 191732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << next_block_table_[last_matching_block] << VCD_ENDL; 192311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return; 193311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 194311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff next_block_table_[last_matching_block] = block_number; 195311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff last_block_table_[first_matching_block] = block_number; 196311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 197311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff last_block_added_ = block_number; 198311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 199311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 200311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffvoid BlockHash::AddAllBlocks() { 201311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff AddAllBlocksThroughIndex(static_cast<int>(source_size_)); 202311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 203311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 204311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffvoid BlockHash::AddAllBlocksThroughIndex(int end_index) { 205311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (end_index > static_cast<int>(source_size_)) { 206732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com VCD_DFATAL << "BlockHash::AddAllBlocksThroughIndex() called" 207732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com " with index " << end_index 208732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << " higher than end index " << source_size_ << VCD_ENDL; 209311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return; 210311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 211311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const int last_index_added = last_block_added_ * kBlockSize; 212311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (end_index <= last_index_added) { 213732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com VCD_DFATAL << "BlockHash::AddAllBlocksThroughIndex() called" 214732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com " with index " << end_index 215732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << " <= last index added ( " << last_index_added 216732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << ")" << VCD_ENDL; 217311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return; 218311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 219311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff int end_limit = end_index; 220311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // Don't allow reading any indices at or past source_size_. 221311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // The Hash function extends (kBlockSize - 1) bytes past the index, 222311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // so leave a margin of that size. 223311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff int last_legal_hash_index = static_cast<int>(source_size() - kBlockSize); 224311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (end_limit > last_legal_hash_index) { 225311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff end_limit = last_legal_hash_index + 1; 226311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 227311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const char* block_ptr = source_data() + NextIndexToAdd(); 228311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const char* const end_ptr = source_data() + end_limit; 229311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff while (block_ptr < end_ptr) { 230311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff AddBlock(RollingHash<kBlockSize>::Hash(block_ptr)); 231311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff block_ptr += kBlockSize; 232311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 233311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 234311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 235732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.comVCD_COMPILE_ASSERT((BlockHash::kBlockSize % sizeof(uword_t)) == 0, 236732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com kBlockSize_must_be_a_multiple_of_machine_word_size); 237311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 238311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// A recursive template to compare a fixed number 239311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// of (possibly unaligned) machine words starting 240311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// at addresses block1 and block2. Returns true or false 241311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// depending on whether an exact match was found. 242311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdifftemplate<int number_of_words> 243311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffinline bool CompareWholeWordValues(const char* block1, 244311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const char* block2) { 245311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return CompareWholeWordValues<1>(block1, block2) && 246311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff CompareWholeWordValues<number_of_words - 1>(block1 + sizeof(uword_t), 247311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff block2 + sizeof(uword_t)); 248311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 249311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 250311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// The base of the recursive template: compare one pair of machine words. 251311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdifftemplate<> 252311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffinline bool CompareWholeWordValues<1>(const char* word1, 253311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const char* word2) { 254311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff uword_t aligned_word1, aligned_word2; 255311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff memcpy(&aligned_word1, word1, sizeof(aligned_word1)); 256311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff memcpy(&aligned_word2, word2, sizeof(aligned_word2)); 257311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return aligned_word1 == aligned_word2; 258311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 259311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 260311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// A block must be composed of an integral number of machine words 261311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// (uword_t values.) This function takes advantage of that fact 262311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// by comparing the blocks as series of (possibly unaligned) word values. 263311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// A word-sized comparison can be performed as a single 264311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// machine instruction. Comparing words instead of bytes means that, 265311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// on a 64-bit platform, this function will use 8 times fewer test-and-branch 266311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// instructions than a byte-by-byte comparison. Even with the extra 267311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// cost of the calls to memcpy, this method is still at least twice as fast 268311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// as memcmp (measured using gcc on a 64-bit platform, with a block size 269311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// of 32.) For blocks with identical contents (a common case), this method 270311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// is over six times faster than memcmp. 271311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffinline bool BlockCompareWordsInline(const char* block1, const char* block2) { 272311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff static const size_t kWordsPerBlock = BlockHash::kBlockSize / sizeof(uword_t); 273311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return CompareWholeWordValues<kWordsPerBlock>(block1, block2); 274311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 275311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 276311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffbool BlockHash::BlockCompareWords(const char* block1, const char* block2) { 277311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return BlockCompareWordsInline(block1, block2); 278311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 279311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 280311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffinline bool BlockContentsMatchInline(const char* block1, const char* block2) { 281311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // Optimize for mismatch in first byte. Since this function is called only 282311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // when the hash values of the two blocks match, it is very likely that either 283311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // the blocks are identical, or else the first byte does not match. 284311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (*block1 != *block2) { 285311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return false; 286311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 287311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#ifdef VCDIFF_USE_BLOCK_COMPARE_WORDS 288311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return BlockCompareWordsInline(block1, block2); 289311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#else // !VCDIFF_USE_BLOCK_COMPARE_WORDS 290311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return memcmp(block1, block2, BlockHash::kBlockSize) == 0; 291311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#endif // VCDIFF_USE_BLOCK_COMPARE_WORDS 292311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 293311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 294311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffbool BlockHash::BlockContentsMatch(const char* block1, const char* block2) { 295311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return BlockContentsMatchInline(block1, block2); 296311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 297311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 298311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffinline int BlockHash::SkipNonMatchingBlocks(int block_number, 299311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const char* block_ptr) const { 300311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff int probes = 0; 301311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff while ((block_number >= 0) && 302311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff !BlockContentsMatchInline(block_ptr, 303311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff &source_data_[block_number * kBlockSize])) { 304311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (++probes > kMaxProbes) { 305311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return -1; // Avoid too much chaining 306311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 307311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff block_number = next_block_table_[block_number]; 308311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 309311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return block_number; 310311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 311311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 312311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Init() must have been called and returned true before using 313311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// FirstMatchingBlock or NextMatchingBlock. No check is performed 314311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// for this condition; the code will crash if this condition is violated. 315311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffinline int BlockHash::FirstMatchingBlockInline(uint32_t hash_value, 316311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const char* block_ptr) const { 317311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return SkipNonMatchingBlocks(hash_table_[GetHashTableIndex(hash_value)], 318311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff block_ptr); 319311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 320311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 321311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffint BlockHash::FirstMatchingBlock(uint32_t hash_value, 322311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const char* block_ptr) const { 323311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return FirstMatchingBlockInline(hash_value, block_ptr); 324311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 325311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 326311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffint BlockHash::NextMatchingBlock(int block_number, 327311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const char* block_ptr) const { 328311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (static_cast<size_t>(block_number) >= GetNumberOfBlocks()) { 329732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com VCD_DFATAL << "NextMatchingBlock called for invalid block number " 330732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << block_number << VCD_ENDL; 331311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return -1; 332311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 333311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return SkipNonMatchingBlocks(next_block_table_[block_number], block_ptr); 334311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 335311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 336311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Keep a count of the number of matches found. This will throttle the 337311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// number of iterations in FindBestMatch. For example, if the entire 338311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// dictionary is made up of spaces (' ') and the search string is also 339311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// made up of spaces, there will be one match for each block in the 340311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// dictionary. 341311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffinline bool BlockHash::TooManyMatches(int* match_counter) { 342311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff ++(*match_counter); 343311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return (*match_counter) > kMaxMatchesToCheck; 344311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 345311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 346311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Returns the number of bytes to the left of source_match_start 347311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// that match the corresponding bytes to the left of target_match_start. 348311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Will not examine more than max_bytes bytes, which is to say that 349311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// the return value will be in the range [0, max_bytes] inclusive. 350311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffint BlockHash::MatchingBytesToLeft(const char* source_match_start, 351311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const char* target_match_start, 352311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff int max_bytes) { 353311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const char* source_ptr = source_match_start; 354311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const char* target_ptr = target_match_start; 355311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff int bytes_found = 0; 356311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff while (bytes_found < max_bytes) { 357311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff --source_ptr; 358311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff --target_ptr; 359311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (*source_ptr != *target_ptr) { 360311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff break; 361311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 362311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff ++bytes_found; 363311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 364311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return bytes_found; 365311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 366311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 367311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Returns the number of bytes starting at source_match_end 368311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// that match the corresponding bytes starting at target_match_end. 369311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Will not examine more than max_bytes bytes, which is to say that 370311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// the return value will be in the range [0, max_bytes] inclusive. 371311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffint BlockHash::MatchingBytesToRight(const char* source_match_end, 372311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const char* target_match_end, 373311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff int max_bytes) { 374311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const char* source_ptr = source_match_end; 375311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const char* target_ptr = target_match_end; 376311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff int bytes_found = 0; 377311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff while ((bytes_found < max_bytes) && (*source_ptr == *target_ptr)) { 378311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff ++bytes_found; 379311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff ++source_ptr; 380311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff ++target_ptr; 381311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 382311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return bytes_found; 383311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 384311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 385311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// No NULL checks are performed on the pointer arguments. The caller 386311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// must guarantee that none of the arguments is NULL, or a crash will occur. 387311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 388311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// The vast majority of calls to FindBestMatch enter the loop *zero* times, 389311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// which is to say that most candidate blocks find no matches in the dictionary. 390311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// The important sections for optimization are therefore the code outside the 391311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// loop and the code within the loop conditions. Keep this to a minimum. 392311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffvoid BlockHash::FindBestMatch(uint32_t hash_value, 393311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const char* target_candidate_start, 394311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const char* target_start, 395311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff size_t target_size, 396311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff Match* best_match) const { 397311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff int match_counter = 0; 398311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff for (int block_number = FirstMatchingBlockInline(hash_value, 399311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff target_candidate_start); 400311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff (block_number >= 0) && !TooManyMatches(&match_counter); 401311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff block_number = NextMatchingBlock(block_number, target_candidate_start)) { 402311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff int source_match_offset = block_number * kBlockSize; 403311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const int source_match_end = source_match_offset + kBlockSize; 404311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 405311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff int target_match_offset = 406311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff static_cast<int>(target_candidate_start - target_start); 407311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const int target_match_end = target_match_offset + kBlockSize; 408311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 409311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff size_t match_size = kBlockSize; 410311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff { 411311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // Extend match start towards beginning of unencoded data 412311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const int limit_bytes_to_left = std::min(source_match_offset, 413311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff target_match_offset); 414311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const int matching_bytes_to_left = 415311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff MatchingBytesToLeft(source_data_ + source_match_offset, 416311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff target_start + target_match_offset, 417311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff limit_bytes_to_left); 418311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff source_match_offset -= matching_bytes_to_left; 419311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff target_match_offset -= matching_bytes_to_left; 420311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff match_size += matching_bytes_to_left; 421311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 422311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff { 423311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // Extend match end towards end of unencoded data 424311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const size_t source_bytes_to_right = source_size_ - source_match_end; 425311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const size_t target_bytes_to_right = target_size - target_match_end; 426311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const size_t limit_bytes_to_right = std::min(source_bytes_to_right, 427311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff target_bytes_to_right); 428311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff match_size += 429311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff MatchingBytesToRight(source_data_ + source_match_end, 430311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff target_start + target_match_end, 431311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff static_cast<int>(limit_bytes_to_right)); 432311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 433311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // Update in/out parameter if the best match found was better 434311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // than any match already stored in *best_match. 435311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff best_match->ReplaceIfBetterMatch(match_size, 436311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff source_match_offset + starting_offset_, 437311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff target_match_offset); 438311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 439311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 440311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 441311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} // namespace open_vcdiff 442