1c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Copyright 2006 Google Inc. 2c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Authors: Sanjay Ghemawat, Jeff Dean, Chandra Chereddi, Lincoln Smith 3c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// 4c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Licensed under the Apache License, Version 2.0 (the "License"); 5c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// you may not use this file except in compliance with the License. 6c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// You may obtain a copy of the License at 7c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// 8c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// http://www.apache.org/licenses/LICENSE-2.0 9c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// 10c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Unless required by applicable law or agreed to in writing, software 11c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// distributed under the License is distributed on an "AS IS" BASIS, 12c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// See the License for the specific language governing permissions and 14c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// limitations under the License. 15c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 16c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#ifndef OPEN_VCDIFF_VCDIFFENGINE_H_ 17c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#define OPEN_VCDIFF_VCDIFFENGINE_H_ 18c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 19c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <config.h> 20c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <stddef.h> // size_t 21c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <stdint.h> // uint32_t 22c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 23c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace open_vcdiff { 24c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 25c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass BlockHash; 26c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass OutputStringInterface; 27c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass CodeTableWriterInterface; 28c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 29c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// The VCDiffEngine class is used to find the optimal encoding (in terms of COPY 30c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// and ADD instructions) for a given dictionary and target window. To write the 31c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// instructions for this encoding, it calls the Copy() and Add() methods of the 32c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// code table writer object which is passed as an argument to Encode(). 33c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass VCDiffEngine { 34c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott public: 35c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // The minimum size of a string match that is worth putting into a COPY 36c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // instruction. Since this value is more than twice the block size, the 37c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // encoder will always discover a match of this size, no matter whether it is 38c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // aligned on block boundaries in the dictionary text. 39c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott static const size_t kMinimumMatchSize = 32; 40c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 41c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott VCDiffEngine(const char* dictionary, size_t dictionary_size); 42c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 43c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott ~VCDiffEngine(); 44c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 45c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Initializes the object before use. 46c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // This method must be called after constructing a VCDiffEngine object, 47c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // and before any other method may be called. It should not be called 48c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // twice on the same object. 49c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Returns true if initialization succeeded, or false if an error occurred, 50c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // in which case no other method except the destructor may then be used 51c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // on the object. 52c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // The Init() method is the only one allowed to treat hashed_dictionary_ 53c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // as non-const. 54c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool Init(); 55c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 56c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott size_t dictionary_size() const { return dictionary_size_; } 57c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 58c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Main worker function. Finds the best matches between the dictionary 59c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // (source) and target data, and uses the coder to write a 60c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // delta file window into *diff. 61c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Because it is a const function, many threads 62c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // can call Encode() at once for the same VCDiffEngine object. 63c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // All thread-specific data will be stored in the coder and diff arguments. 64c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // The coder object must have been fully initialized (by calling its Init() 65c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // method, if any) before calling this function. 66c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // 67c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // look_for_target_matches determines whether to look for matches 68c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // within the previously encoded target data, or just within the source 69c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // (dictionary) data. Please see vcencoder.h for a full explanation 70c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // of this parameter. 71c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott void Encode(const char* target_data, 72c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott size_t target_size, 73c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool look_for_target_matches, 74c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott OutputStringInterface* diff, 75c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott CodeTableWriterInterface* coder) const; 76c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 77c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott private: 78c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott static bool ShouldGenerateCopyInstructionForMatchOfSize(size_t size) { 79c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return size >= kMinimumMatchSize; 80c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 81c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 82c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // The following two functions use templates to produce two different 83c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // versions of the code depending on the value of the option 84c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // look_for_target_matches. This approach saves a test-and-branch instruction 85c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // within the inner loop of EncodeCopyForBestMatch. 86c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott template<bool look_for_target_matches> 87c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott void EncodeInternal(const char* target_data, 88c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott size_t target_size, 89c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott OutputStringInterface* diff, 90c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott CodeTableWriterInterface* coder) const; 91c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 92c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // If look_for_target_matches is true, then target_hash must point to a valid 93c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // BlockHash object, and cannot be NULL. If look_for_target_matches is 94c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // false, then the value of target_hash is ignored. 95c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott template<bool look_for_target_matches> 96c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott size_t EncodeCopyForBestMatch(uint32_t hash_value, 97c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char* target_candidate_start, 98c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char* unencoded_target_start, 99c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott size_t unencoded_target_size, 100c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const BlockHash* target_hash, 101c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott CodeTableWriterInterface* coder) const; 102c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 103c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott void AddUnmatchedRemainder(const char* unencoded_target_start, 104c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott size_t unencoded_target_size, 105c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott CodeTableWriterInterface* coder) const; 106c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 107c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott void FinishEncoding(size_t target_size, 108c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott OutputStringInterface* diff, 109c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott CodeTableWriterInterface* coder) const; 110c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 111c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char* dictionary_; // A copy of the dictionary contents 112c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 113c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const size_t dictionary_size_; 114c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 115c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // A hash that contains one element for every kBlockSize bytes of dictionary_. 116c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // This can be reused to encode many different target strings using the 117c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // same dictionary, without the need to compute the hash values each time. 118c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const BlockHash* hashed_dictionary_; 119c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 120c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Making these private avoids implicit copy constructor & assignment operator 121c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott VCDiffEngine(const VCDiffEngine&); 122c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott void operator=(const VCDiffEngine&); 123c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}; 124c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 125c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} // namespace open_vcdiff 126c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 127c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#endif // OPEN_VCDIFF_VCDIFFENGINE_H_ 128