// Copyright 2007 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
15c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 16c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#ifndef OPEN_VCDIFF_VCENCODER_H_ 17c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#define OPEN_VCDIFF_VCENCODER_H_ 18c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 19c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <stddef.h> // size_t 20c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <vector> 21c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "google/output_string.h" 22c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 23c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace open_vcdiff { 24c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 25c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass VCDiffEngine; 26c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass VCDiffStreamingEncoderImpl; 27c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 28c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// These flags are passed to the constructor of VCDiffStreamingEncoder 29c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// to determine whether certain open-vcdiff format extensions 30c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// (which are not part of the RFC 3284 draft standard for VCDIFF) 31c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// are employed. 
32c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// 33c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Because these extensions are not part of the VCDIFF standard, if 34c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// any of these flags except VCD_STANDARD_FORMAT is specified, then the caller 35c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// must be certain that the receiver of the data will be using open-vcdiff 36c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// to decode the delta file, or at least that the receiver can interpret 37c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// these extensions. The encoder will use an 'S' as the fourth character 38c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// in the delta file to indicate that non-standard extensions are being used. 39c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// 40c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottenum VCDiffFormatExtensionFlagValues { 41c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // No extensions: the encoded format will conform to the RFC 42c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // draft standard for VCDIFF. 43c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott VCD_STANDARD_FORMAT = 0x00, 44c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // If this flag is specified, then the encoder writes each delta file 45c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // window by interleaving instructions and sizes with their corresponding 46c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // addresses and data, rather than placing these elements 47c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // into three separate sections. This facilitates providing partially 48c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // decoded results when only a portion of a delta file window is received 49c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // (e.g. 
when HTTP over TCP is used as the transmission protocol.) 50c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott VCD_FORMAT_INTERLEAVED = 0x01, 51c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // If this flag is specified, then an Adler32 checksum 52c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // of the target window data is included in the delta window. 53c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott VCD_FORMAT_CHECKSUM = 0x02 54c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}; 55c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 56c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttypedef int VCDiffFormatExtensionFlags; 57c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 58c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// A HashedDictionary must be constructed from the dictionary data 59c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// in order to use VCDiffStreamingEncoder. If the same dictionary will 60c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// be used to perform several encoding operations, then the caller should 61c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// create the HashedDictionary once and cache it for reuse. This object 62c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// is thread-safe: the same const HashedDictionary can be used 63c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// by several threads simultaneously, each with its own VCDiffStreamingEncoder. 64c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// 65c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// dictionary_contents is copied into the HashedDictionary, so the 66c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// caller may free that string, if desired, after the constructor returns. 
67c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// 68c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass HashedDictionary { 69c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott public: 70c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott HashedDictionary(const char* dictionary_contents, 71c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott size_t dictionary_size); 72c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott ~HashedDictionary(); 73c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 74c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Init() must be called before using the HashedDictionary as an argument 75c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // to the VCDiffStreamingEncoder, or for any other purpose except 76c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // destruction. It returns true if initialization succeeded, or false 77c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // if an error occurred, in which case the caller should destroy the object 78c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // without using it. 79c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool Init(); 80c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 81c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const VCDiffEngine* engine() const { return engine_; } 82c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 83c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott private: 84c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const VCDiffEngine* engine_; 85c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 86c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Make the copy constructor and assignment operator private 87c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // so that they don't inadvertently get used. 
88c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott HashedDictionary(const HashedDictionary&); // NOLINT 89c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott void operator=(const HashedDictionary&); 90c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}; 91c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 92c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// The standard streaming interface to the VCDIFF (RFC 3284) encoder. 93c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// "Streaming" in this context means that, even though the entire set of 94c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// input data to be encoded may not be available at once, the encoder 95c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// can produce partial output based on what is available. Of course, 96c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// the caller should try to maximize the sizes of the data chunks passed 97c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// to the encoder. 98c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass VCDiffStreamingEncoder { 99c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott public: 100c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // The HashedDictionary object passed to the constructor must remain valid, 101c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // without being deleted, for the lifetime of the VCDiffStreamingEncoder 102c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // object. 103c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // 104c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // format_extensions allows certain open-vcdiff extensions to the VCDIFF 105c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // format to be included in the encoded output. 
These extensions are not 106c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // part of the RFC 3284 draft standard, so specifying any extension flags 107c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // will make the output compatible only with open-vcdiff, or with other 108c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // VCDIFF implementations that accept these extensions. See above for an 109c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // explanation of each possible flag value. 110c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // 111c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // *** look_for_target_matches: 112c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // The VCDIFF format allows COPY instruction addresses to reference data from 113c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // the source (dictionary), or from previously encoded target data. 114c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // 115c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // If look_for_target_matches is false, then the encoder will only 116c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // produce COPY instructions that reference source data from the dictionary, 117c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // never from previously encoded target data. This will speed up the encoding 118c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // process, but the encoded data will not be as compact. 119c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // 120c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // If this value is true, then the encoder will produce COPY instructions 121c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // that reference either source data or target data. 
A COPY instruction from 122c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // the previously encoded target data may even extend into the range of the 123c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // data being produced by that same COPY instruction; for example, if the 124c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // previously encoded target data is "LA", then a single COPY instruction of 125c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // length 10 can produce the additional target data "LALALALALA". 126c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // 127c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // There is a third type of COPY instruction that starts within 128c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // the source data and extends from the end of the source data 129c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // into the beginning of the target data. This VCDIFF encoder will never 130c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // produce a COPY instruction of this third type (regardless of the value of 131c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // look_for_target_matches) because the cost of checking for matches 132c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // across the source-target boundary would not justify its benefits. 
133c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // 134c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott VCDiffStreamingEncoder(const HashedDictionary* dictionary, 135c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott VCDiffFormatExtensionFlags format_extensions, 136c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool look_for_target_matches); 137c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott ~VCDiffStreamingEncoder(); 138c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 139c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // The client should use these routines as follows: 140c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // HashedDictionary hd(dictionary, dictionary_size); 141c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // if (!hd.Init()) { 142c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // HandleError(); 143c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // return; 144c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // } 145c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // string output_string; 146c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // VCDiffStreamingEncoder v(hd, false, false); 147c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // if (!v.StartEncoding(&output_string)) { 148c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // HandleError(); 149c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // return; // No need to call FinishEncoding() 150c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // } 151c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Process(output_string.data(), output_string.size()); 152c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // output_string.clear(); 153c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // while (get data_buf) { 154c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // if (!v.EncodeChunk(data_buf, data_len, &output_string)) { 155c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // 
HandleError(); 156c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // return; // No need to call FinishEncoding() 157c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // } 158c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // // The encoding is appended to output_string at each call, 159c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // // so clear output_string once its contents have been processed. 160c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Process(output_string.data(), output_string.size()); 161c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // output_string.clear(); 162c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // } 163c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // if (!v.FinishEncoding(&output_string)) { 164c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // HandleError(); 165c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // return; 166c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // } 167c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Process(output_string.data(), output_string.size()); 168c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // output_string.clear(); 169c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // 170c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // I.e., the allowed pattern of calls is 171c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // StartEncoding EncodeChunk* FinishEncoding 172c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // 173c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // The size of the encoded output depends on the sizes of the chunks 174c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // passed in (i.e. the chunking boundary affects compression). 175c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // However the decoded output is independent of chunk boundaries. 
176c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 177c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Sets up the data structures for encoding. 178c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Writes a VCDIFF delta file header (as defined in RFC section 4.1) 179c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // to *output_string. 180c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // 181c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Note: we *append*, so the old contents of *output_string stick around. 182c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // This convention differs from the non-streaming Encode/Decode 183c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // interfaces in VCDiffEncoder. 184c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // 185c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // If an error occurs, this function returns false; otherwise it returns true. 186c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // If this function returns false, the caller does not need to call 187c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // FinishEncoding or to do any cleanup except destroying the 188c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // VCDiffStreamingEncoder object. 
189c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott template<class OutputType> 190c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool StartEncoding(OutputType* output) { 191c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott OutputString<OutputType> output_string(output); 192c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return StartEncodingToInterface(&output_string); 193c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 194c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 195c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool StartEncodingToInterface(OutputStringInterface* output_string); 196c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 197c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Appends compressed encoding for "data" (one complete VCDIFF delta window) 198c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // to *output_string. 199c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // If an error occurs (for example, if StartEncoding was not called 200c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // earlier or StartEncoding returned false), this function returns false; 201c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // otherwise it returns true. The caller does not need to call FinishEncoding 202c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // or do any cleanup except destroying the VCDiffStreamingEncoder 203c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // if this function returns false. 
204c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott template<class OutputType> 205c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool EncodeChunk(const char* data, size_t len, OutputType* output) { 206c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott OutputString<OutputType> output_string(output); 207c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return EncodeChunkToInterface(data, len, &output_string); 208c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 209c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 210c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool EncodeChunkToInterface(const char* data, size_t len, 211c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott OutputStringInterface* output_string); 212c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 213c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Finishes encoding and appends any leftover encoded data to *output_string. 214c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // If an error occurs (for example, if StartEncoding was not called 215c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // earlier or StartEncoding returned false), this function returns false; 216c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // otherwise it returns true. The caller does not need to 217c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // do any cleanup except destroying the VCDiffStreamingEncoder 218c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // if this function returns false. 
219c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott template<class OutputType> 220c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool FinishEncoding(OutputType* output) { 221c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott OutputString<OutputType> output_string(output); 222c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return FinishEncodingToInterface(&output_string); 223c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 224c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 225c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool FinishEncodingToInterface(OutputStringInterface* output_string); 226c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 227c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Replaces the contents of match_counts with a vector of integers, 228c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // one for each possible match length. The value of match_counts[n] 229c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // is equal to the number of matches of length n found so far 230c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // for this VCDiffStreamingEncoder object. 231c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott void GetMatchCounts(std::vector<int>* match_counts) const; 232c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 233c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott private: 234c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott VCDiffStreamingEncoderImpl* const impl_; 235c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 236c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Make the copy constructor and assignment operator private 237c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // so that they don't inadvertently get used. 
238c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott VCDiffStreamingEncoder(const VCDiffStreamingEncoder&); // NOLINT 239c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott void operator=(const VCDiffStreamingEncoder&); 240c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}; 241c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 242c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// A simpler (non-streaming) interface to the VCDIFF encoder that can be used 243c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// if the entire target data string is available. 244c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// 245c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass VCDiffEncoder { 246c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott public: 247c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott VCDiffEncoder(const char* dictionary_contents, size_t dictionary_size) 248c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott : dictionary_(dictionary_contents, dictionary_size), 249c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott encoder_(NULL), 250c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott flags_(VCD_STANDARD_FORMAT), 251c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott look_for_target_matches_(true) { } 252c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 253c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott ~VCDiffEncoder() { 254c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott delete encoder_; 255c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 256c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 257c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // By default, VCDiffEncoder uses standard VCDIFF format. This function 258c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // can be used before calling Encode(), to specify that interleaved format 259c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // and/or checksum format should be used. 
260c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott void SetFormatFlags(VCDiffFormatExtensionFlags flags) { flags_ = flags; } 261c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 262c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // By default, VCDiffEncoder looks for matches in the dictionary and also in 263c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // the previously encoded target data. This function can be used before 264c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // calling Encode(), to specify whether or not target matching should be 265c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // enabled. 266c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott void SetTargetMatching(bool look_for_target_matches) { 267c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott look_for_target_matches_ = look_for_target_matches; 268c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 269c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 270c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Replaces old contents of output_string with the encoded form of 271c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // target_data. 
272c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott template<class OutputType> 273c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool Encode(const char* target_data, 274c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott size_t target_len, 275c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott OutputType* output) { 276c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott OutputString<OutputType> output_string(output); 277c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return EncodeToInterface(target_data, target_len, &output_string); 278c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 279c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 280c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott private: 281c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool EncodeToInterface(const char* target_data, 282c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott size_t target_len, 283c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott OutputStringInterface* output_string); 284c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 285c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott HashedDictionary dictionary_; 286c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott VCDiffStreamingEncoder* encoder_; 287c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott VCDiffFormatExtensionFlags flags_; 288c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool look_for_target_matches_; 289c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 290c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Make the copy constructor and assignment operator private 291c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // so that they don't inadvertently get used. 
292c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott VCDiffEncoder(const VCDiffEncoder&); // NOLINT 293c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott void operator=(const VCDiffEncoder&); 294c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}; 295c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 296c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} // namespace open_vcdiff 297c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 298c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#endif // OPEN_VCDIFF_VCENCODER_H_ 299