// Copyright 2007 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
15c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
16c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#ifndef OPEN_VCDIFF_VCENCODER_H_
17c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#define OPEN_VCDIFF_VCENCODER_H_
18c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
19c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <stddef.h>  // size_t
20c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <vector>
21c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "google/output_string.h"
22c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
23c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace open_vcdiff {
24c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
// Forward declarations of implementation classes.  They are not defined in
// this header, which only refers to them through pointers.
class VCDiffEngine;
class VCDiffStreamingEncoderImpl;
27c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
// These flags are passed to the constructor of VCDiffStreamingEncoder
// to determine whether certain open-vcdiff format extensions
// (which are not part of the RFC 3284 draft standard for VCDIFF)
// are employed.
//
// Because these extensions are not part of the VCDIFF standard, if
// any of these flags except VCD_STANDARD_FORMAT is specified, then the caller
// must be certain that the receiver of the data will be using open-vcdiff
// to decode the delta file, or at least that the receiver can interpret
// these extensions.  The encoder will use an 'S' as the fourth character
// in the delta file to indicate that non-standard extensions are being used.
//
// The non-zero values occupy distinct bits, so they can be combined with
// bitwise OR into a single VCDiffFormatExtensionFlags value.
//
enum VCDiffFormatExtensionFlagValues {
  // No extensions: the encoded format will conform to the RFC
  // draft standard for VCDIFF.
  VCD_STANDARD_FORMAT = 0x00,
  // If this flag is specified, then the encoder writes each delta file
  // window by interleaving instructions and sizes with their corresponding
  // addresses and data, rather than placing these elements
  // into three separate sections.  This facilitates providing partially
  // decoded results when only a portion of a delta file window is received
  // (e.g. when HTTP over TCP is used as the transmission protocol.)
  VCD_FORMAT_INTERLEAVED = 0x01,
  // If this flag is specified, then an Adler32 checksum
  // of the target window data is included in the delta window.
  VCD_FORMAT_CHECKSUM = 0x02
};

// Holds a bitwise-OR combination of VCDiffFormatExtensionFlagValues.
typedef int VCDiffFormatExtensionFlags;
57c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
58c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// A HashedDictionary must be constructed from the dictionary data
59c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// in order to use VCDiffStreamingEncoder.  If the same dictionary will
60c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// be used to perform several encoding operations, then the caller should
61c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// create the HashedDictionary once and cache it for reuse.  This object
62c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// is thread-safe: the same const HashedDictionary can be used
63c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// by several threads simultaneously, each with its own VCDiffStreamingEncoder.
64c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
65c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// dictionary_contents is copied into the HashedDictionary, so the
66c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// caller may free that string, if desired, after the constructor returns.
67c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
68c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass HashedDictionary {
69c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott public:
70c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  HashedDictionary(const char* dictionary_contents,
71c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                   size_t dictionary_size);
72c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  ~HashedDictionary();
73c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
74c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Init() must be called before using the HashedDictionary as an argument
75c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // to the VCDiffStreamingEncoder, or for any other purpose except
76c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // destruction.  It returns true if initialization succeeded, or false
77c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // if an error occurred, in which case the caller should destroy the object
78c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // without using it.
79c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool Init();
80c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
81c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const VCDiffEngine* engine() const { return engine_; }
82c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
83c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott private:
84c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const VCDiffEngine* engine_;
85c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
86c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Make the copy constructor and assignment operator private
87c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // so that they don't inadvertently get used.
88c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  HashedDictionary(const HashedDictionary&);  // NOLINT
89c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  void operator=(const HashedDictionary&);
90c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott};
91c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
92c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// The standard streaming interface to the VCDIFF (RFC 3284) encoder.
93c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// "Streaming" in this context means that, even though the entire set of
94c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// input data to be encoded may not be available at once, the encoder
95c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// can produce partial output based on what is available.  Of course,
96c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// the caller should try to maximize the sizes of the data chunks passed
97c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// to the encoder.
98c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass VCDiffStreamingEncoder {
99c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott public:
100c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // The HashedDictionary object passed to the constructor must remain valid,
101c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // without being deleted, for the lifetime of the VCDiffStreamingEncoder
102c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // object.
103c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //
104c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // format_extensions allows certain open-vcdiff extensions to the VCDIFF
105c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // format to be included in the encoded output.  These extensions are not
106c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // part of the RFC 3284 draft standard, so specifying any extension flags
107c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // will make the output compatible only with open-vcdiff, or with other
108c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // VCDIFF implementations that accept these extensions.  See above for an
109c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // explanation of each possible flag value.
110c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //
111c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // *** look_for_target_matches:
112c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // The VCDIFF format allows COPY instruction addresses to reference data from
113c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // the source (dictionary), or from previously encoded target data.
114c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //
115c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // If look_for_target_matches is false, then the encoder will only
116c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // produce COPY instructions that reference source data from the dictionary,
117c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // never from previously encoded target data.  This will speed up the encoding
118c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // process, but the encoded data will not be as compact.
119c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //
120c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // If this value is true, then the encoder will produce COPY instructions
121c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // that reference either source data or target data.  A COPY instruction from
122c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // the previously encoded target data may even extend into the range of the
123c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // data being produced by that same COPY instruction; for example, if the
124c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // previously encoded target data is "LA", then a single COPY instruction of
125c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // length 10 can produce the additional target data "LALALALALA".
126c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //
127c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // There is a third type of COPY instruction that starts within
128c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // the source data and extends from the end of the source data
129c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // into the beginning of the target data.  This VCDIFF encoder will never
130c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // produce a COPY instruction of this third type (regardless of the value of
131c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // look_for_target_matches) because the cost of checking for matches
132c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // across the source-target boundary would not justify its benefits.
133c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //
134c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  VCDiffStreamingEncoder(const HashedDictionary* dictionary,
135c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                         VCDiffFormatExtensionFlags format_extensions,
136c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                         bool look_for_target_matches);
137c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  ~VCDiffStreamingEncoder();
138c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
139c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // The client should use these routines as follows:
140c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //    HashedDictionary hd(dictionary, dictionary_size);
141c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //    if (!hd.Init()) {
142c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //      HandleError();
143c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //      return;
144c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //    }
145c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //    string output_string;
146c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //    VCDiffStreamingEncoder v(hd, false, false);
147c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //    if (!v.StartEncoding(&output_string)) {
148c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //      HandleError();
149c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //      return;  // No need to call FinishEncoding()
150c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //    }
151c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //    Process(output_string.data(), output_string.size());
152c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //    output_string.clear();
153c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //    while (get data_buf) {
154c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //      if (!v.EncodeChunk(data_buf, data_len, &output_string)) {
155c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //        HandleError();
156c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //        return;  // No need to call FinishEncoding()
157c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //      }
158c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //      // The encoding is appended to output_string at each call,
159c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //      // so clear output_string once its contents have been processed.
160c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //      Process(output_string.data(), output_string.size());
161c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //      output_string.clear();
162c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //    }
163c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //    if (!v.FinishEncoding(&output_string)) {
164c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //      HandleError();
165c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //      return;
166c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //    }
167c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //    Process(output_string.data(), output_string.size());
168c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //    output_string.clear();
169c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //
170c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // I.e., the allowed pattern of calls is
171c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //    StartEncoding EncodeChunk* FinishEncoding
172c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //
173c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // The size of the encoded output depends on the sizes of the chunks
174c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // passed in (i.e. the chunking boundary affects compression).
175c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // However the decoded output is independent of chunk boundaries.
176c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
177c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Sets up the data structures for encoding.
178c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Writes a VCDIFF delta file header (as defined in RFC section 4.1)
179c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // to *output_string.
180c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //
181c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Note: we *append*, so the old contents of *output_string stick around.
182c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // This convention differs from the non-streaming Encode/Decode
183c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // interfaces in VCDiffEncoder.
184c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //
185c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // If an error occurs, this function returns false; otherwise it returns true.
186c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // If this function returns false, the caller does not need to call
187c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // FinishEncoding or to do any cleanup except destroying the
188c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // VCDiffStreamingEncoder object.
189c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  template<class OutputType>
190c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool StartEncoding(OutputType* output) {
191c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    OutputString<OutputType> output_string(output);
192c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return StartEncodingToInterface(&output_string);
193c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
194c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
195c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool StartEncodingToInterface(OutputStringInterface* output_string);
196c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
197c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Appends compressed encoding for "data" (one complete VCDIFF delta window)
198c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // to *output_string.
199c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // If an error occurs (for example, if StartEncoding was not called
200c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // earlier or StartEncoding returned false), this function returns false;
201c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // otherwise it returns true.  The caller does not need to call FinishEncoding
202c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // or do any cleanup except destroying the VCDiffStreamingEncoder
203c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // if this function returns false.
204c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  template<class OutputType>
205c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool EncodeChunk(const char* data, size_t len, OutputType* output) {
206c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    OutputString<OutputType> output_string(output);
207c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return EncodeChunkToInterface(data, len, &output_string);
208c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
209c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
210c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool EncodeChunkToInterface(const char* data, size_t len,
211c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                              OutputStringInterface* output_string);
212c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
213c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Finishes encoding and appends any leftover encoded data to *output_string.
214c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // If an error occurs (for example, if StartEncoding was not called
215c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // earlier or StartEncoding returned false), this function returns false;
216c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // otherwise it returns true.  The caller does not need to
217c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // do any cleanup except destroying the VCDiffStreamingEncoder
218c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // if this function returns false.
219c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  template<class OutputType>
220c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool FinishEncoding(OutputType* output) {
221c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    OutputString<OutputType> output_string(output);
222c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return FinishEncodingToInterface(&output_string);
223c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
224c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
225c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool FinishEncodingToInterface(OutputStringInterface* output_string);
226c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
227c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Replaces the contents of match_counts with a vector of integers,
228c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // one for each possible match length.  The value of match_counts[n]
229c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // is equal to the number of matches of length n found so far
230c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // for this VCDiffStreamingEncoder object.
231c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  void GetMatchCounts(std::vector<int>* match_counts) const;
232c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
233c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott private:
234c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  VCDiffStreamingEncoderImpl* const impl_;
235c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
236c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Make the copy constructor and assignment operator private
237c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // so that they don't inadvertently get used.
238c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  VCDiffStreamingEncoder(const VCDiffStreamingEncoder&);  // NOLINT
239c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  void operator=(const VCDiffStreamingEncoder&);
240c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott};
241c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
242c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// A simpler (non-streaming) interface to the VCDIFF encoder that can be used
243c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// if the entire target data string is available.
244c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
245c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass VCDiffEncoder {
246c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott public:
247c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  VCDiffEncoder(const char* dictionary_contents, size_t dictionary_size)
248c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      : dictionary_(dictionary_contents, dictionary_size),
249c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        encoder_(NULL),
250c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        flags_(VCD_STANDARD_FORMAT),
251c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        look_for_target_matches_(true) { }
252c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
253c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  ~VCDiffEncoder() {
254c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    delete encoder_;
255c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
256c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
257c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // By default, VCDiffEncoder uses standard VCDIFF format.  This function
258c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // can be used before calling Encode(), to specify that interleaved format
259c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // and/or checksum format should be used.
260c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  void SetFormatFlags(VCDiffFormatExtensionFlags flags) { flags_ = flags; }
261c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
262c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // By default, VCDiffEncoder looks for matches in the dictionary and also in
263c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // the previously encoded target data.  This function can be used before
264c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // calling Encode(), to specify whether or not target matching should be
265c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // enabled.
266c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  void SetTargetMatching(bool look_for_target_matches) {
267c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    look_for_target_matches_ = look_for_target_matches;
268c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
269c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
270c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Replaces old contents of output_string with the encoded form of
271c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // target_data.
272c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  template<class OutputType>
273c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool Encode(const char* target_data,
274c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott              size_t target_len,
275c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott              OutputType* output) {
276c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    OutputString<OutputType> output_string(output);
277c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return EncodeToInterface(target_data, target_len, &output_string);
278c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
279c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
280c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott private:
281c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool EncodeToInterface(const char* target_data,
282c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                         size_t target_len,
283c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                         OutputStringInterface* output_string);
284c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
285c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  HashedDictionary dictionary_;
286c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  VCDiffStreamingEncoder* encoder_;
287c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  VCDiffFormatExtensionFlags flags_;
288c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool look_for_target_matches_;
289c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
290c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Make the copy constructor and assignment operator private
291c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // so that they don't inadvertently get used.
292c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  VCDiffEncoder(const VCDiffEncoder&);  // NOLINT
293c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  void operator=(const VCDiffEncoder&);
294c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott};
295c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
296c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}  // namespace open_vcdiff
297c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
298c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#endif  // OPEN_VCDIFF_VCENCODER_H_
299