1// Copyright 2007 Google Inc.
2// Author: Lincoln Smith
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8//      http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16// Classes to implement an Encoder for the format described in
17// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
18// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
19//
20// The RFC describes the possibility of using a secondary compressor
21// to further reduce the size of each section of the VCDIFF output.
22// That feature is not supported in this implementation of the encoder
23// and decoder.
24// No secondary compressor types have been publicly registered with
25// the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
26// in the more than five years since the registry was created, so there
27// is no standard set of compressor IDs which would be generated by other
28// encoders or accepted by other decoders.
29
30#include <config.h>
31#include "google/vcencoder.h"
32#include <vector>
33#include "checksum.h"
34#include "encodetable.h"
35#include "logging.h"
36#include "google/output_string.h"
37#include "vcdiffengine.h"
38
39namespace open_vcdiff {
40
41HashedDictionary::HashedDictionary(const char* dictionary_contents,
42                                   size_t dictionary_size)
43    : engine_(new VCDiffEngine(dictionary_contents, dictionary_size)) { }
44
45HashedDictionary::~HashedDictionary() { delete engine_; }
46
47bool HashedDictionary::Init() {
48  return const_cast<VCDiffEngine*>(engine_)->Init();
49}
50
51class VCDiffStreamingEncoderImpl {
52 public:
53  VCDiffStreamingEncoderImpl(const HashedDictionary* dictionary,
54                             VCDiffFormatExtensionFlags format_extensions,
55                             bool look_for_target_matches);
56
57  // These functions are identical to their counterparts
58  // in VCDiffStreamingEncoder.
59  bool StartEncoding(OutputStringInterface* out);
60
61  bool EncodeChunk(const char* data, size_t len, OutputStringInterface* out);
62
63  bool FinishEncoding(OutputStringInterface* out);
64
65  const std::vector<int>& match_counts() const {
66    return coder_.match_counts();
67  }
68
69 private:
70  // Write the header (as defined in section 4.1 of the RFC) to *output.
71  // This includes information that can be gathered
72  // before the first chunk of input is available.
73  void WriteHeader(OutputStringInterface* output) const;
74
75  const VCDiffEngine* engine_;
76
77  // This implementation of the encoder uses the default
78  // code table.  A VCDiffCodeTableWriter could also be constructed
79  // using a custom code table.
80  VCDiffCodeTableWriter coder_;
81
82  const VCDiffFormatExtensionFlags format_extensions_;
83
84  // Determines whether to look for matches within the previously encoded
85  // target data, or just within the source (dictionary) data.  Please see
86  // vcencoder.h for a full explanation of this parameter.
87  const bool look_for_target_matches_;
88
89  // This state variable is used to ensure that StartEncoding(), EncodeChunk(),
90  // and FinishEncoding() are called in the correct order.  It will be true
91  // if StartEncoding() has been called, followed by zero or more calls to
92  // EncodeChunk(), but FinishEncoding() has not yet been called.  It will
93  // be false initially, and also after FinishEncoding() has been called.
94  bool encode_chunk_allowed_;
95
96  // Making these private avoids implicit copy constructor & assignment operator
97  VCDiffStreamingEncoderImpl(const VCDiffStreamingEncoderImpl&);  // NOLINT
98  void operator=(const VCDiffStreamingEncoderImpl&);
99};
100
101inline VCDiffStreamingEncoderImpl::VCDiffStreamingEncoderImpl(
102    const HashedDictionary* dictionary,
103    VCDiffFormatExtensionFlags format_extensions,
104    bool look_for_target_matches)
105    : engine_(dictionary->engine()),
106      coder_((format_extensions & VCD_FORMAT_INTERLEAVED) != 0),
107      format_extensions_(format_extensions),
108      look_for_target_matches_(look_for_target_matches),
109      encode_chunk_allowed_(false) { }
110
111inline void VCDiffStreamingEncoderImpl::WriteHeader(
112    OutputStringInterface* output) const {
113  DeltaFileHeader header_data = {
114    0xD6,  // Header1: "V" | 0x80
115    0xC3,  // Header2: "C" | 0x80
116    0xC4,  // Header3: "D" | 0x80
117    0x00,  // Header4: Draft standard format
118    0x00 };  // Hdr_Indicator:
119             // No compression, no custom code table
120  if (format_extensions_ != VCD_STANDARD_FORMAT) {
121    header_data.header4 = 'S';  // Header4: VCDIFF/SDCH, extensions used
122  }
123  output->append(reinterpret_cast<const char*>(&header_data),
124                 sizeof(header_data));
125  // If custom cache table sizes or a custom code table were used
126  // for encoding, here is where they would be appended to *output.
127  // This implementation of the encoder does not use those features,
128  // although the decoder can understand and interpret them.
129}
130
131inline bool VCDiffStreamingEncoderImpl::StartEncoding(
132    OutputStringInterface* out) {
133  if (!coder_.Init(engine_->dictionary_size())) {
134    LOG(DFATAL) << "Internal error: "
135                   "Initialization of code table writer failed" << LOG_ENDL;
136    return false;
137  }
138  WriteHeader(out);
139  encode_chunk_allowed_ = true;
140  return true;
141}
142
143inline bool VCDiffStreamingEncoderImpl::EncodeChunk(
144    const char* data,
145    size_t len,
146    OutputStringInterface* out) {
147  if (!encode_chunk_allowed_) {
148    LOG(ERROR) << "EncodeChunk called before StartEncoding" << LOG_ENDL;
149    return false;
150  }
151  if ((format_extensions_ & VCD_FORMAT_CHECKSUM) != 0) {
152    coder_.AddChecksum(ComputeAdler32(data, len));
153  }
154  engine_->Encode(data, len, look_for_target_matches_, out, &coder_);
155  return true;
156}
157
158inline bool VCDiffStreamingEncoderImpl::FinishEncoding(
159    OutputStringInterface* /*out*/) {
160  if (!encode_chunk_allowed_) {
161    LOG(ERROR) << "FinishEncoding called before StartEncoding" << LOG_ENDL;
162    return false;
163  }
164  encode_chunk_allowed_ = false;
165  // There should not be any need to output more data
166  // since EncodeChunk() encodes a complete target window
167  // and there is no end-of-delta-file marker.
168  return true;
169}
170
171VCDiffStreamingEncoder::VCDiffStreamingEncoder(
172    const HashedDictionary* dictionary,
173    VCDiffFormatExtensionFlags format_extensions,
174    bool look_for_target_matches)
175    : impl_(new VCDiffStreamingEncoderImpl(dictionary,
176                                           format_extensions,
177                                           look_for_target_matches)) { }
178
179VCDiffStreamingEncoder::~VCDiffStreamingEncoder() { delete impl_; }
180
181bool VCDiffStreamingEncoder::StartEncodingToInterface(
182    OutputStringInterface* out) {
183  return impl_->StartEncoding(out);
184}
185
186bool VCDiffStreamingEncoder::EncodeChunkToInterface(
187    const char* data,
188    size_t len,
189    OutputStringInterface* out) {
190  return impl_->EncodeChunk(data, len, out);
191}
192
193bool VCDiffStreamingEncoder::FinishEncodingToInterface(
194    OutputStringInterface* out) {
195  return impl_->FinishEncoding(out);
196}
197
198void VCDiffStreamingEncoder::GetMatchCounts(
199    std::vector<int>* match_counts) const {
200  if (!match_counts) {
201    LOG(DFATAL) << "GetMatchCounts() called with NULL argument" << LOG_ENDL;
202    return;
203  }
204  *match_counts = impl_->match_counts();
205}
206
207bool VCDiffEncoder::EncodeToInterface(const char* target_data,
208                                      size_t target_len,
209                                      OutputStringInterface* out) {
210  out->clear();
211  if (!encoder_) {
212    if (!dictionary_.Init()) {
213      LOG(ERROR) << "Error initializing HashedDictionary" << LOG_ENDL;
214      return false;
215    }
216    encoder_ = new VCDiffStreamingEncoder(&dictionary_,
217                                          flags_,
218                                          look_for_target_matches_);
219  }
220  if (!encoder_->StartEncodingToInterface(out)) {
221    return false;
222  }
223  if (!encoder_->EncodeChunkToInterface(target_data, target_len, out)) {
224    return false;
225  }
226  return encoder_->FinishEncodingToInterface(out);
227}
228
229}  // namespace open_vcdiff
230