1// Copyright 2007 Google Inc.
2// Author: Lincoln Smith
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8//      http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16// Classes to implement an Encoder for the format described in
17// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
18// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
19//
20// The RFC describes the possibility of using a secondary compressor
21// to further reduce the size of each section of the VCDIFF output.
22// That feature is not supported in this implementation of the encoder
23// and decoder.
24// No secondary compressor types have been publicly registered with
25// the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
26// in the more than five years since the registry was created, so there
27// is no standard set of compressor IDs which would be generated by other
28// encoders or accepted by other decoders.
29
30#include <config.h>
31#include <memory>  // auto_ptr
32#include "checksum.h"
33#include "encodetable.h"
34#include "google/output_string.h"
35#include "google/vcencoder.h"
36#include "jsonwriter.h"
37#include "logging.h"
38#include "vcdiffengine.h"
39
40namespace open_vcdiff {
41
42HashedDictionary::HashedDictionary(const char* dictionary_contents,
43                                   size_t dictionary_size)
44    : engine_(new VCDiffEngine(dictionary_contents, dictionary_size)) { }
45
46HashedDictionary::~HashedDictionary() { delete engine_; }
47
48bool HashedDictionary::Init() {
49  return const_cast<VCDiffEngine*>(engine_)->Init();
50}
51
52class VCDiffStreamingEncoderImpl {
53 public:
54  VCDiffStreamingEncoderImpl(const HashedDictionary* dictionary,
55                             VCDiffFormatExtensionFlags format_extensions,
56                             bool look_for_target_matches);
57
58  // These functions are identical to their counterparts
59  // in VCDiffStreamingEncoder.
60  bool StartEncoding(OutputStringInterface* out);
61
62  bool EncodeChunk(const char* data, size_t len, OutputStringInterface* out);
63
64  bool FinishEncoding(OutputStringInterface* out);
65
66 private:
67  const VCDiffEngine* engine_;
68
69  std::auto_ptr<CodeTableWriterInterface> coder_;
70
71  const VCDiffFormatExtensionFlags format_extensions_;
72
73  // Determines whether to look for matches within the previously encoded
74  // target data, or just within the source (dictionary) data.  Please see
75  // vcencoder.h for a full explanation of this parameter.
76  const bool look_for_target_matches_;
77
78  // This state variable is used to ensure that StartEncoding(), EncodeChunk(),
79  // and FinishEncoding() are called in the correct order.  It will be true
80  // if StartEncoding() has been called, followed by zero or more calls to
81  // EncodeChunk(), but FinishEncoding() has not yet been called.  It will
82  // be false initially, and also after FinishEncoding() has been called.
83  bool encode_chunk_allowed_;
84
85  // Making these private avoids implicit copy constructor & assignment operator
86  VCDiffStreamingEncoderImpl(const VCDiffStreamingEncoderImpl&);  // NOLINT
87  void operator=(const VCDiffStreamingEncoderImpl&);
88};
89
90inline VCDiffStreamingEncoderImpl::VCDiffStreamingEncoderImpl(
91    const HashedDictionary* dictionary,
92    VCDiffFormatExtensionFlags format_extensions,
93    bool look_for_target_matches)
94    : engine_(dictionary->engine()),
95      format_extensions_(format_extensions),
96      look_for_target_matches_(look_for_target_matches),
97      encode_chunk_allowed_(false) {
98  if (format_extensions & VCD_FORMAT_JSON) {
99    coder_.reset(new JSONCodeTableWriter());
100  } else {
101    // This implementation of the encoder uses the default
102    // code table.  A VCDiffCodeTableWriter could also be constructed
103    // using a custom code table.
104    coder_.reset(new VCDiffCodeTableWriter(
105        (format_extensions & VCD_FORMAT_INTERLEAVED) != 0));
106  }
107}
108
109inline bool VCDiffStreamingEncoderImpl::StartEncoding(
110    OutputStringInterface* out) {
111  if (!coder_->Init(engine_->dictionary_size())) {
112    VCD_DFATAL << "Internal error: "
113                  "Initialization of code table writer failed" << VCD_ENDL;
114    return false;
115  }
116  coder_->WriteHeader(out, format_extensions_);
117  encode_chunk_allowed_ = true;
118  return true;
119}
120
121inline bool VCDiffStreamingEncoderImpl::EncodeChunk(
122    const char* data,
123    size_t len,
124    OutputStringInterface* out) {
125  if (!encode_chunk_allowed_) {
126    VCD_ERROR << "EncodeChunk called before StartEncoding" << VCD_ENDL;
127    return false;
128  }
129  if ((format_extensions_ & VCD_FORMAT_CHECKSUM) != 0) {
130    coder_->AddChecksum(ComputeAdler32(data, len));
131  }
132  engine_->Encode(data, len, look_for_target_matches_, out, coder_.get());
133  return true;
134}
135
136inline bool VCDiffStreamingEncoderImpl::FinishEncoding(
137    OutputStringInterface* out) {
138  if (!encode_chunk_allowed_) {
139    VCD_ERROR << "FinishEncoding called before StartEncoding" << VCD_ENDL;
140    return false;
141  }
142  encode_chunk_allowed_ = false;
143  coder_->FinishEncoding(out);
144  return true;
145}
146
147VCDiffStreamingEncoder::VCDiffStreamingEncoder(
148    const HashedDictionary* dictionary,
149    VCDiffFormatExtensionFlags format_extensions,
150    bool look_for_target_matches)
151    : impl_(new VCDiffStreamingEncoderImpl(dictionary,
152                                           format_extensions,
153                                           look_for_target_matches)) { }
154
155VCDiffStreamingEncoder::~VCDiffStreamingEncoder() { delete impl_; }
156
157bool VCDiffStreamingEncoder::StartEncodingToInterface(
158    OutputStringInterface* out) {
159  return impl_->StartEncoding(out);
160}
161
162bool VCDiffStreamingEncoder::EncodeChunkToInterface(
163    const char* data,
164    size_t len,
165    OutputStringInterface* out) {
166  return impl_->EncodeChunk(data, len, out);
167}
168
169bool VCDiffStreamingEncoder::FinishEncodingToInterface(
170    OutputStringInterface* out) {
171  return impl_->FinishEncoding(out);
172}
173
174bool VCDiffEncoder::EncodeToInterface(const char* target_data,
175                                      size_t target_len,
176                                      OutputStringInterface* out) {
177  out->clear();
178  if (!encoder_) {
179    if (!dictionary_.Init()) {
180      VCD_ERROR << "Error initializing HashedDictionary" << VCD_ENDL;
181      return false;
182    }
183    encoder_ = new VCDiffStreamingEncoder(&dictionary_,
184                                          flags_,
185                                          look_for_target_matches_);
186  }
187  if (!encoder_->StartEncodingToInterface(out)) {
188    return false;
189  }
190  if (!encoder_->EncodeChunkToInterface(target_data, target_len, out)) {
191    return false;
192  }
193  return encoder_->FinishEncodingToInterface(out);
194}
195
196}  // namespace open_vcdiff
197