// Copyright 2008 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

16#ifndef OPEN_VCDIFF_ENCODETABLE_H_
17#define OPEN_VCDIFF_ENCODETABLE_H_
18
19#include <config.h>
20#include <stddef.h>  // size_t
21#include <stdint.h>  // int32_t
22#include <string>
23#include "addrcache.h"
24#include "checksum.h"
25#include "codetable.h"
26#include "codetablewriter_interface.h"
27
28namespace open_vcdiff {
29
30class OutputStringInterface;
31class VCDiffInstructionMap;
32
33// The method calls after construction *must* conform
34// to the following pattern:
35//    {{Add|Copy|Run}* [AddChecksum] Output}*
36//
37// When Output has been called in this sequence, a complete target window
38// (as defined in RFC 3284 section 4.3) will have been appended to
39// out (unless no calls to Add, Run, or Copy were made, in which
40// case Output will do nothing.)  The output will not be available for use
41// until after each call to Output().
42//
43// NOT threadsafe.
44//
45class VCDiffCodeTableWriter : public CodeTableWriterInterface {
46 public:
47  // This constructor uses the default code table.
48  // If interleaved is true, the encoder writes each delta file window
49  // by interleaving instructions and sizes with their corresponding
50  // addresses and data, rather than placing these elements into three
51  // separate sections.  This facilitates providing partially
52  // decoded results when only a portion of a delta file window
53  // is received (e.g. when HTTP over TCP is used as the
54  // transmission protocol.)  The interleaved format is
55  // not consistent with the VCDIFF draft standard.
56  //
57  explicit VCDiffCodeTableWriter(bool interleaved);
58
59  // Uses a non-standard code table and non-standard cache sizes.  The caller
60  // must guarantee that code_table_data remains allocated for the lifetime of
61  // the VCDiffCodeTableWriter object.  Note that this is different from how
62  // VCDiffCodeTableReader::UseCodeTable works.  It is assumed that a given
63  // encoder will use either the default code table or a statically-defined
64  // non-standard code table, whereas the decoder must have the ability to read
65  // an arbitrary non-standard code table from a delta file and discard it once
66  // the file has been decoded.
67  //
68  VCDiffCodeTableWriter(bool interleaved,
69                        int near_cache_size,
70                        int same_cache_size,
71                        const VCDiffCodeTableData& code_table_data,
72                        unsigned char max_mode);
73
74  virtual ~VCDiffCodeTableWriter();
75
76  // Initializes the constructed object for use.
77  // This method must be called after a VCDiffCodeTableWriter is constructed
78  // and before any of its other methods can be called.  It will return
79  // false if there was an error initializing the object, or true if it
80  // was successful.  After the object has been initialized and used,
81  // Init() can be called again to restore the initial state of the object.
82  //
83  virtual bool Init(size_t dictionary_size);
84
85  // Write the header (as defined in section 4.1 of the RFC) to *out.
86  // This includes information that can be gathered
87  // before the first chunk of input is available.
88  virtual void WriteHeader(OutputStringInterface* out,
89                           VCDiffFormatExtensionFlags format_extensions);
90
91  virtual size_t target_length() const { return target_length_; }
92
93  // Encode an ADD opcode with the "size" bytes starting at data
94  virtual void Add(const char* data, size_t size);
95
96  // Encode a COPY opcode with args "offset" (into dictionary) and "size" bytes.
97  virtual void Copy(int32_t offset, size_t size);
98
99  // Encode a RUN opcode for "size" copies of the value "byte".
100  virtual void Run(size_t size, unsigned char byte);
101
102  virtual void AddChecksum(VCDChecksum checksum) {
103    add_checksum_ = true;
104    checksum_ = checksum;
105  }
106
107  // Appends the encoded delta window to the output
108  // string.  The output string is not null-terminated and may contain embedded
109  // '\0' characters.
110  virtual void Output(OutputStringInterface* out);
111
112  // There should not be any need to output more data
113  // since EncodeChunk() encodes a complete target window
114  // and there is no end-of-delta-file marker.
115  virtual void FinishEncoding(OutputStringInterface* /*out*/) {}
116
117 private:
118  typedef std::string string;
119
120  // The maximum value for the mode of a COPY instruction.
121  const unsigned char max_mode_;
122
123  // If interleaved is true, sets data_for_add_and_run_ and
124  // addresses_for_copy_ to point at instructions_and_sizes_,
125  // so that instructions, sizes, addresses and data will be
126  // combined into a single interleaved stream.
127  // If interleaved is false, sets data_for_add_and_run_ and
128  // addresses_for_copy_ to point at their corresponding
129  // separate_... strings, so that the three sections will
130  // be generated separately from one another.
131  //
132  void InitSectionPointers(bool interleaved);
133
134  // Determines the best opcode to encode an instruction, and appends
135  // or substitutes that opcode and its size into the
136  // instructions_and_sizes_ string.
137  //
138  void EncodeInstruction(VCDiffInstructionType inst,
139                         size_t size,
140                         unsigned char mode);
141
142  void EncodeInstruction(VCDiffInstructionType inst, size_t size) {
143    return EncodeInstruction(inst, size, 0);
144  }
145
146  // Calculates the number of bytes needed to store the given size value as a
147  // variable-length integer (VarintBE).
148  static size_t CalculateLengthOfSizeAsVarint(size_t size);
149
150  // Appends the size value to the string as a variable-length integer.
151  static void AppendSizeToString(size_t size, string* out);
152
153  // Appends the size value to the output string as a variable-length integer.
154  static void AppendSizeToOutputString(size_t size, OutputStringInterface* out);
155
156  // Calculates the "Length of the delta encoding" field for the delta window
157  // header, based on the sizes of the sections and of the other header
158  // elements.
159  size_t CalculateLengthOfTheDeltaEncoding() const;
160
161  // None of the following 'string' objects are null-terminated.
162
163  // A series of instruction opcodes, each of which may be followed
164  // by one or two Varint values representing the size parameters
165  // of the first and second instruction in the opcode.
166  string instructions_and_sizes_;
167
168  // A series of data arguments (byte values) used for ADD and RUN
169  // instructions.  Depending on whether interleaved output is used
170  // for streaming or not, the pointer may point to
171  // separate_data_for_add_and_run_ or to instructions_and_sizes_.
172  string *data_for_add_and_run_;
173  string separate_data_for_add_and_run_;
174
175  // A series of Varint addresses used for COPY instructions.
176  // For the SAME mode, a byte value is stored instead of a Varint.
177  // Depending on whether interleaved output is used
178  // for streaming or not, the pointer may point to
179  // separate_addresses_for_copy_ or to instructions_and_sizes_.
180  string *addresses_for_copy_;
181  string separate_addresses_for_copy_;
182
183  VCDiffAddressCache address_cache_;
184
185  size_t dictionary_size_;
186
187  // The number of bytes of target data that has been encoded so far.
188  // Each time Add(), Copy(), or Run() is called, this will be incremented.
189  // The target length is used to compute HERE mode addresses
190  // for COPY instructions, and is also written into the header
191  // of the delta window when Output() is called.
192  //
193  size_t target_length_;
194
195  const VCDiffCodeTableData* code_table_data_;
196
197  // The instruction map facilitates finding an opcode quickly given an
198  // instruction inst, size, and mode.  This is an alternate representation
199  // of the same information that is found in code_table_data_.
200  //
201  const VCDiffInstructionMap* instruction_map_;
202
203  // The zero-based index within instructions_and_sizes_ of the byte
204  // that contains the last single-instruction opcode generated by
205  // EncodeInstruction().  (See that function for exhaustive details.)
206  // It is necessary to use an index rather than a pointer for this value
207  // because instructions_and_sizes_ may be resized, which would invalidate
208  // any pointers into its data buffer.  The value -1 is reserved to mean that
209  // either no opcodes have been generated yet, or else the last opcode
210  // generated was a double-instruction opcode.
211  //
212  int last_opcode_index_;
213
214  // If true, an Adler32 checksum of the target window data will be written as
215  // a variable-length integer, just after the size of the addresses section.
216  //
217  bool add_checksum_;
218
219  // The checksum to be written to the current target window,
220  // if add_checksum_ is true.
221  // This will not be calculated based on the individual calls to Add(), Run(),
222  // and Copy(), which would be unnecessarily expensive.  Instead, the code
223  // that uses the VCDiffCodeTableWriter object is expected to calculate
224  // the checksum all at once and to call AddChecksum() with that value.
225  // Must be called sometime before calling Output(), though it can be called
226  // either before or after the calls to Add(), Run(), and Copy().
227  //
228  VCDChecksum checksum_;
229
230  // Making these private avoids implicit copy constructor & assignment operator
231  VCDiffCodeTableWriter(const VCDiffCodeTableWriter&);  // NOLINT
232  void operator=(const VCDiffCodeTableWriter&);
233};
234
235};  // namespace open_vcdiff
236
237#endif  // OPEN_VCDIFF_ENCODETABLE_H_
238