1// Copyright 2008 Google Inc.
2// Author: Lincoln Smith
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8//      http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16// Implements a Decoder for the format described in
17// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
18// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
19//
20// The RFC describes the possibility of using a secondary compressor
21// to further reduce the size of each section of the VCDIFF output.
22// That feature is not supported in this implementation of the encoder
23// and decoder.
24// No secondary compressor types have been publicly registered with
25// the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
26// in the more than five years since the registry was created, so there
27// is no standard set of compressor IDs which would be generated by other
28// encoders or accepted by other decoders.
29
30#include <config.h>
31#include "google/vcdecoder.h"
32#include <stddef.h>  // size_t, ptrdiff_t
33#include <stdint.h>  // int32_t
34#include <string.h>  // memcpy, memset
35#include <memory>  // auto_ptr
36#include <string>
37#include "addrcache.h"
38#include "checksum.h"
39#include "codetable.h"
40#include "decodetable.h"
41#include "headerparser.h"
42#include "logging.h"
43#include "google/output_string.h"
44#include "varint_bigendian.h"
45#include "vcdiff_defs.h"
46
47namespace open_vcdiff {
48
49// This class is used to parse delta file windows as described
50// in RFC sections 4.2 and 4.3.  Its methods are not thread-safe.
51//
52// Here is the window format copied from the RFC:
53//
54// Window1
55//     Win_Indicator                            - byte
56//     [Source segment size]                    - integer
57//     [Source segment position]                - integer
58//     The delta encoding of the target window
59//         Length of the delta encoding         - integer
60//         The delta encoding
61//             Size of the target window        - integer
62//             Delta_Indicator                  - byte
63//             Length of data for ADDs and RUNs - integer
64//             Length of instructions and sizes - integer
65//             Length of addresses for COPYs    - integer
66//             Data section for ADDs and RUNs   - array of bytes
67//             Instructions and sizes section   - array of bytes
68//             Addresses section for COPYs      - array of bytes
69// Window2
70// ...
71//
72// Sample usage:
73//
74// VCDiffDeltaFileWindow delta_window_;
75// delta_window_.Init(parent);
76// ParseableChunk parseable_chunk(input_buffer,
77//                                input_size,
78//                                leftover_unencoded_bytes);
79// while (!parseable_chunk.Empty()) {
80//   switch (delta_window_.DecodeWindow(&parseable_chunk)) {
81//     case RESULT_END_OF_DATA:
82//       <Read more input and retry DecodeWindow later.>
83//     case RESULT_ERROR:
84//       <Handle error case.  An error log message has already been generated.>
85//   }
86// }
87//
88// DecodeWindow consumes only a single window, and needs to be placed within
89// a loop if multiple windows are to be processed.
90//
91class VCDiffDeltaFileWindow {
92 public:
93  VCDiffDeltaFileWindow();
94  ~VCDiffDeltaFileWindow();
95
96  // Init() should be called immediately after constructing the
97  // VCDiffDeltaFileWindow().  It must be called before DecodeWindow() can be
98  // invoked, or an error will occur.
99  void Init(VCDiffStreamingDecoderImpl* parent);
100
101  // Resets the pointers to the data sections in the current window.
102  void Reset();
103
104  bool UseCodeTable(const VCDiffCodeTableData& code_table_data,
105                    unsigned char max_mode) {
106    return reader_.UseCodeTable(code_table_data, max_mode);
107  }
108
109  // Decodes a single delta window using the input data from *parseable_chunk.
110  // Appends the decoded target window to parent_->decoded_target().  Returns
111  // RESULT_SUCCESS if an entire window was decoded, or RESULT_END_OF_DATA if
112  // the end of input was reached before the entire window could be decoded and
113  // more input is expected (only possible if IsInterleaved() is true), or
114  // RESULT_ERROR if an error occurred during decoding.  In the RESULT_ERROR
115  // case, the value of parseable_chunk->pointer_ is undefined; otherwise,
116  // parseable_chunk->Advance() is called to point to the input data position
117  // just after the data that has been decoded.
118  //
119  VCDiffResult DecodeWindow(ParseableChunk* parseable_chunk);
120
121  bool FoundWindowHeader() const {
122    return found_header_;
123  }
124
125  bool MoreDataExpected() const {
126    // When parsing an interleaved-format delta file,
127    // every time DecodeBody() exits, interleaved_bytes_expected_
128    // will be decremented by the number of bytes parsed.  If it
129    // reaches zero, then there is no more data expected because
130    // the size of the interleaved section (given in the window
131    // header) has been reached.
132    return IsInterleaved() && (interleaved_bytes_expected_ > 0);
133  }
134
135  size_t target_window_start_pos() const { return target_window_start_pos_; }
136
137  void set_target_window_start_pos(size_t new_start_pos) {
138    target_window_start_pos_ = new_start_pos;
139  }
140
141  // Returns the number of bytes remaining to be decoded in the target window.
142  // If not in the process of decoding a window, returns 0.
143  size_t TargetBytesRemaining();
144
145 private:
146  // Reads the header of the window section as described in RFC sections 4.2 and
147  // 4.3, up to and including the value "Length of addresses for COPYs".  If the
148  // entire header is found, this function sets up the DeltaWindowSections
149  // instructions_and_sizes_, data_for_add_and_run_, and addresses_for_copy_ so
150  // that the decoder can begin decoding the opcodes in these sections.  Returns
151  // RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA if the end of
152  // available data was reached before the entire header could be read.  (The
153  // latter may be an error condition if there is no more data available.)
154  // Otherwise, returns RESULT_SUCCESS and advances parseable_chunk past the
155  // parsed header.
156  //
157  VCDiffResult ReadHeader(ParseableChunk* parseable_chunk);
158
159  // After the window header has been parsed as far as the Delta_Indicator,
160  // this function is called to parse the following delta window header fields:
161  //
162  //     Length of data for ADDs and RUNs - integer (VarintBE format)
163  //     Length of instructions and sizes - integer (VarintBE format)
164  //     Length of addresses for COPYs    - integer (VarintBE format)
165  //
166  // If has_checksum_ is true, it also looks for the following element:
167  //
168  //     Adler32 checksum            - unsigned 32-bit integer (VarintBE format)
169  //
170  // It sets up the DeltaWindowSections instructions_and_sizes_,
171  // data_for_add_and_run_, and addresses_for_copy_.  If the interleaved format
172  // is being used, all three sections will include the entire window body; if
173  // the standard format is used, three non-overlapping window sections will be
174  // defined.  Returns RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA
175  // if standard format is being used and there is not enough input data to read
176  // the entire window body.  Otherwise, returns RESULT_SUCCESS.
177  VCDiffResult SetUpWindowSections(VCDiffHeaderParser* header_parser);
178
179  // Decodes the body of the window section as described in RFC sections 4.3,
180  // including the sections "Data section for ADDs and RUNs", "Instructions
181  // and sizes section", and "Addresses section for COPYs".  These sections
182  // must already have been set up by ReadWindowHeader().  Returns a
183  // non-negative value on success, or RESULT_END_OF_DATA if the end of input
184  // was reached before the entire window could be decoded (only possible if
185  // IsInterleaved() is true), or RESULT_ERROR if an error occurred during
186  // decoding.  Appends as much of the decoded target window as possible to
187  // parent->decoded_target().
188  //
189  int DecodeBody(ParseableChunk* parseable_chunk);
190
191  // Returns the number of bytes already decoded into the target window.
192  size_t TargetBytesDecoded();
193
194  // Decodes a single ADD instruction, updating parent_->decoded_target_.
195  VCDiffResult DecodeAdd(size_t size);
196
197  // Decodes a single RUN instruction, updating parent_->decoded_target_.
198  VCDiffResult DecodeRun(size_t size);
199
200  // Decodes a single COPY instruction, updating parent_->decoded_target_.
201  VCDiffResult DecodeCopy(size_t size, unsigned char mode);
202
203  // When using the interleaved format, this function is called both on parsing
204  // the header and on resuming after a RESULT_END_OF_DATA was returned from a
205  // previous call to DecodeBody().  It sets up all three section pointers to
206  // reference the same interleaved stream of instructions, sizes, addresses,
207  // and data.  These pointers must be reset every time that work resumes on a
208  // delta window,  because the input data string may have been changed or
209  // resized since DecodeBody() last returned.
210  void UpdateInterleavedSectionPointers(const char* data_pos,
211                                        const char* data_end) {
212    const ptrdiff_t available_data = data_end - data_pos;
213    // Don't read past the end of currently-available data
214    if (available_data > interleaved_bytes_expected_) {
215      instructions_and_sizes_.Init(data_pos, interleaved_bytes_expected_);
216    } else {
217      instructions_and_sizes_.Init(data_pos, available_data);
218    }
219    data_for_add_and_run_.Init(&instructions_and_sizes_);
220    addresses_for_copy_.Init(&instructions_and_sizes_);
221  }
222
223  // If true, the interleaved format described in AllowInterleaved() is used
224  // for the current delta file.  Only valid after ReadWindowHeader() has been
225  // called and returned a positive number (i.e., the whole header was parsed),
226  // but before the window has finished decoding.
227  //
228  bool IsInterleaved() const {
229    // If the sections are interleaved, both addresses_for_copy_ and
230    // data_for_add_and_run_ should point at instructions_and_sizes_.
231    return !addresses_for_copy_.IsOwned();
232  }
233
234  // Executes a single COPY or ADD instruction, appending data to
235  // parent_->decoded_target().
236  void CopyBytes(const char* data, size_t size);
237
238  // Executes a single RUN instruction, appending data to
239  // parent_->decoded_target().
240  void RunByte(unsigned char byte, size_t size);
241
242  // Advance *parseable_chunk to point to the current position in the
243  // instructions/sizes section.  If interleaved format is used, then
244  // decrement the number of expected bytes in the instructions/sizes section
245  // by the number of instruction/size bytes parsed.
246  void UpdateInstructionPointer(ParseableChunk* parseable_chunk);
247
248  // The parent object which was passed to Init().
249  VCDiffStreamingDecoderImpl* parent_;
250
251  // This value will be true if VCDiffDeltaFileWindow::ReadDeltaWindowHeader()
252  // has been called and succeeded in parsing the delta window header, but the
253  // entire window has not yet been decoded.
254  bool found_header_;
255
256  // Contents and length of the current source window.  source_segment_ptr_
257  // will be non-NULL if (a) the window section header for the current window
258  // has been read, but the window has not yet finished decoding; or
259  // (b) the window did not specify a source segment.
260  const char* source_segment_ptr_;
261  size_t source_segment_length_;
262
263  // The delta encoding window sections as defined in RFC section 4.3.
264  // The pointer for each section will be incremented as data is consumed and
265  // decoded from that section.  If the interleaved format is used,
266  // data_for_add_and_run_ and addresses_for_copy_ will both point to
267  // instructions_and_sizes_; otherwise, they will be separate data sections.
268  //
269  DeltaWindowSection instructions_and_sizes_;
270  DeltaWindowSection data_for_add_and_run_;
271  DeltaWindowSection addresses_for_copy_;
272
273  // The expected bytes left to decode in instructions_and_sizes_.  Only used
274  // for the interleaved format.
275  int interleaved_bytes_expected_;
276
277  // The expected length of the target window once it has been decoded.
278  size_t target_window_length_;
279
280  // The index in decoded_target at which the first byte of the current
281  // target window was/will be written.
282  size_t target_window_start_pos_;
283
284  // If has_checksum_ is true, then expected_checksum_ contains an Adler32
285  // checksum of the target window data.  This is an extension included in the
286  // VCDIFF 'S' (SDCH) format, but is not part of the RFC 3284 draft standard.
287  bool has_checksum_;
288  VCDChecksum expected_checksum_;
289
290  VCDiffCodeTableReader reader_;
291
292  // Making these private avoids implicit copy constructor & assignment operator
293  VCDiffDeltaFileWindow(const VCDiffDeltaFileWindow&);  // NOLINT
294  void operator=(const VCDiffDeltaFileWindow&);
295};
296
297// *** Inline methods for VCDiffDeltaFileWindow
298
// Constructs a window with no parent decoder (parent_ is NULL until Init()
// is called) and then calls Reset().
inline VCDiffDeltaFileWindow::VCDiffDeltaFileWindow() : parent_(NULL) {
  Reset();
}
302
// The destructor body is empty; in particular it does not delete parent_.
inline VCDiffDeltaFileWindow::~VCDiffDeltaFileWindow() { }
304
// Remembers the decoder that owns this window.  Only the pointer is stored;
// nothing else is modified here.
inline void VCDiffDeltaFileWindow::Init(VCDiffStreamingDecoderImpl* parent) {
  parent_ = parent;
}
308
309class VCDiffStreamingDecoderImpl {
310 public:
311  typedef std::string string;
312
313  // The default maximum target file size (and target window size) if
314  // SetMaximumTargetFileSize() is not called.
315  static const size_t kDefaultMaximumTargetFileSize = 67108864U;  // 64 MB
316
317  // The largest value that can be passed to SetMaximumTargetWindowSize().
318  // Using a larger value will result in an error.
319  static const size_t kTargetSizeLimit = 2147483647U;  // INT32_MAX
320
321  // A constant that is the default value for planned_target_file_size_,
322  // indicating that the decoder does not have an expected length
323  // for the target data.
324  static const size_t kUnlimitedBytes = static_cast<size_t>(-3);
325
326  VCDiffStreamingDecoderImpl();
327  ~VCDiffStreamingDecoderImpl();
328
329  // Resets all member variables to their initial states.
330  void Reset();
331
332  // These functions are identical to their counterparts
333  // in VCDiffStreamingDecoder.
334  //
335  void StartDecoding(const char* dictionary_ptr, size_t dictionary_size);
336
337  bool DecodeChunk(const char* data,
338                   size_t len,
339                   OutputStringInterface* output_string);
340
341  bool FinishDecoding();
342
343  // If true, the version of VCDIFF used in the current delta file allows
344  // for the interleaved format, in which instructions, addresses and data
345  // are all sent interleaved in the instructions section of each window
346  // rather than being sent in separate sections.  This is not part of
347  // the VCDIFF draft standard, so we've defined a special version code
348  // 'S' which implies that this feature is available.  Even if interleaving
349  // is supported, it is not mandatory; interleaved format will be implied
350  // if the address and data sections are both zero-length.
351  //
352  bool AllowInterleaved() const { return vcdiff_version_code_ == 'S'; }
353
354  // If true, the version of VCDIFF used in the current delta file allows
355  // each delta window to contain an Adler32 checksum of the target window data.
356  // If the bit 0x08 (VCD_CHECKSUM) is set in the Win_Indicator flags, then
357  // this checksum will appear as a variable-length integer, just after the
358  // "length of addresses for COPYs" value and before the window data sections.
359  // It is possible for some windows in a delta file to use the checksum feature
360  // and for others not to use it (and leave the flag bit set to 0.)
361  // Just as with AllowInterleaved(), this extension is not part of the draft
362  // standard and is only available when the version code 'S' is specified.
363  //
364  bool AllowChecksum() const { return vcdiff_version_code_ == 'S'; }
365
366  bool SetMaximumTargetFileSize(size_t new_maximum_target_file_size) {
367    maximum_target_file_size_ = new_maximum_target_file_size;
368    return true;
369  }
370
371  bool SetMaximumTargetWindowSize(size_t new_maximum_target_window_size) {
372    if (new_maximum_target_window_size > kTargetSizeLimit) {
373      VCD_ERROR << "Specified maximum target window size "
374                << new_maximum_target_window_size << " exceeds limit of "
375                << kTargetSizeLimit << " bytes" << VCD_ENDL;
376      return false;
377    }
378    maximum_target_window_size_ = new_maximum_target_window_size;
379    return true;
380  }
381
382  // See description of planned_target_file_size_, below.
383  bool HasPlannedTargetFileSize() const {
384    return planned_target_file_size_ != kUnlimitedBytes;
385  }
386
387  void SetPlannedTargetFileSize(size_t planned_target_file_size) {
388    planned_target_file_size_ = planned_target_file_size;
389  }
390
391  void AddToTotalTargetWindowSize(size_t window_size) {
392    total_of_target_window_sizes_ += window_size;
393  }
394
395  // Checks to see whether the decoded target data has reached its planned size.
396  bool ReachedPlannedTargetFileSize() const {
397    if (!HasPlannedTargetFileSize()) {
398      return false;
399    }
400    // The planned target file size should not have been exceeded.
401    // TargetWindowWouldExceedSizeLimits() ensures that the advertised size of
402    // each target window would not make the target file exceed that limit, and
403    // DecodeBody() will return RESULT_ERROR if the actual decoded output ever
404    // exceeds the advertised target window size.
405    if (total_of_target_window_sizes_ > planned_target_file_size_) {
406      VCD_DFATAL << "Internal error: Decoded data size "
407                 << total_of_target_window_sizes_
408                 << " exceeds planned target file size "
409                 << planned_target_file_size_ << VCD_ENDL;
410      return true;
411    }
412    return total_of_target_window_sizes_ == planned_target_file_size_;
413  }
414
415  // Checks to see whether adding a new target window of the specified size
416  // would exceed the planned target file size, the maximum target file size,
417  // or the maximum target window size.  If so, logs an error and returns true;
418  // otherwise, returns false.
419  bool TargetWindowWouldExceedSizeLimits(size_t window_size) const;
420
421  // Returns the amount of input data passed to the last DecodeChunk()
422  // that was not consumed by the decoder.  This is essential if
423  // SetPlannedTargetFileSize() is being used, in order to preserve the
424  // remaining input data stream once the planned target file has been decoded.
425  size_t GetUnconsumedDataSize() const {
426    return unparsed_bytes_.size();
427  }
428
429  // This function will return true if the decoder has parsed a complete delta
430  // file header plus zero or more delta file windows, with no data left over.
431  // It will also return true if no delta data at all was decoded.  If these
432  // conditions are not met, then FinishDecoding() should not be called.
433  bool IsDecodingComplete() const {
434    if (!FoundFileHeader()) {
435      // No complete delta file header has been parsed yet.  DecodeChunk()
436      // may have received some data that it hasn't yet parsed, in which case
437      // decoding is incomplete.
438      return unparsed_bytes_.empty();
439    } else if (custom_code_table_decoder_.get()) {
440      // The decoder is in the middle of parsing a custom code table.
441      return false;
442    } else if (delta_window_.FoundWindowHeader()) {
443      // The decoder is in the middle of parsing an interleaved format delta
444      // window.
445      return false;
446    } else if (ReachedPlannedTargetFileSize()) {
447      // The decoder found exactly the planned number of bytes.  In this case
448      // it is OK for unparsed_bytes_ to be non-empty; it contains the leftover
449      // data after the end of the delta file.
450      return true;
451    } else {
452      // No complete delta file window has been parsed yet.  DecodeChunk()
453      // may have received some data that it hasn't yet parsed, in which case
454      // decoding is incomplete.
455      return unparsed_bytes_.empty();
456    }
457  }
458
459  const char* dictionary_ptr() const { return dictionary_ptr_; }
460
461  size_t dictionary_size() const { return dictionary_size_; }
462
463  VCDiffAddressCache* addr_cache() { return addr_cache_.get(); }
464
465  string* decoded_target() { return &decoded_target_; }
466
467  bool allow_vcd_target() const { return allow_vcd_target_; }
468
469  void SetAllowVcdTarget(bool allow_vcd_target) {
470    if (start_decoding_was_called_) {
471      VCD_DFATAL << "SetAllowVcdTarget() called after StartDecoding()"
472                 << VCD_ENDL;
473      return;
474    }
475    allow_vcd_target_ = allow_vcd_target;
476  }
477
478 private:
479  // Reads the VCDiff delta file header section as described in RFC section 4.1,
480  // except the custom code table data.  Returns RESULT_ERROR if an error
481  // occurred, or RESULT_END_OF_DATA if the end of available data was reached
482  // before the entire header could be read.  (The latter may be an error
483  // condition if there is no more data available.)  Otherwise, advances
484  // data->position_ past the header and returns RESULT_SUCCESS.
485  //
486  VCDiffResult ReadDeltaFileHeader(ParseableChunk* data);
487
488  // Indicates whether or not the header has already been read.
489  bool FoundFileHeader() const { return addr_cache_.get() != NULL; }
490
491  // If ReadDeltaFileHeader() finds the VCD_CODETABLE flag set within the delta
492  // file header, this function parses the custom cache sizes and initializes
493  // a nested VCDiffStreamingDecoderImpl object that will be used to parse the
494  // custom code table in ReadCustomCodeTable().  Returns RESULT_ERROR if an
495  // error occurred, or RESULT_END_OF_DATA if the end of available data was
496  // reached before the custom cache sizes could be read.  Otherwise, returns
497  // the number of bytes read.
498  //
499  int InitCustomCodeTable(const char* data_start, const char* data_end);
500
501  // If a custom code table was specified in the header section that was parsed
502  // by ReadDeltaFileHeader(), this function makes a recursive call to another
503  // VCDiffStreamingDecoderImpl object (custom_code_table_decoder_), since the
504  // custom code table is expected to be supplied as an embedded VCDIFF
505  // encoding that uses the standard code table.  Returns RESULT_ERROR if an
506  // error occurs, or RESULT_END_OF_DATA if the end of available data was
507  // reached before the entire custom code table could be read.  Otherwise,
508  // returns RESULT_SUCCESS and sets *data_ptr to the position after the encoded
509  // custom code table.  If the function returns RESULT_SUCCESS or
510  // RESULT_END_OF_DATA, it advances data->position_ past the parsed bytes.
511  //
512  VCDiffResult ReadCustomCodeTable(ParseableChunk* data);
513
514  // Called after the decoder exhausts all input data.  This function
515  // copies from decoded_target_ into output_string all the data that
516  // has not yet been output.  It sets decoded_target_output_position_
517  // to mark the start of the next data that needs to be output.
518  void AppendNewOutputText(OutputStringInterface* output_string);
519
520  // Appends to output_string the portion of decoded_target_ that has
521  // not yet been output, then clears decoded_target_.  This function is
522  // called after each complete target window has been decoded if
523  // allow_vcd_target is false.  In that case, there is no need to retain
524  // target data from any window except the current window.
525  void FlushDecodedTarget(OutputStringInterface* output_string);
526
527  // Contents and length of the source (dictionary) data.
528  const char* dictionary_ptr_;
529  size_t dictionary_size_;
530
531  // This string will be used to store any unparsed bytes left over when
532  // DecodeChunk() reaches the end of its input and returns RESULT_END_OF_DATA.
533  // It will also be used to concatenate those unparsed bytes with the data
534  // supplied to the next call to DecodeChunk(), so that they appear in
535  // contiguous memory.
536  string unparsed_bytes_;
537
538  // The portion of the target file that has been decoded so far.  This will be
539  // used to fill the output string for DecodeChunk(), and will also be used to
540  // execute COPY instructions that reference target data.  Since the source
541  // window can come from a range of addresses in the previously decoded target
542  // data, the entire target file needs to be available to the decoder, not just
543  // the current target window.
544  string decoded_target_;
545
546  // The VCDIFF version byte (also known as "header4") from the
547  // delta file header.
548  unsigned char vcdiff_version_code_;
549
550  VCDiffDeltaFileWindow delta_window_;
551
552  std::auto_ptr<VCDiffAddressCache> addr_cache_;
553
554  // Will be NULL unless a custom code table has been defined.
555  std::auto_ptr<VCDiffCodeTableData> custom_code_table_;
556
557  // Used to receive the decoded custom code table.
558  string custom_code_table_string_;
559
560  // If a custom code table is specified, it will be expressed
561  // as an embedded VCDIFF delta file which uses the default code table
562  // as the source file (dictionary).  Use a child decoder object
563  // to decode that delta file.
564  std::auto_ptr<VCDiffStreamingDecoderImpl> custom_code_table_decoder_;
565
566  // If set, then the decoder is expecting *exactly* this number of
567  // target bytes to be decoded from one or more delta file windows.
568  // If this number is exceeded while decoding a window, but was not met
569  // before starting on that window, an error will be reported.
570  // If FinishDecoding() is called before this number is met, an error
571  // will also be reported.  This feature is used for decoding the
572  // embedded code table data within a VCDIFF delta file; we want to
573  // stop processing the embedded data once the entire code table has
574  // been decoded, and treat the rest of the available data as part
575  // of the enclosing delta file.
576  size_t planned_target_file_size_;
577
578  size_t maximum_target_file_size_;
579
580  size_t maximum_target_window_size_;
581
582  // Contains the sum of the decoded sizes of all target windows seen so far,
583  // including the expected total size of the current target window in progress
584  // (even if some of the current target window has not yet been decoded.)
585  size_t total_of_target_window_sizes_;
586
587  // Contains the byte position within decoded_target_ of the first data that
588  // has not yet been output by AppendNewOutputText().
589  size_t decoded_target_output_position_;
590
591  // This value is used to ensure the correct order of calls to the interface
592  // functions, i.e., a single call to StartDecoding(), followed by zero or
593  // more calls to DecodeChunk(), followed by a single call to
594  // FinishDecoding().
595  bool start_decoding_was_called_;
596
597  // If this value is true then the VCD_TARGET flag can be specified to allow
598  // the source segment to be chosen from the previously-decoded target data.
599  // (This is the default behavior.)  If it is false, then specifying the
600  // VCD_TARGET flag is considered an error, and the decoder does not need to
601  // keep in memory any decoded target data prior to the current window.
602  bool allow_vcd_target_;
603
604  // Making these private avoids implicit copy constructor & assignment operator
605  VCDiffStreamingDecoderImpl(const VCDiffStreamingDecoderImpl&);  // NOLINT
606  void operator=(const VCDiffStreamingDecoderImpl&);
607};
608
609// *** Methods for VCDiffStreamingDecoderImpl
610
// Namespace-scope definitions for two of the static const members that are
// declared and initialized inside the class body.  The initializers stay in
// the class; these lines only provide the definitions.
const size_t VCDiffStreamingDecoderImpl::kDefaultMaximumTargetFileSize;
const size_t VCDiffStreamingDecoderImpl::kUnlimitedBytes;
613
// Both size limits start at kDefaultMaximumTargetFileSize and VCD_TARGET is
// allowed by default.  These three members are set only here (Reset() does
// not touch them).
VCDiffStreamingDecoderImpl::VCDiffStreamingDecoderImpl()
    : maximum_target_file_size_(kDefaultMaximumTargetFileSize),
      maximum_target_window_size_(kDefaultMaximumTargetFileSize),
      allow_vcd_target_(true) {
  // Give the window decoder its parent before Reset(), which in turn calls
  // delta_window_.Reset().
  delta_window_.Init(this);
  Reset();
}
621
// Reset() will delete the component objects without reallocating them:
// it calls reset() on addr_cache_, custom_code_table_ and
// custom_code_table_decoder_.
VCDiffStreamingDecoderImpl::~VCDiffStreamingDecoderImpl() { Reset(); }
624
625void VCDiffStreamingDecoderImpl::Reset() {
626  start_decoding_was_called_ = false;
627  dictionary_ptr_ = NULL;
628  dictionary_size_ = 0;
629  vcdiff_version_code_ = '\0';
630  planned_target_file_size_ = kUnlimitedBytes;
631  total_of_target_window_sizes_ = 0;
632  addr_cache_.reset();
633  custom_code_table_.reset();
634  custom_code_table_decoder_.reset();
635  delta_window_.Reset();
636  decoded_target_output_position_ = 0;
637}
638
639void VCDiffStreamingDecoderImpl::StartDecoding(const char* dictionary_ptr,
640                                               size_t dictionary_size) {
641  if (start_decoding_was_called_) {
642    VCD_DFATAL << "StartDecoding() called twice without FinishDecoding()"
643               << VCD_ENDL;
644    return;
645  }
646  unparsed_bytes_.clear();
647  decoded_target_.clear();  // delta_window_.Reset() depends on this
648  Reset();
649  dictionary_ptr_ = dictionary_ptr;
650  dictionary_size_ = dictionary_size;
651  start_decoding_was_called_ = true;
652}
653
654// Reads the VCDiff delta file header section as described in RFC section 4.1:
655//
656//     Header1                                  - byte = 0xD6 (ASCII 'V' | 0x80)
657//     Header2                                  - byte = 0xC3 (ASCII 'C' | 0x80)
658//     Header3                                  - byte = 0xC4 (ASCII 'D' | 0x80)
659//     Header4                                  - byte
660//     Hdr_Indicator                            - byte
661//     [Secondary compressor ID]                - byte
662//     [Length of code table data]              - integer
663//     [Code table data]
664//
665// Initializes the code table and address cache objects.  Returns RESULT_ERROR
666// if an error occurred, and RESULT_END_OF_DATA if the end of available data was
667// reached before the entire header could be read.  (The latter may be an error
668// condition if there is no more data available.)  Otherwise, returns
669// RESULT_SUCCESS, and removes the header bytes from the data string.
670//
671// It's relatively inefficient to expect this function to parse any number of
672// input bytes available, down to 1 byte, but it is necessary in case the input
673// is not a properly formatted VCDIFF delta file.  If the entire input consists
674// of two bytes "12", then we should recognize that it does not match the
675// initial VCDIFF magic number "VCD" and report an error, rather than waiting
676// indefinitely for more input that will never arrive.
677//
678VCDiffResult VCDiffStreamingDecoderImpl::ReadDeltaFileHeader(
679    ParseableChunk* data) {
680  if (FoundFileHeader()) {
681    return RESULT_SUCCESS;
682  }
683  size_t data_size = data->UnparsedSize();
684  const DeltaFileHeader* header =
685      reinterpret_cast<const DeltaFileHeader*>(data->UnparsedData());
686  bool wrong_magic_number = false;
687  switch (data_size) {
688    // Verify only the bytes that are available.
689    default:
690      // Found header contents up to and including VCDIFF version
691      vcdiff_version_code_ = header->header4;
692      if ((vcdiff_version_code_ != 0x00) &&  // Draft standard VCDIFF (RFC 3284)
693          (vcdiff_version_code_ != 'S')) {   // Enhancements for SDCH protocol
694        VCD_ERROR << "Unrecognized VCDIFF format version" << VCD_ENDL;
695        return RESULT_ERROR;
696      }
697      // fall through
698    case 3:
699      if (header->header3 != 0xC4) {  // magic value 'D' | 0x80
700        wrong_magic_number = true;
701      }
702      // fall through
703    case 2:
704      if (header->header2 != 0xC3) {  // magic value 'C' | 0x80
705        wrong_magic_number = true;
706      }
707      // fall through
708    case 1:
709      if (header->header1 != 0xD6) {  // magic value 'V' | 0x80
710        wrong_magic_number = true;
711      }
712      // fall through
713    case 0:
714      if (wrong_magic_number) {
715        VCD_ERROR << "Did not find VCDIFF header bytes; "
716                      "input is not a VCDIFF delta file" << VCD_ENDL;
717        return RESULT_ERROR;
718      }
719      if (data_size < sizeof(DeltaFileHeader)) return RESULT_END_OF_DATA;
720  }
721  // Secondary compressor not supported.
722  if (header->hdr_indicator & VCD_DECOMPRESS) {
723    VCD_ERROR << "Secondary compression is not supported" << VCD_ENDL;
724    return RESULT_ERROR;
725  }
726  if (header->hdr_indicator & VCD_CODETABLE) {
727    int bytes_parsed = InitCustomCodeTable(
728        data->UnparsedData() + sizeof(DeltaFileHeader),
729        data->End());
730    switch (bytes_parsed) {
731      case RESULT_ERROR:
732        return RESULT_ERROR;
733      case RESULT_END_OF_DATA:
734        return RESULT_END_OF_DATA;
735      default:
736        data->Advance(sizeof(DeltaFileHeader) + bytes_parsed);
737    }
738  } else {
739    addr_cache_.reset(new VCDiffAddressCache);
740    // addr_cache_->Init() will be called
741    // from VCDiffStreamingDecoderImpl::DecodeChunk()
742    data->Advance(sizeof(DeltaFileHeader));
743  }
744  return RESULT_SUCCESS;
745}
746
747int VCDiffStreamingDecoderImpl::InitCustomCodeTable(const char* data_start,
748                                                    const char* data_end) {
749  // A custom code table is being specified.  Parse the variable-length
750  // cache sizes and begin parsing the encoded custom code table.
751  int32_t near_cache_size = 0, same_cache_size = 0;
752  VCDiffHeaderParser header_parser(data_start, data_end);
753  if (!header_parser.ParseInt32("size of near cache", &near_cache_size)) {
754    return header_parser.GetResult();
755  }
756  if (!header_parser.ParseInt32("size of same cache", &same_cache_size)) {
757    return header_parser.GetResult();
758  }
759  custom_code_table_.reset(new struct VCDiffCodeTableData);
760  memset(custom_code_table_.get(), 0, sizeof(struct VCDiffCodeTableData));
761  custom_code_table_string_.clear();
762  addr_cache_.reset(new VCDiffAddressCache(near_cache_size, same_cache_size));
763  // addr_cache_->Init() will be called
764  // from VCDiffStreamingDecoderImpl::DecodeChunk()
765
766  // If we reach this point (the start of the custom code table)
767  // without encountering a RESULT_END_OF_DATA condition, then we won't call
768  // ReadDeltaFileHeader() again for this delta file.
769  //
770  // Instantiate a recursive decoder to interpret the custom code table
771  // as a VCDIFF encoding of the default code table.
772  custom_code_table_decoder_.reset(new VCDiffStreamingDecoderImpl);
773  custom_code_table_decoder_->StartDecoding(
774      reinterpret_cast<const char*>(
775          &VCDiffCodeTableData::kDefaultCodeTableData),
776      sizeof(VCDiffCodeTableData::kDefaultCodeTableData));
777  custom_code_table_decoder_->SetPlannedTargetFileSize(
778      sizeof(*custom_code_table_));
779  return static_cast<int>(header_parser.ParsedSize());
780}
781
782VCDiffResult VCDiffStreamingDecoderImpl::ReadCustomCodeTable(
783    ParseableChunk* data) {
784  if (!custom_code_table_decoder_.get()) {
785    return RESULT_SUCCESS;
786  }
787  if (!custom_code_table_.get()) {
788    VCD_DFATAL << "Internal error:  custom_code_table_decoder_ is set,"
789                  " but custom_code_table_ is NULL" << VCD_ENDL;
790    return RESULT_ERROR;
791  }
792  OutputString<string> output_string(&custom_code_table_string_);
793  if (!custom_code_table_decoder_->DecodeChunk(data->UnparsedData(),
794                                               data->UnparsedSize(),
795                                               &output_string)) {
796    return RESULT_ERROR;
797  }
798  if (custom_code_table_string_.length() < sizeof(*custom_code_table_)) {
799    // Skip over the consumed data.
800    data->Finish();
801    return RESULT_END_OF_DATA;
802  }
803  if (!custom_code_table_decoder_->FinishDecoding()) {
804    return RESULT_ERROR;
805  }
806  if (custom_code_table_string_.length() != sizeof(*custom_code_table_)) {
807    VCD_DFATAL << "Decoded custom code table size ("
808               << custom_code_table_string_.length()
809               << ") does not match size of a code table ("
810               << sizeof(*custom_code_table_) << ")" << VCD_ENDL;
811    return RESULT_ERROR;
812  }
813  memcpy(custom_code_table_.get(),
814         custom_code_table_string_.data(),
815         sizeof(*custom_code_table_));
816  custom_code_table_string_.clear();
817  // Skip over the consumed data.
818  data->FinishExcept(custom_code_table_decoder_->GetUnconsumedDataSize());
819  custom_code_table_decoder_.reset();
820  delta_window_.UseCodeTable(*custom_code_table_, addr_cache_->LastMode());
821  return RESULT_SUCCESS;
822}
823
824void VCDiffStreamingDecoderImpl::FlushDecodedTarget(
825    OutputStringInterface* output_string) {
826  output_string->append(
827      decoded_target_.data() + decoded_target_output_position_,
828      decoded_target_.size() - decoded_target_output_position_);
829  decoded_target_.clear();
830  delta_window_.set_target_window_start_pos(0);
831  decoded_target_output_position_ = 0;
832}
833
834void VCDiffStreamingDecoderImpl::AppendNewOutputText(
835    OutputStringInterface* output_string) {
836  const size_t bytes_decoded_this_chunk =
837      decoded_target_.size() - decoded_target_output_position_;
838  if (bytes_decoded_this_chunk > 0) {
839    size_t target_bytes_remaining = delta_window_.TargetBytesRemaining();
840    if (target_bytes_remaining > 0) {
841      // The decoder is midway through decoding a target window.  Resize
842      // output_string to match the expected length.  The interface guarantees
843      // not to resize output_string more than once per target window decoded.
844      output_string->ReserveAdditionalBytes(bytes_decoded_this_chunk
845                                            + target_bytes_remaining);
846    }
847    output_string->append(
848        decoded_target_.data() + decoded_target_output_position_,
849        bytes_decoded_this_chunk);
850    decoded_target_output_position_ = decoded_target_.size();
851  }
852}
853
854bool VCDiffStreamingDecoderImpl::DecodeChunk(
855    const char* data,
856    size_t len,
857    OutputStringInterface* output_string) {
858  if (!start_decoding_was_called_) {
859    VCD_DFATAL << "DecodeChunk() called without StartDecoding()" << VCD_ENDL;
860    Reset();
861    return false;
862  }
863  ParseableChunk parseable_chunk(data, len);
864  if (!unparsed_bytes_.empty()) {
865    unparsed_bytes_.append(data, len);
866    parseable_chunk.SetDataBuffer(unparsed_bytes_.data(),
867                                  unparsed_bytes_.size());
868  }
869  VCDiffResult result = ReadDeltaFileHeader(&parseable_chunk);
870  if (RESULT_SUCCESS == result) {
871    result = ReadCustomCodeTable(&parseable_chunk);
872  }
873  if (RESULT_SUCCESS == result) {
874    while (!parseable_chunk.Empty()) {
875      result = delta_window_.DecodeWindow(&parseable_chunk);
876      if (RESULT_SUCCESS != result) {
877        break;
878      }
879      if (ReachedPlannedTargetFileSize()) {
880        // Found exactly the length we expected.  Stop decoding.
881        break;
882      }
883      if (!allow_vcd_target()) {
884        // VCD_TARGET will never be used to reference target data before the
885        // start of the current window, so flush and clear the contents of
886        // decoded_target_.
887        FlushDecodedTarget(output_string);
888      }
889    }
890  }
891  if (RESULT_ERROR == result) {
892    Reset();  // Don't allow further DecodeChunk calls
893    return false;
894  }
895  unparsed_bytes_.assign(parseable_chunk.UnparsedData(),
896                         parseable_chunk.UnparsedSize());
897  AppendNewOutputText(output_string);
898  return true;
899}
900
901// Finishes decoding after all data has been received.  Returns true
902// if decoding of the entire stream was successful.
903bool VCDiffStreamingDecoderImpl::FinishDecoding() {
904  bool success = true;
905  if (!start_decoding_was_called_) {
906    VCD_WARNING << "FinishDecoding() called before StartDecoding(),"
907                   " or called after DecodeChunk() returned false"
908                << VCD_ENDL;
909    success = false;
910  } else if (!IsDecodingComplete()) {
911    VCD_ERROR << "FinishDecoding() called before parsing entire"
912                 " delta file window" << VCD_ENDL;
913    success = false;
914  }
915  // Reset the object state for the next decode operation
916  Reset();
917  return success;
918}
919
920bool VCDiffStreamingDecoderImpl::TargetWindowWouldExceedSizeLimits(
921    size_t window_size) const {
922  if (window_size > maximum_target_window_size_) {
923    VCD_ERROR << "Length of target window (" << window_size
924              << ") exceeds limit of " << maximum_target_window_size_
925              << " bytes" << VCD_ENDL;
926    return true;
927  }
928  if (HasPlannedTargetFileSize()) {
929    // The logical expression to check would be:
930    //
931    //   total_of_target_window_sizes_ + window_size > planned_target_file_size_
932    //
933    // but the addition might cause an integer overflow if target_bytes_to_add
934    // is very large.  So it is better to check target_bytes_to_add against
935    // the remaining planned target bytes.
936    size_t remaining_planned_target_file_size =
937        planned_target_file_size_ - total_of_target_window_sizes_;
938    if (window_size > remaining_planned_target_file_size) {
939      VCD_ERROR << "Length of target window (" << window_size
940                << " bytes) plus previous windows ("
941                << total_of_target_window_sizes_
942                << " bytes) would exceed planned size of "
943                << planned_target_file_size_ << " bytes" << VCD_ENDL;
944      return true;
945    }
946  }
947  size_t remaining_maximum_target_bytes =
948      maximum_target_file_size_ - total_of_target_window_sizes_;
949  if (window_size > remaining_maximum_target_bytes) {
950    VCD_ERROR << "Length of target window (" << window_size
951              << " bytes) plus previous windows ("
952              << total_of_target_window_sizes_
953              << " bytes) would exceed maximum target file size of "
954              << maximum_target_file_size_ << " bytes" << VCD_ENDL;
955    return true;
956  }
957  return false;
958}
959
960// *** Methods for VCDiffDeltaFileWindow
961
962void VCDiffDeltaFileWindow::Reset() {
963  found_header_ = false;
964
965  // Mark the start of the current target window.
966  target_window_start_pos_ = parent_ ? parent_->decoded_target()->size() : 0U;
967  target_window_length_ = 0;
968
969  source_segment_ptr_ = NULL;
970  source_segment_length_ = 0;
971
972  instructions_and_sizes_.Invalidate();
973  data_for_add_and_run_.Invalidate();
974  addresses_for_copy_.Invalidate();
975
976  interleaved_bytes_expected_ = 0;
977
978  has_checksum_ = false;
979  expected_checksum_ = 0;
980}
981
982VCDiffResult VCDiffDeltaFileWindow::SetUpWindowSections(
983    VCDiffHeaderParser* header_parser) {
984  size_t add_and_run_data_length = 0;
985  size_t instructions_and_sizes_length = 0;
986  size_t addresses_length = 0;
987  if (!header_parser->ParseSectionLengths(has_checksum_,
988                                          &add_and_run_data_length,
989                                          &instructions_and_sizes_length,
990                                          &addresses_length,
991                                          &expected_checksum_)) {
992    return header_parser->GetResult();
993  }
994  if (parent_->AllowInterleaved() &&
995      (add_and_run_data_length == 0) &&
996      (addresses_length == 0)) {
997    // The interleaved format is being used.
998    interleaved_bytes_expected_ =
999        static_cast<int>(instructions_and_sizes_length);
1000    UpdateInterleavedSectionPointers(header_parser->UnparsedData(),
1001                                     header_parser->End());
1002  } else {
1003    // If interleaved format is not used, then the whole window contents
1004    // must be available before decoding can begin.  If only part of
1005    // the current window is available, then report end of data
1006    // and re-parse the whole header when DecodeChunk() is called again.
1007    if (header_parser->UnparsedSize() < (add_and_run_data_length +
1008                                         instructions_and_sizes_length +
1009                                         addresses_length)) {
1010      return RESULT_END_OF_DATA;
1011    }
1012    data_for_add_and_run_.Init(header_parser->UnparsedData(),
1013                               add_and_run_data_length);
1014    instructions_and_sizes_.Init(data_for_add_and_run_.End(),
1015                                 instructions_and_sizes_length);
1016    addresses_for_copy_.Init(instructions_and_sizes_.End(), addresses_length);
1017    if (addresses_for_copy_.End() != header_parser->EndOfDeltaWindow()) {
1018      VCD_ERROR << "The end of the instructions section "
1019                   "does not match the end of the delta window" << VCD_ENDL;
1020      return RESULT_ERROR;
1021    }
1022  }
1023  reader_.Init(instructions_and_sizes_.UnparsedDataAddr(),
1024               instructions_and_sizes_.End());
1025  return RESULT_SUCCESS;
1026}
1027
1028// Here are the elements of the delta window header to be parsed,
1029// from section 4 of the RFC:
1030//
1031//     Window1
1032//         Win_Indicator                            - byte
1033//         [Source segment size]                    - integer
1034//         [Source segment position]                - integer
1035//         The delta encoding of the target window
1036//             Length of the delta encoding         - integer
1037//             The delta encoding
1038//                 Size of the target window        - integer
1039//                 Delta_Indicator                  - byte
1040//                 Length of data for ADDs and RUNs - integer
1041//                 Length of instructions and sizes - integer
1042//                 Length of addresses for COPYs    - integer
1043//                 Data section for ADDs and RUNs   - array of bytes
1044//                 Instructions and sizes section   - array of bytes
1045//                 Addresses section for COPYs      - array of bytes
1046//
1047VCDiffResult VCDiffDeltaFileWindow::ReadHeader(
1048    ParseableChunk* parseable_chunk) {
1049  std::string* decoded_target = parent_->decoded_target();
1050  VCDiffHeaderParser header_parser(parseable_chunk->UnparsedData(),
1051                                   parseable_chunk->End());
1052  size_t source_segment_position = 0;
1053  unsigned char win_indicator = 0;
1054  if (!header_parser.ParseWinIndicatorAndSourceSegment(
1055          parent_->dictionary_size(),
1056          decoded_target->size(),
1057          parent_->allow_vcd_target(),
1058          &win_indicator,
1059          &source_segment_length_,
1060          &source_segment_position)) {
1061    return header_parser.GetResult();
1062  }
1063  has_checksum_ = parent_->AllowChecksum() && (win_indicator & VCD_CHECKSUM);
1064  if (!header_parser.ParseWindowLengths(&target_window_length_)) {
1065    return header_parser.GetResult();
1066  }
1067  if (parent_->TargetWindowWouldExceedSizeLimits(target_window_length_)) {
1068    // An error has been logged by TargetWindowWouldExceedSizeLimits().
1069    return RESULT_ERROR;
1070  }
1071  header_parser.ParseDeltaIndicator();
1072  VCDiffResult setup_return_code = SetUpWindowSections(&header_parser);
1073  if (RESULT_SUCCESS != setup_return_code) {
1074    return setup_return_code;
1075  }
1076  // Reserve enough space in the output string for the current target window.
1077  const size_t wanted_capacity =
1078      target_window_start_pos_ + target_window_length_;
1079  if (decoded_target->capacity() < wanted_capacity) {
1080    decoded_target->reserve(wanted_capacity);
1081  }
1082  // Get a pointer to the start of the source segment.
1083  if (win_indicator & VCD_SOURCE) {
1084    source_segment_ptr_ = parent_->dictionary_ptr() + source_segment_position;
1085  } else if (win_indicator & VCD_TARGET) {
1086    // This assignment must happen after the reserve().
1087    // decoded_target should not be resized again while processing this window,
1088    // so source_segment_ptr_ should remain valid.
1089    source_segment_ptr_ = decoded_target->data() + source_segment_position;
1090  }
1091  // The whole window header was found and parsed successfully.
1092  found_header_ = true;
1093  parseable_chunk->Advance(header_parser.ParsedSize());
1094  parent_->AddToTotalTargetWindowSize(target_window_length_);
1095  return RESULT_SUCCESS;
1096}
1097
1098void VCDiffDeltaFileWindow::UpdateInstructionPointer(
1099    ParseableChunk* parseable_chunk) {
1100  if (IsInterleaved()) {
1101    size_t bytes_parsed = instructions_and_sizes_.ParsedSize();
1102    // Reduce expected instruction segment length by bytes parsed
1103    interleaved_bytes_expected_ -= static_cast<int>(bytes_parsed);
1104    parseable_chunk->Advance(bytes_parsed);
1105  }
1106}
1107
1108inline size_t VCDiffDeltaFileWindow::TargetBytesDecoded() {
1109  return parent_->decoded_target()->size() - target_window_start_pos_;
1110}
1111
1112size_t VCDiffDeltaFileWindow::TargetBytesRemaining() {
1113  if (target_window_length_ == 0) {
1114    // There is no window being decoded at present
1115    return 0;
1116  } else {
1117    return target_window_length_ - TargetBytesDecoded();
1118  }
1119}
1120
1121inline void VCDiffDeltaFileWindow::CopyBytes(const char* data, size_t size) {
1122  parent_->decoded_target()->append(data, size);
1123}
1124
1125inline void VCDiffDeltaFileWindow::RunByte(unsigned char byte, size_t size) {
1126  parent_->decoded_target()->append(size, byte);
1127}
1128
1129VCDiffResult VCDiffDeltaFileWindow::DecodeAdd(size_t size) {
1130  if (size > data_for_add_and_run_.UnparsedSize()) {
1131    return RESULT_END_OF_DATA;
1132  }
1133  // Write the next "size" data bytes
1134  CopyBytes(data_for_add_and_run_.UnparsedData(), size);
1135  data_for_add_and_run_.Advance(size);
1136  return RESULT_SUCCESS;
1137}
1138
1139VCDiffResult VCDiffDeltaFileWindow::DecodeRun(size_t size) {
1140  if (data_for_add_and_run_.Empty()) {
1141    return RESULT_END_OF_DATA;
1142  }
1143  // Write "size" copies of the next data byte
1144  RunByte(*data_for_add_and_run_.UnparsedData(), size);
1145  data_for_add_and_run_.Advance(1);
1146  return RESULT_SUCCESS;
1147}
1148
1149VCDiffResult VCDiffDeltaFileWindow::DecodeCopy(size_t size,
1150                                               unsigned char mode) {
1151  // Keep track of the number of target bytes decoded as a local variable
1152  // to avoid recalculating it each time it is needed.
1153  size_t target_bytes_decoded = TargetBytesDecoded();
1154  const VCDAddress here_address =
1155      static_cast<VCDAddress>(source_segment_length_ + target_bytes_decoded);
1156  const VCDAddress decoded_address = parent_->addr_cache()->DecodeAddress(
1157      here_address,
1158      mode,
1159      addresses_for_copy_.UnparsedDataAddr(),
1160      addresses_for_copy_.End());
1161  switch (decoded_address) {
1162    case RESULT_ERROR:
1163      VCD_ERROR << "Unable to decode address for COPY" << VCD_ENDL;
1164      return RESULT_ERROR;
1165    case RESULT_END_OF_DATA:
1166      return RESULT_END_OF_DATA;
1167    default:
1168      if ((decoded_address < 0) || (decoded_address > here_address)) {
1169        VCD_DFATAL << "Internal error: unexpected address " << decoded_address
1170                   << " returned from DecodeAddress, with here_address = "
1171                   << here_address << VCD_ENDL;
1172        return RESULT_ERROR;
1173      }
1174      break;
1175  }
1176  size_t address = static_cast<size_t>(decoded_address);
1177  if ((address + size) <= source_segment_length_) {
1178    // Copy all data from source segment
1179    CopyBytes(&source_segment_ptr_[address], size);
1180    return RESULT_SUCCESS;
1181  }
1182  // Copy some data from target window...
1183  if (address < source_segment_length_) {
1184    // ... plus some data from source segment
1185    const size_t partial_copy_size = source_segment_length_ - address;
1186    CopyBytes(&source_segment_ptr_[address], partial_copy_size);
1187    target_bytes_decoded += partial_copy_size;
1188    address += partial_copy_size;
1189    size -= partial_copy_size;
1190  }
1191  address -= source_segment_length_;
1192  // address is now based at start of target window
1193  const char* const target_segment_ptr = parent_->decoded_target()->data() +
1194                                         target_window_start_pos_;
1195  while (size > (target_bytes_decoded - address)) {
1196    // Recursive copy that extends into the yet-to-be-copied target data
1197    const size_t partial_copy_size = target_bytes_decoded - address;
1198    CopyBytes(&target_segment_ptr[address], partial_copy_size);
1199    target_bytes_decoded += partial_copy_size;
1200    address += partial_copy_size;
1201    size -= partial_copy_size;
1202  }
1203  CopyBytes(&target_segment_ptr[address], size);
1204  return RESULT_SUCCESS;
1205}
1206
1207int VCDiffDeltaFileWindow::DecodeBody(ParseableChunk* parseable_chunk) {
1208  if (IsInterleaved() && (instructions_and_sizes_.UnparsedData()
1209                              != parseable_chunk->UnparsedData())) {
1210    VCD_DFATAL << "Internal error: interleaved format is used, but the"
1211                  " input pointer does not point to the instructions section"
1212               << VCD_ENDL;
1213    return RESULT_ERROR;
1214  }
1215  while (TargetBytesDecoded() < target_window_length_) {
1216    int32_t decoded_size = VCD_INSTRUCTION_ERROR;
1217    unsigned char mode = 0;
1218    VCDiffInstructionType instruction =
1219        reader_.GetNextInstruction(&decoded_size, &mode);
1220    switch (instruction) {
1221      case VCD_INSTRUCTION_END_OF_DATA:
1222        UpdateInstructionPointer(parseable_chunk);
1223        return RESULT_END_OF_DATA;
1224      case VCD_INSTRUCTION_ERROR:
1225        return RESULT_ERROR;
1226      default:
1227        break;
1228    }
1229    const size_t size = static_cast<size_t>(decoded_size);
1230    // The value of "size" itself could be enormous (say, INT32_MAX)
1231    // so check it individually against the limit to protect against
1232    // overflow when adding it to something else.
1233    if ((size > target_window_length_) ||
1234        ((size + TargetBytesDecoded()) > target_window_length_)) {
1235      VCD_ERROR << VCDiffInstructionName(instruction)
1236                << " with size " << size
1237                << " plus existing " << TargetBytesDecoded()
1238                << " bytes of target data exceeds length of target"
1239                   " window (" << target_window_length_ << " bytes)"
1240                << VCD_ENDL;
1241      return RESULT_ERROR;
1242    }
1243    VCDiffResult result = RESULT_SUCCESS;
1244    switch (instruction) {
1245      case VCD_ADD:
1246        result = DecodeAdd(size);
1247        break;
1248      case VCD_RUN:
1249        result = DecodeRun(size);
1250        break;
1251      case VCD_COPY:
1252        result = DecodeCopy(size, mode);
1253        break;
1254      default:
1255        VCD_DFATAL << "Unexpected instruction type " << instruction
1256                   << "in opcode stream" << VCD_ENDL;
1257        return RESULT_ERROR;
1258    }
1259    switch (result) {
1260      case RESULT_END_OF_DATA:
1261        reader_.UnGetInstruction();
1262        UpdateInstructionPointer(parseable_chunk);
1263        return RESULT_END_OF_DATA;
1264      case RESULT_ERROR:
1265        return RESULT_ERROR;
1266      case RESULT_SUCCESS:
1267        break;
1268    }
1269  }
1270  if (TargetBytesDecoded() != target_window_length_) {
1271    VCD_ERROR << "Decoded target window size (" << TargetBytesDecoded()
1272              << " bytes) does not match expected size ("
1273              << target_window_length_ << " bytes)" << VCD_ENDL;
1274    return RESULT_ERROR;
1275  }
1276  const char* const target_window_start =
1277      parent_->decoded_target()->data() + target_window_start_pos_;
1278  if (has_checksum_ &&
1279      (ComputeAdler32(target_window_start, target_window_length_)
1280           != expected_checksum_)) {
1281    VCD_ERROR << "Target data does not match checksum; this could mean "
1282                 "that the wrong dictionary was used" << VCD_ENDL;
1283    return RESULT_ERROR;
1284  }
1285  if (!instructions_and_sizes_.Empty()) {
1286    VCD_ERROR << "Excess instructions and sizes left over "
1287                 "after decoding target window" << VCD_ENDL;
1288      return RESULT_ERROR;
1289  }
1290  if (!IsInterleaved()) {
1291    // Standard format is being used, with three separate sections for the
1292    // instructions, data, and addresses.
1293    if (!data_for_add_and_run_.Empty()) {
1294      VCD_ERROR << "Excess ADD/RUN data left over "
1295                   "after decoding target window" << VCD_ENDL;
1296        return RESULT_ERROR;
1297    }
1298    if (!addresses_for_copy_.Empty()) {
1299      VCD_ERROR << "Excess COPY addresses left over "
1300                   "after decoding target window" << VCD_ENDL;
1301        return RESULT_ERROR;
1302    }
1303    // Reached the end of the window.  Update the ParseableChunk to point to the
1304    // end of the addresses section, which is the last section in the window.
1305    parseable_chunk->SetPosition(addresses_for_copy_.End());
1306  } else {
1307    // Interleaved format is being used.
1308    UpdateInstructionPointer(parseable_chunk);
1309  }
1310  return RESULT_SUCCESS;
1311}
1312
1313VCDiffResult VCDiffDeltaFileWindow::DecodeWindow(
1314    ParseableChunk* parseable_chunk) {
1315  if (!parent_) {
1316    VCD_DFATAL << "Internal error: VCDiffDeltaFileWindow::DecodeWindow() "
1317                  "called before VCDiffDeltaFileWindow::Init()" << VCD_ENDL;
1318    return RESULT_ERROR;
1319  }
1320  if (!found_header_) {
1321    switch (ReadHeader(parseable_chunk)) {
1322      case RESULT_END_OF_DATA:
1323        return RESULT_END_OF_DATA;
1324      case RESULT_ERROR:
1325        return RESULT_ERROR;
1326      default:
1327        // Reset address cache between windows (RFC section 5.1)
1328        if (!parent_->addr_cache()->Init()) {
1329          VCD_DFATAL << "Error initializing address cache" << VCD_ENDL;
1330          return RESULT_ERROR;
1331        }
1332    }
1333  } else {
1334    // We are resuming a window that was partially decoded before a
1335    // RESULT_END_OF_DATA was returned.  This can only happen on the first
1336    // loop iteration, and only if the interleaved format is enabled and used.
1337    if (!IsInterleaved()) {
1338      VCD_DFATAL << "Internal error: Resumed decoding of a delta file window"
1339                    " when interleaved format is not being used" << VCD_ENDL;
1340      return RESULT_ERROR;
1341    }
1342    UpdateInterleavedSectionPointers(parseable_chunk->UnparsedData(),
1343                                     parseable_chunk->End());
1344    reader_.UpdatePointers(instructions_and_sizes_.UnparsedDataAddr(),
1345                           instructions_and_sizes_.End());
1346  }
1347  switch (DecodeBody(parseable_chunk)) {
1348    case RESULT_END_OF_DATA:
1349      if (MoreDataExpected()) {
1350        return RESULT_END_OF_DATA;
1351      } else {
1352        VCD_ERROR << "End of data reached while decoding VCDIFF delta file"
1353                  << VCD_ENDL;
1354        // fall through to RESULT_ERROR case
1355      }
1356    case RESULT_ERROR:
1357      return RESULT_ERROR;
1358    default:
1359      break;  // DecodeBody succeeded
1360  }
1361  // Get ready to read a new delta window
1362  Reset();
1363  return RESULT_SUCCESS;
1364}
1365
1366// *** Methods for VCDiffStreamingDecoder
1367
1368VCDiffStreamingDecoder::VCDiffStreamingDecoder()
1369: impl_(new VCDiffStreamingDecoderImpl) { }
1370
1371VCDiffStreamingDecoder::~VCDiffStreamingDecoder() { delete impl_; }
1372
1373void VCDiffStreamingDecoder::StartDecoding(const char* source, size_t len) {
1374  impl_->StartDecoding(source, len);
1375}
1376
1377bool VCDiffStreamingDecoder::DecodeChunkToInterface(
1378    const char* data,
1379    size_t len,
1380    OutputStringInterface* output_string) {
1381  return impl_->DecodeChunk(data, len, output_string);
1382}
1383
1384bool VCDiffStreamingDecoder::FinishDecoding() {
1385  return impl_->FinishDecoding();
1386}
1387
1388bool VCDiffStreamingDecoder::SetMaximumTargetFileSize(
1389    size_t new_maximum_target_file_size) {
1390  return impl_->SetMaximumTargetFileSize(new_maximum_target_file_size);
1391}
1392
1393bool VCDiffStreamingDecoder::SetMaximumTargetWindowSize(
1394    size_t new_maximum_target_window_size) {
1395  return impl_->SetMaximumTargetWindowSize(new_maximum_target_window_size);
1396}
1397
1398void VCDiffStreamingDecoder::SetAllowVcdTarget(bool allow_vcd_target) {
1399  impl_->SetAllowVcdTarget(allow_vcd_target);
1400}
1401
1402bool VCDiffDecoder::DecodeToInterface(const char* dictionary_ptr,
1403                                      size_t dictionary_size,
1404                                      const string& encoding,
1405                                      OutputStringInterface* target) {
1406  target->clear();
1407  decoder_.StartDecoding(dictionary_ptr, dictionary_size);
1408  if (!decoder_.DecodeChunkToInterface(encoding.data(),
1409                                       encoding.size(),
1410                                       target)) {
1411    return false;
1412  }
1413  return decoder_.FinishDecoding();
1414}
1415
1416}  // namespace open_vcdiff
1417