vcdecoder.cc revision c7f5f8508d98d5952d42ed7648c2a8f30a4da156
1// Copyright 2008 Google Inc.
2// Author: Lincoln Smith
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8//      http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16// Implements a Decoder for the format described in
17// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
18// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
19//
20// The RFC describes the possibility of using a secondary compressor
21// to further reduce the size of each section of the VCDIFF output.
22// That feature is not supported in this implementation of the encoder
23// and decoder.
24// No secondary compressor types have been publicly registered with
25// the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
26// in the more than five years since the registry was created, so there
27// is no standard set of compressor IDs which would be generated by other
28// encoders or accepted by other decoders.
29
30#include <config.h>
31#include "google/vcdecoder.h"
32#include <stddef.h>  // size_t, ptrdiff_t
33#include <stdint.h>  // int32_t
34#include <string.h>  // memcpy, memset
35#include <memory>  // auto_ptr
36#include <string>
37#include "addrcache.h"
38#include "checksum.h"
39#include "codetable.h"
40#include "decodetable.h"
41#include "headerparser.h"
42#include "logging.h"
43#include "google/output_string.h"
44#include "varint_bigendian.h"
45#include "vcdiff_defs.h"
46
47namespace open_vcdiff {
48
49// This class is used to parse delta file windows as described
50// in RFC sections 4.2 and 4.3.  Its methods are not thread-safe.
51//
52// Here is the window format copied from the RFC:
53//
54// Window1
55//     Win_Indicator                            - byte
56//     [Source segment size]                    - integer
57//     [Source segment position]                - integer
58//     The delta encoding of the target window
59//         Length of the delta encoding         - integer
60//         The delta encoding
61//             Size of the target window        - integer
62//             Delta_Indicator                  - byte
63//             Length of data for ADDs and RUNs - integer
64//             Length of instructions and sizes - integer
65//             Length of addresses for COPYs    - integer
66//             Data section for ADDs and RUNs   - array of bytes
67//             Instructions and sizes section   - array of bytes
68//             Addresses section for COPYs      - array of bytes
69// Window2
70// ...
71//
72// Sample usage:
73//
74// VCDiffDeltaFileWindow delta_window_;
75// delta_window_.Init(parent);
76// ParseableChunk parseable_chunk(input_buffer,
77//                                input_size,
78//                                leftover_unencoded_bytes);
79// switch (delta_window_.DecodeWindows(&parseable_chunk)) {
80//   case RESULT_END_OF_DATA:
81//     <Read more input and retry DecodeWindows later.>
82//   case RESULT_ERROR:
83//     <Handle error case.  An error log message has already been generated.>
84// }
85//
86// DecodeWindows consumes as many windows from the input as it can.  It only
87// needs to be placed within a loop if the loop is used to obtain more input
88// (delta file) data.
89//
90class VCDiffDeltaFileWindow {
91 public:
92  VCDiffDeltaFileWindow();
93  ~VCDiffDeltaFileWindow();
94
95  // Init() should be called immediately after constructing the
96  // VCDiffDeltaFileWindow().  It must be called before DecodeWindows() can be
97  // invoked, or an error will occur.
98  void Init(VCDiffStreamingDecoderImpl* parent);
99
100  // Resets the pointers to the data sections in the current window.
101  void Reset();
102
103  bool UseCodeTable(const VCDiffCodeTableData& code_table_data,
104                    unsigned char max_mode) {
105    return reader_.UseCodeTable(code_table_data, max_mode);
106  }
107
108  // Decodes as many delta windows as possible using the input data from
109  // *parseable_chunk.  Appends the decoded target windows to
110  // parent_->decoded_target().  Returns RESULT_SUCCESS on success, or
111  // RESULT_END_OF_DATA if the end of input was reached before the entire window
112  // could be decoded and more input is expected (only possible if
113  // IsInterleaved() is true), or RESULT_ERROR if an error occurred during
114  // decoding.  In the RESULT_ERROR case, the value of parseable_chunk->pointer_
115  // is undefined; otherwise, parseable_chunk->Advance() is called to point to
116  // the input data position just after the data that has been decoded.
117  //
118  // If planned_target_file_size is not set to kUnlimitedBytes, then the decoder
119  // expects *exactly* this number of target bytes to be decoded from one or
120  // more delta file windows.  If this number is met exactly after finishing a
121  // delta window, this function will return RESULT_SUCCESS without processing
122  // any more bytes from data_pointer.  If this number is exceeded while
123  // decoding a window, but was not met before starting that window,
124  // then RESULT_ERROR will be returned.
125  //
126  VCDiffResult DecodeWindows(ParseableChunk* parseable_chunk);
127
128  bool FoundWindowHeader() const {
129    return found_header_;
130  }
131
132  bool MoreDataExpected() const {
133    // When parsing an interleaved-format delta file,
134    // every time DecodeBody() exits, interleaved_bytes_expected_
135    // will be decremented by the number of bytes parsed.  If it
136    // reaches zero, then there is no more data expected because
137    // the size of the interleaved section (given in the window
138    // header) has been reached.
139    return IsInterleaved() && (interleaved_bytes_expected_ > 0);
140  }
141
142  size_t target_window_start_pos() const { return target_window_start_pos_; }
143
144  void set_target_window_start_pos(size_t new_start_pos) {
145    target_window_start_pos_ = new_start_pos;
146  }
147
148  // Returns the number of bytes remaining to be decoded in the target window.
149  // If not in the process of decoding a window, returns 0.
150  size_t TargetBytesRemaining();
151
152 private:
153  // Reads the header of the window section as described in RFC sections 4.2 and
154  // 4.3, up to and including the value "Length of addresses for COPYs".  If the
155  // entire header is found, this function sets up the DeltaWindowSections
156  // instructions_and_sizes_, data_for_add_and_run_, and addresses_for_copy_ so
157  // that the decoder can begin decoding the opcodes in these sections.  Returns
158  // RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA if the end of
159  // available data was reached before the entire header could be read.  (The
160  // latter may be an error condition if there is no more data available.)
161  // Otherwise, returns RESULT_SUCCESS and advances parseable_chunk past the
162  // parsed header.
163  //
164  VCDiffResult ReadHeader(ParseableChunk* parseable_chunk);
165
166  // After the window header has been parsed as far as the Delta_Indicator,
167  // this function is called to parse the following delta window header fields:
168  //
169  //     Length of data for ADDs and RUNs - integer (VarintBE format)
170  //     Length of instructions and sizes - integer (VarintBE format)
171  //     Length of addresses for COPYs    - integer (VarintBE format)
172  //
173  // If has_checksum_ is true, it also looks for the following element:
174  //
175  //     Adler32 checksum            - unsigned 32-bit integer (VarintBE format)
176  //
177  // It sets up the DeltaWindowSections instructions_and_sizes_,
178  // data_for_add_and_run_, and addresses_for_copy_.  If the interleaved format
179  // is being used, all three sections will include the entire window body; if
180  // the standard format is used, three non-overlapping window sections will be
181  // defined.  Returns RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA
182  // if standard format is being used and there is not enough input data to read
183  // the entire window body.  Otherwise, returns RESULT_SUCCESS.
184  VCDiffResult SetUpWindowSections(VCDiffHeaderParser* header_parser);
185
186  // Decodes the body of the window section as described in RFC sections 4.3,
187  // including the sections "Data section for ADDs and RUNs", "Instructions
188  // and sizes section", and "Addresses section for COPYs".  These sections
189  // must already have been set up by ReadWindowHeader().  Returns a
190  // non-negative value on success, or RESULT_END_OF_DATA if the end of input
191  // was reached before the entire window could be decoded (only possible if
192  // IsInterleaved() is true), or RESULT_ERROR if an error occurred during
193  // decoding.  Appends as much of the decoded target window as possible to
194  // parent->decoded_target().
195  //
196  int DecodeBody(ParseableChunk* parseable_chunk);
197
198  // Returns the number of bytes already decoded into the target window.
199  size_t TargetBytesDecoded();
200
201  // Decodes a single ADD instruction, updating parent_->decoded_target_.
202  VCDiffResult DecodeAdd(size_t size);
203
204  // Decodes a single RUN instruction, updating parent_->decoded_target_.
205  VCDiffResult DecodeRun(size_t size);
206
207  // Decodes a single COPY instruction, updating parent_->decoded_target_.
208  VCDiffResult DecodeCopy(size_t size, unsigned char mode);
209
210  // When using the interleaved format, this function is called both on parsing
211  // the header and on resuming after a RESULT_END_OF_DATA was returned from a
212  // previous call to DecodeBody().  It sets up all three section pointers to
213  // reference the same interleaved stream of instructions, sizes, addresses,
214  // and data.  These pointers must be reset every time that work resumes on a
215  // delta window,  because the input data string may have been changed or
216  // resized since DecodeBody() last returned.
217  void UpdateInterleavedSectionPointers(const char* data_pos,
218                                        const char* data_end) {
219    const ptrdiff_t available_data = data_end - data_pos;
220    // Don't read past the end of currently-available data
221    if (available_data > interleaved_bytes_expected_) {
222      instructions_and_sizes_.Init(data_pos, interleaved_bytes_expected_);
223    } else {
224      instructions_and_sizes_.Init(data_pos, available_data);
225    }
226    data_for_add_and_run_.Init(&instructions_and_sizes_);
227    addresses_for_copy_.Init(&instructions_and_sizes_);
228  }
229
230  // If true, the interleaved format described in AllowInterleaved() is used
231  // for the current delta file.  Only valid after ReadWindowHeader() has been
232  // called and returned a positive number (i.e., the whole header was parsed),
233  // but before the window has finished decoding.
234  //
235  bool IsInterleaved() const {
236    // If the sections are interleaved, both addresses_for_copy_ and
237    // data_for_add_and_run_ should point at instructions_and_sizes_.
238    return !addresses_for_copy_.IsOwned();
239  }
240
241  // Executes a single COPY or ADD instruction, appending data to
242  // parent_->decoded_target().
243  void CopyBytes(const char* data, size_t size);
244
245  // Executes a single RUN instruction, appending data to
246  // parent_->decoded_target().
247  void RunByte(unsigned char byte, size_t size);
248
249  // Advance *parseable_chunk to point to the current position in the
250  // instructions/sizes section.  If interleaved format is used, then
251  // decrement the number of expected bytes in the instructions/sizes section
252  // by the number of instruction/size bytes parsed.
253  void UpdateInstructionPointer(ParseableChunk* parseable_chunk);
254
255  // The parent object which was passed to Init().
256  VCDiffStreamingDecoderImpl* parent_;
257
258  // This value will be true if VCDiffDeltaFileWindow::ReadDeltaWindowHeader()
259  // has been called and succeeded in parsing the delta window header, but the
260  // entire window has not yet been decoded.
261  bool found_header_;
262
263  // Contents and length of the current source window.  source_segment_ptr_
264  // will be non-NULL if (a) the window section header for the current window
265  // has been read, but the window has not yet finished decoding; or
266  // (b) the window did not specify a source segment.
267  const char* source_segment_ptr_;
268  size_t source_segment_length_;
269
270  // The delta encoding window sections as defined in RFC section 4.3.
271  // The pointer for each section will be incremented as data is consumed and
272  // decoded from that section.  If the interleaved format is used,
273  // data_for_add_and_run_ and addresses_for_copy_ will both point to
274  // instructions_and_sizes_; otherwise, they will be separate data sections.
275  //
276  DeltaWindowSection instructions_and_sizes_;
277  DeltaWindowSection data_for_add_and_run_;
278  DeltaWindowSection addresses_for_copy_;
279
280  // The expected bytes left to decode in instructions_and_sizes_.  Only used
281  // for the interleaved format.
282  int interleaved_bytes_expected_;
283
284  // The expected length of the target window once it has been decoded.
285  size_t target_window_length_;
286
287  // The index in decoded_target at which the first byte of the current
288  // target window was/will be written.
289  size_t target_window_start_pos_;
290
291  // If has_checksum_ is true, then expected_checksum_ contains an Adler32
292  // checksum of the target window data.  This is an extension included in the
293  // VCDIFF 'S' (SDCH) format, but is not part of the RFC 3284 draft standard.
294  bool has_checksum_;
295  VCDChecksum expected_checksum_;
296
297  VCDiffCodeTableReader reader_;
298
299  // Making these private avoids implicit copy constructor & assignment operator
300  VCDiffDeltaFileWindow(const VCDiffDeltaFileWindow&);  // NOLINT
301  void operator=(const VCDiffDeltaFileWindow&);
302};
303
304// *** Inline methods for VCDiffDeltaFileWindow
305
306inline VCDiffDeltaFileWindow::VCDiffDeltaFileWindow() : parent_(NULL) {
307  Reset();
308}
309
310inline VCDiffDeltaFileWindow::~VCDiffDeltaFileWindow() { }
311
312inline void VCDiffDeltaFileWindow::Init(VCDiffStreamingDecoderImpl* parent) {
313  parent_ = parent;
314}
315
316class VCDiffStreamingDecoderImpl {
317 public:
318  typedef std::string string;
319
320  // The default maximum target file size (and target window size) if
321  // SetMaximumTargetFileSize() is not called.
322  static const size_t kDefaultMaximumTargetFileSize = 67108864U;  // 64 MB
323
324  // The largest value that can be passed to SetMaximumTargetWindowSize().
325  // Using a larger value will result in an error.
326  static const size_t kTargetSizeLimit = 2147483647U;  // INT32_MAX
327
328  // A constant that is the default value for planned_target_file_size_,
329  // indicating that the decoder does not have an expected length
330  // for the target data.
331  static const size_t kUnlimitedBytes = static_cast<size_t>(-3);
332
333  VCDiffStreamingDecoderImpl();
334  ~VCDiffStreamingDecoderImpl();
335
336  // Resets all member variables to their initial states.
337  void Reset();
338
339  // These functions are identical to their counterparts
340  // in VCDiffStreamingDecoder.
341  //
342  void StartDecoding(const char* dictionary_ptr, size_t dictionary_size);
343
344  bool DecodeChunk(const char* data,
345                   size_t len,
346                   OutputStringInterface* output_string);
347
348  bool FinishDecoding();
349
350  // If true, the version of VCDIFF used in the current delta file allows
351  // for the interleaved format, in which instructions, addresses and data
352  // are all sent interleaved in the instructions section of each window
353  // rather than being sent in separate sections.  This is not part of
354  // the VCDIFF draft standard, so we've defined a special version code
355  // 'S' which implies that this feature is available.  Even if interleaving
356  // is supported, it is not mandatory; interleaved format will be implied
357  // if the address and data sections are both zero-length.
358  //
359  bool AllowInterleaved() const { return vcdiff_version_code_ == 'S'; }
360
361  // If true, the version of VCDIFF used in the current delta file allows
362  // each delta window to contain an Adler32 checksum of the target window data.
363  // If the bit 0x08 (VCD_CHECKSUM) is set in the Win_Indicator flags, then
364  // this checksum will appear as a variable-length integer, just after the
365  // "length of addresses for COPYs" value and before the window data sections.
366  // It is possible for some windows in a delta file to use the checksum feature
367  // and for others not to use it (and leave the flag bit set to 0.)
368  // Just as with AllowInterleaved(), this extension is not part of the draft
369  // standard and is only available when the version code 'S' is specified.
370  //
371  bool AllowChecksum() const { return vcdiff_version_code_ == 'S'; }
372
373  bool SetMaximumTargetFileSize(size_t new_maximum_target_file_size) {
374    maximum_target_file_size_ = new_maximum_target_file_size;
375    return true;
376  }
377
378  bool SetMaximumTargetWindowSize(size_t new_maximum_target_window_size) {
379    if (new_maximum_target_window_size > kTargetSizeLimit) {
380      LOG(ERROR) << "Specified maximum target window size "
381                 << new_maximum_target_window_size << " exceeds limit of "
382                 << kTargetSizeLimit << " bytes" << LOG_ENDL;
383      return false;
384    }
385    maximum_target_window_size_ = new_maximum_target_window_size;
386    return true;
387  }
388
389  // See description of planned_target_file_size_, below.
390  bool HasPlannedTargetFileSize() const {
391    return planned_target_file_size_ != kUnlimitedBytes;
392  }
393
394  void SetPlannedTargetFileSize(size_t planned_target_file_size) {
395    planned_target_file_size_ = planned_target_file_size;
396  }
397
398  void AddToTotalTargetWindowSize(size_t window_size) {
399    total_of_target_window_sizes_ += window_size;
400  }
401
402  // Checks to see whether the decoded target data has reached its planned size.
403  bool ReachedPlannedTargetFileSize() const {
404    if (!HasPlannedTargetFileSize()) {
405      return false;
406    }
407    // The planned target file size should not have been exceeded.
408    // TargetWindowWouldExceedSizeLimits() ensures that the advertised size of
409    // each target window would not make the target file exceed that limit, and
410    // DecodeBody() will return RESULT_ERROR if the actual decoded output ever
411    // exceeds the advertised target window size.
412    if (total_of_target_window_sizes_ > planned_target_file_size_) {
413      LOG(DFATAL) << "Internal error: Decoded data size "
414                  << total_of_target_window_sizes_
415                  << " exceeds planned target file size "
416                  << planned_target_file_size_ << LOG_ENDL;
417      return true;
418    }
419    return total_of_target_window_sizes_ == planned_target_file_size_;
420  }
421
422  // Checks to see whether adding a new target window of the specified size
423  // would exceed the planned target file size, the maximum target file size,
424  // or the maximum target window size.  If so, logs an error and returns true;
425  // otherwise, returns false.
426  bool TargetWindowWouldExceedSizeLimits(size_t window_size) const;
427
428  // Returns the amount of input data passed to the last DecodeChunk()
429  // that was not consumed by the decoder.  This is essential if
430  // SetPlannedTargetFileSize() is being used, in order to preserve the
431  // remaining input data stream once the planned target file has been decoded.
432  size_t GetUnconsumedDataSize() const {
433    return unparsed_bytes_.size();
434  }
435
436  // This function will return true if the decoder has parsed a complete delta
437  // file header plus zero or more delta file windows, with no data left over.
438  // It will also return true if no delta data at all was decoded.  If these
439  // conditions are not met, then FinishDecoding() should not be called.
440  bool IsDecodingComplete() const {
441    if (!FoundFileHeader()) {
442      // No complete delta file header has been parsed yet.  DecodeChunk()
443      // may have received some data that it hasn't yet parsed, in which case
444      // decoding is incomplete.
445      return unparsed_bytes_.empty();
446    } else if (custom_code_table_decoder_.get()) {
447      // The decoder is in the middle of parsing a custom code table.
448      return false;
449    } else if (delta_window_.FoundWindowHeader()) {
450      // The decoder is in the middle of parsing an interleaved format delta
451      // window.
452      return false;
453    } else if (ReachedPlannedTargetFileSize()) {
454      // The decoder found exactly the planned number of bytes.  In this case
455      // it is OK for unparsed_bytes_ to be non-empty; it contains the leftover
456      // data after the end of the delta file.
457      return true;
458    } else {
459      // No complete delta file window has been parsed yet.  DecodeChunk()
460      // may have received some data that it hasn't yet parsed, in which case
461      // decoding is incomplete.
462      return unparsed_bytes_.empty();
463    }
464  }
465
466  const char* dictionary_ptr() const { return dictionary_ptr_; }
467
468  size_t dictionary_size() const { return dictionary_size_; }
469
470  VCDiffAddressCache* addr_cache() { return addr_cache_.get(); }
471
472  string* decoded_target() { return &decoded_target_; }
473
474  bool allow_vcd_target() const { return allow_vcd_target_; }
475
476  void SetAllowVcdTarget(bool allow_vcd_target) {
477    if (start_decoding_was_called_) {
478      LOG(DFATAL) << "SetAllowVcdTarget() called after StartDecoding()"
479                  << LOG_ENDL;
480      return;
481    }
482    allow_vcd_target_ = allow_vcd_target;
483  }
484
485  // Removes the contents of decoded_target_ that precede the beginning of the
486  // current window.
487  void TruncateToBeginningOfWindow();
488
489 private:
490  // Reads the VCDiff delta file header section as described in RFC section 4.1,
491  // except the custom code table data.  Returns RESULT_ERROR if an error
492  // occurred, or RESULT_END_OF_DATA if the end of available data was reached
493  // before the entire header could be read.  (The latter may be an error
494  // condition if there is no more data available.)  Otherwise, advances
495  // data->position_ past the header and returns RESULT_SUCCESS.
496  //
497  VCDiffResult ReadDeltaFileHeader(ParseableChunk* data);
498
499  // Indicates whether or not the header has already been read.
500  bool FoundFileHeader() const { return addr_cache_.get() != NULL; }
501
502  // If ReadDeltaFileHeader() finds the VCD_CODETABLE flag set within the delta
503  // file header, this function parses the custom cache sizes and initializes
504  // a nested VCDiffStreamingDecoderImpl object that will be used to parse the
505  // custom code table in ReadCustomCodeTable().  Returns RESULT_ERROR if an
506  // error occurred, or RESULT_END_OF_DATA if the end of available data was
507  // reached before the custom cache sizes could be read.  Otherwise, returns
508  // the number of bytes read.
509  //
510  int InitCustomCodeTable(const char* data_start, const char* data_end);
511
512  // If a custom code table was specified in the header section that was parsed
513  // by ReadDeltaFileHeader(), this function makes a recursive call to another
514  // VCDiffStreamingDecoderImpl object (custom_code_table_decoder_), since the
515  // custom code table is expected to be supplied as an embedded VCDIFF
516  // encoding that uses the standard code table.  Returns RESULT_ERROR if an
517  // error occurs, or RESULT_END_OF_DATA if the end of available data was
518  // reached before the entire custom code table could be read.  Otherwise,
519  // returns RESULT_SUCCESS and sets *data_ptr to the position after the encoded
520  // custom code table.  If the function returns RESULT_SUCCESS or
521  // RESULT_END_OF_DATA, it advances data->position_ past the parsed bytes.
522  //
523  VCDiffResult ReadCustomCodeTable(ParseableChunk* data);
524
525  // Contents and length of the source (dictionary) data.
526  const char* dictionary_ptr_;
527  size_t dictionary_size_;
528
529  // This string will be used to store any unparsed bytes left over when
530  // DecodeChunk() reaches the end of its input and returns RESULT_END_OF_DATA.
531  // It will also be used to concatenate those unparsed bytes with the data
532  // supplied to the next call to DecodeChunk(), so that they appear in
533  // contiguous memory.
534  string unparsed_bytes_;
535
536  // The portion of the target file that has been decoded so far.  This will be
537  // used to fill the output string for DecodeChunk(), and will also be used to
538  // execute COPY instructions that reference target data.  Since the source
539  // window can come from a range of addresses in the previously decoded target
540  // data, the entire target file needs to be available to the decoder, not just
541  // the current target window.
542  string decoded_target_;
543
544  // The VCDIFF version byte (also known as "header4") from the
545  // delta file header.
546  unsigned char vcdiff_version_code_;
547
548  VCDiffDeltaFileWindow delta_window_;
549
550  std::auto_ptr<VCDiffAddressCache> addr_cache_;
551
552  // Will be NULL unless a custom code table has been defined.
553  std::auto_ptr<VCDiffCodeTableData> custom_code_table_;
554
555  // Used to receive the decoded custom code table.
556  string custom_code_table_string_;
557
558  // If a custom code table is specified, it will be expressed
559  // as an embedded VCDIFF delta file which uses the default code table
560  // as the source file (dictionary).  Use a child decoder object
561  // to decode that delta file.
562  std::auto_ptr<VCDiffStreamingDecoderImpl> custom_code_table_decoder_;
563
564  // If set, then the decoder is expecting *exactly* this number of
565  // target bytes to be decoded from one or more delta file windows.
566  // If this number is exceeded while decoding a window, but was not met
567  // before starting on that window, an error will be reported.
568  // If FinishDecoding() is called before this number is met, an error
569  // will also be reported.  This feature is used for decoding the
570  // embedded code table data within a VCDIFF delta file; we want to
571  // stop processing the embedded data once the entire code table has
572  // been decoded, and treat the rest of the available data as part
573  // of the enclosing delta file.
574  size_t planned_target_file_size_;
575
576  size_t maximum_target_file_size_;
577
578  size_t maximum_target_window_size_;
579
580  // Contains the sum of the decoded sizes of all target windows seen so far,
581  // including the expected total size of the current target window in progress
582  // (even if some of the current target window has not yet been decoded.)
583  size_t total_of_target_window_sizes_;
584
585  // This value is used to ensure the correct order of calls to the interface
586  // functions, i.e., a single call to StartDecoding(), followed by zero or
587  // more calls to DecodeChunk(), followed by a single call to
588  // FinishDecoding().
589  bool start_decoding_was_called_;
590
591  // If this value is true then the VCD_TARGET flag can be specified to allow
592  // the source segment to be chosen from the previously-decoded target data.
593  // (This is the default behavior.)  If it is false, then specifying the
594  // VCD_TARGET flag is considered an error, and the decoder does not need to
595  // keep in memory any decoded target data prior to the current window.
596  bool allow_vcd_target_;
597
598  // Making these private avoids implicit copy constructor & assignment operator
599  VCDiffStreamingDecoderImpl(const VCDiffStreamingDecoderImpl&);  // NOLINT
600  void operator=(const VCDiffStreamingDecoderImpl&);
601};
602
603// *** Methods for VCDiffStreamingDecoderImpl
604
605const size_t VCDiffStreamingDecoderImpl::kDefaultMaximumTargetFileSize;
606const size_t VCDiffStreamingDecoderImpl::kUnlimitedBytes;
607
608VCDiffStreamingDecoderImpl::VCDiffStreamingDecoderImpl()
609    : maximum_target_file_size_(kDefaultMaximumTargetFileSize),
610      maximum_target_window_size_(kDefaultMaximumTargetFileSize),
611      allow_vcd_target_(true) {
612  delta_window_.Init(this);
613  Reset();
614}
615
616// Reset() will delete the component objects without reallocating them.
617VCDiffStreamingDecoderImpl::~VCDiffStreamingDecoderImpl() { Reset(); }
618
619void VCDiffStreamingDecoderImpl::Reset() {
620  start_decoding_was_called_ = false;
621  dictionary_ptr_ = NULL;
622  dictionary_size_ = 0;
623  vcdiff_version_code_ = '\0';
624  planned_target_file_size_ = kUnlimitedBytes;
625  total_of_target_window_sizes_ = 0;
626  addr_cache_.reset();
627  custom_code_table_.reset();
628  custom_code_table_decoder_.reset();
629  delta_window_.Reset();
630}
631
632void VCDiffStreamingDecoderImpl::TruncateToBeginningOfWindow() {
633  // Conserve the data for the current window that has been partially decoded.
634  decoded_target_.erase(0, delta_window_.target_window_start_pos());
635  delta_window_.set_target_window_start_pos(0);
636}
637
638void VCDiffStreamingDecoderImpl::StartDecoding(const char* dictionary_ptr,
639                                               size_t dictionary_size) {
640  if (start_decoding_was_called_) {
641    LOG(DFATAL) << "StartDecoding() called twice without FinishDecoding()"
642                << LOG_ENDL;
643    return;
644  }
645  unparsed_bytes_.clear();
646  decoded_target_.clear();  // delta_window_.Reset() depends on this
647  Reset();
648  dictionary_ptr_ = dictionary_ptr;
649  dictionary_size_ = dictionary_size;
650  start_decoding_was_called_ = true;
651}
652
653// Reads the VCDiff delta file header section as described in RFC section 4.1:
654//
655//     Header1                                  - byte = 0xD6 (ASCII 'V' | 0x80)
656//     Header2                                  - byte = 0xC3 (ASCII 'C' | 0x80)
657//     Header3                                  - byte = 0xC4 (ASCII 'D' | 0x80)
658//     Header4                                  - byte
659//     Hdr_Indicator                            - byte
660//     [Secondary compressor ID]                - byte
661//     [Length of code table data]              - integer
662//     [Code table data]
663//
664// Initializes the code table and address cache objects.  Returns RESULT_ERROR
665// if an error occurred, and RESULT_END_OF_DATA if the end of available data was
666// reached before the entire header could be read.  (The latter may be an error
667// condition if there is no more data available.)  Otherwise, returns
668// RESULT_SUCCESS, and removes the header bytes from the data string.
669//
670// It's relatively inefficient to expect this function to parse any number of
671// input bytes available, down to 1 byte, but it is necessary in case the input
672// is not a properly formatted VCDIFF delta file.  If the entire input consists
673// of two bytes "12", then we should recognize that it does not match the
674// initial VCDIFF magic number "VCD" and report an error, rather than waiting
675// indefinitely for more input that will never arrive.
676//
677VCDiffResult VCDiffStreamingDecoderImpl::ReadDeltaFileHeader(
678    ParseableChunk* data) {
679  if (FoundFileHeader()) {
680    return RESULT_SUCCESS;
681  }
682  size_t data_size = data->UnparsedSize();
683  const DeltaFileHeader* header =
684      reinterpret_cast<const DeltaFileHeader*>(data->UnparsedData());
685  bool wrong_magic_number = false;
686  switch (data_size) {
687    // Verify only the bytes that are available.
688    default:
689      // Found header contents up to and including VCDIFF version
690      vcdiff_version_code_ = header->header4;
691      if ((vcdiff_version_code_ != 0x00) &&  // Draft standard VCDIFF (RFC 3284)
692          (vcdiff_version_code_ != 'S')) {   // Enhancements for SDCH protocol
693        LOG(ERROR) << "Unrecognized VCDIFF format version" << LOG_ENDL;
694        return RESULT_ERROR;
695      }
696      // fall through
697    case 3:
698      if (header->header3 != 0xC4) {  // magic value 'D' | 0x80
699        wrong_magic_number = true;
700      }
701      // fall through
702    case 2:
703      if (header->header2 != 0xC3) {  // magic value 'C' | 0x80
704        wrong_magic_number = true;
705      }
706      // fall through
707    case 1:
708      if (header->header1 != 0xD6) {  // magic value 'V' | 0x80
709        wrong_magic_number = true;
710      }
711      // fall through
712    case 0:
713      if (wrong_magic_number) {
714        LOG(ERROR) << "Did not find VCDIFF header bytes; "
715                      "input is not a VCDIFF delta file" << LOG_ENDL;
716        return RESULT_ERROR;
717      }
718      if (data_size < sizeof(DeltaFileHeader)) return RESULT_END_OF_DATA;
719  }
720  // Secondary compressor not supported.
721  if (header->hdr_indicator & VCD_DECOMPRESS) {
722    LOG(ERROR) << "Secondary compression is not supported" << LOG_ENDL;
723    return RESULT_ERROR;
724  }
725  if (header->hdr_indicator & VCD_CODETABLE) {
726    int bytes_parsed = InitCustomCodeTable(
727        data->UnparsedData() + sizeof(DeltaFileHeader),
728        data->End());
729    switch (bytes_parsed) {
730      case RESULT_ERROR:
731        return RESULT_ERROR;
732      case RESULT_END_OF_DATA:
733        return RESULT_END_OF_DATA;
734      default:
735        data->Advance(sizeof(DeltaFileHeader) + bytes_parsed);
736    }
737  } else {
738    addr_cache_.reset(new VCDiffAddressCache);
739    // addr_cache_->Init() will be called
740    // from VCDiffStreamingDecoderImpl::DecodeChunk()
741    data->Advance(sizeof(DeltaFileHeader));
742  }
743  return RESULT_SUCCESS;
744}
745
746int VCDiffStreamingDecoderImpl::InitCustomCodeTable(const char* data_start,
747                                                    const char* data_end) {
748  // A custom code table is being specified.  Parse the variable-length
749  // cache sizes and begin parsing the encoded custom code table.
750  int32_t near_cache_size = 0, same_cache_size = 0;
751  VCDiffHeaderParser header_parser(data_start, data_end);
752  if (!header_parser.ParseInt32("size of near cache", &near_cache_size)) {
753    return header_parser.GetResult();
754  }
755  if (!header_parser.ParseInt32("size of same cache", &same_cache_size)) {
756    return header_parser.GetResult();
757  }
758  custom_code_table_.reset(new struct VCDiffCodeTableData);
759  memset(custom_code_table_.get(), 0, sizeof(struct VCDiffCodeTableData));
760  custom_code_table_string_.clear();
761  addr_cache_.reset(new VCDiffAddressCache(near_cache_size, same_cache_size));
762  // addr_cache_->Init() will be called
763  // from VCDiffStreamingDecoderImpl::DecodeChunk()
764
765  // If we reach this point (the start of the custom code table)
766  // without encountering a RESULT_END_OF_DATA condition, then we won't call
767  // ReadDeltaFileHeader() again for this delta file.
768  //
769  // Instantiate a recursive decoder to interpret the custom code table
770  // as a VCDIFF encoding of the default code table.
771  custom_code_table_decoder_.reset(new VCDiffStreamingDecoderImpl);
772  custom_code_table_decoder_->StartDecoding(
773      reinterpret_cast<const char*>(
774          &VCDiffCodeTableData::kDefaultCodeTableData),
775      sizeof(VCDiffCodeTableData::kDefaultCodeTableData));
776  custom_code_table_decoder_->SetPlannedTargetFileSize(
777      sizeof(*custom_code_table_));
778  return static_cast<int>(header_parser.ParsedSize());
779}
780
781VCDiffResult VCDiffStreamingDecoderImpl::ReadCustomCodeTable(
782    ParseableChunk* data) {
783  if (!custom_code_table_decoder_.get()) {
784    return RESULT_SUCCESS;
785  }
786  if (!custom_code_table_.get()) {
787    LOG(DFATAL) << "Internal error:  custom_code_table_decoder_ is set,"
788                   " but custom_code_table_ is NULL" << LOG_ENDL;
789    return RESULT_ERROR;
790  }
791  OutputString<string> output_string(&custom_code_table_string_);
792  if (!custom_code_table_decoder_->DecodeChunk(data->UnparsedData(),
793                                               data->UnparsedSize(),
794                                               &output_string)) {
795    return RESULT_ERROR;
796  }
797  if (custom_code_table_string_.length() < sizeof(*custom_code_table_)) {
798    // Skip over the consumed data.
799    data->Finish();
800    return RESULT_END_OF_DATA;
801  }
802  if (!custom_code_table_decoder_->FinishDecoding()) {
803    return RESULT_ERROR;
804  }
805  if (custom_code_table_string_.length() != sizeof(*custom_code_table_)) {
806    LOG(DFATAL) << "Decoded custom code table size ("
807                << custom_code_table_string_.length()
808                << ") does not match size of a code table ("
809                << sizeof(*custom_code_table_) << ")" << LOG_ENDL;
810    return RESULT_ERROR;
811  }
812  memcpy(custom_code_table_.get(),
813         custom_code_table_string_.data(),
814         sizeof(*custom_code_table_));
815  custom_code_table_string_.clear();
816  // Skip over the consumed data.
817  data->FinishExcept(custom_code_table_decoder_->GetUnconsumedDataSize());
818  custom_code_table_decoder_.reset();
819  delta_window_.UseCodeTable(*custom_code_table_, addr_cache_->LastMode());
820  return RESULT_SUCCESS;
821}
822
823namespace {
824
825class TrackNewOutputText {
826 public:
827  typedef std::string string;
828
829  explicit TrackNewOutputText(const string& decoded_target)
830      : decoded_target_(decoded_target),
831      initial_decoded_target_size_(decoded_target.size()) { }
832
833  void AppendNewOutputText(size_t target_bytes_remaining,
834                           OutputStringInterface* output_string) {
835    const size_t bytes_decoded_this_chunk =
836        decoded_target_.size() - initial_decoded_target_size_;
837    if (bytes_decoded_this_chunk > 0) {
838      if (target_bytes_remaining > 0) {
839        // The decoder is midway through decoding a target window.  Resize
840        // output_string to match the expected length.  The interface guarantees
841        // not to resize the output_string more than once per target window
842        // decoded.
843        output_string->ReserveAdditionalBytes(bytes_decoded_this_chunk
844                                              + target_bytes_remaining);
845      }
846      output_string->append(
847          decoded_target_.data() + initial_decoded_target_size_,
848          bytes_decoded_this_chunk);
849    }
850  }
851
852 private:
853  const string& decoded_target_;
854  size_t initial_decoded_target_size_;
855};
856
857}  // anonymous namespace
858
859bool VCDiffStreamingDecoderImpl::DecodeChunk(
860    const char* data,
861    size_t len,
862    OutputStringInterface* output_string) {
863  if (!start_decoding_was_called_) {
864    LOG(DFATAL) << "DecodeChunk() called without StartDecoding()" << LOG_ENDL;
865    Reset();
866    return false;
867  }
868  ParseableChunk parseable_chunk(data, len);
869  if (!unparsed_bytes_.empty()) {
870    unparsed_bytes_.append(data, len);
871    parseable_chunk.SetDataBuffer(unparsed_bytes_.data(),
872                                  unparsed_bytes_.size());
873  }
874  TrackNewOutputText output_tracker(decoded_target_);
875  VCDiffResult result = ReadDeltaFileHeader(&parseable_chunk);
876  if (RESULT_SUCCESS == result) {
877    result = ReadCustomCodeTable(&parseable_chunk);
878  }
879  if (RESULT_SUCCESS == result) {
880    result = delta_window_.DecodeWindows(&parseable_chunk);
881  }
882  if (RESULT_ERROR == result) {
883    Reset();  // Don't allow further DecodeChunk calls
884    return false;
885  }
886  unparsed_bytes_.assign(parseable_chunk.UnparsedData(),
887                         parseable_chunk.UnparsedSize());
888  output_tracker.AppendNewOutputText(delta_window_.TargetBytesRemaining(),
889                                     output_string);
890  if (!allow_vcd_target()) {
891    // VCD_TARGET will never be used to reference target data beyond the start
892    // of the current window, so throw away any earlier target data.
893    TruncateToBeginningOfWindow();
894  }
895  return true;
896}
897
898// Finishes decoding after all data has been received.  Returns true
899// if decoding of the entire stream was successful.
900bool VCDiffStreamingDecoderImpl::FinishDecoding() {
901  bool success = true;
902  if (!start_decoding_was_called_) {
903    LOG(WARNING) << "FinishDecoding() called before StartDecoding(),"
904                    " or called after DecodeChunk() returned false"
905                 << LOG_ENDL;
906    success = false;
907  } else if (!IsDecodingComplete()) {
908    LOG(ERROR) << "FinishDecoding() called before parsing entire"
909                  " delta file window" << LOG_ENDL;
910    success = false;
911  }
912  // Reset the object state for the next decode operation
913  Reset();
914  return success;
915}
916
917bool VCDiffStreamingDecoderImpl::TargetWindowWouldExceedSizeLimits(
918    size_t window_size) const {
919  if (window_size > maximum_target_window_size_) {
920    LOG(ERROR) << "Length of target window (" << window_size
921               << ") exceeds limit of " << maximum_target_window_size_
922               << " bytes" << LOG_ENDL;
923    return true;
924  }
925  if (HasPlannedTargetFileSize()) {
926    // The logical expression to check would be:
927    //
928    //   total_of_target_window_sizes_ + window_size > planned_target_file_size_
929    //
930    // but the addition might cause an integer overflow if target_bytes_to_add
931    // is very large.  So it is better to check target_bytes_to_add against
932    // the remaining planned target bytes.
933    size_t remaining_planned_target_file_size =
934        planned_target_file_size_ - total_of_target_window_sizes_;
935    if (window_size > remaining_planned_target_file_size) {
936      LOG(ERROR) << "Length of target window (" << window_size
937                 << " bytes) plus previous windows ("
938                 << total_of_target_window_sizes_
939                 << " bytes) would exceed planned size of "
940                 << planned_target_file_size_ << " bytes" << LOG_ENDL;
941      return true;
942    }
943  }
944  size_t remaining_maximum_target_bytes =
945      maximum_target_file_size_ - total_of_target_window_sizes_;
946  if (window_size > remaining_maximum_target_bytes) {
947    LOG(ERROR) << "Length of target window (" << window_size
948               << " bytes) plus previous windows ("
949               << total_of_target_window_sizes_
950               << " bytes) would exceed maximum target file size of "
951               << maximum_target_file_size_ << " bytes" << LOG_ENDL;
952    return true;
953  }
954  return false;
955}
956
957// *** Methods for VCDiffDeltaFileWindow
958
959void VCDiffDeltaFileWindow::Reset() {
960  found_header_ = false;
961
962  // Mark the start of the current target window.
963  target_window_start_pos_ = parent_ ? parent_->decoded_target()->size() : 0U;
964  target_window_length_ = 0;
965
966  source_segment_ptr_ = NULL;
967  source_segment_length_ = 0;
968
969  instructions_and_sizes_.Invalidate();
970  data_for_add_and_run_.Invalidate();
971  addresses_for_copy_.Invalidate();
972
973  interleaved_bytes_expected_ = 0;
974
975  has_checksum_ = false;
976  expected_checksum_ = 0;
977}
978
979VCDiffResult VCDiffDeltaFileWindow::SetUpWindowSections(
980    VCDiffHeaderParser* header_parser) {
981  size_t add_and_run_data_length = 0;
982  size_t instructions_and_sizes_length = 0;
983  size_t addresses_length = 0;
984  if (!header_parser->ParseSectionLengths(has_checksum_,
985                                          &add_and_run_data_length,
986                                          &instructions_and_sizes_length,
987                                          &addresses_length,
988                                          &expected_checksum_)) {
989    return header_parser->GetResult();
990  }
991  if (parent_->AllowInterleaved() &&
992      (add_and_run_data_length == 0) &&
993      (addresses_length == 0)) {
994    // The interleaved format is being used.
995    interleaved_bytes_expected_ =
996        static_cast<int>(instructions_and_sizes_length);
997    UpdateInterleavedSectionPointers(header_parser->UnparsedData(),
998                                     header_parser->End());
999  } else {
1000    // If interleaved format is not used, then the whole window contents
1001    // must be available before decoding can begin.  If only part of
1002    // the current window is available, then report end of data
1003    // and re-parse the whole header when DecodeChunk() is called again.
1004    if (header_parser->UnparsedSize() < (add_and_run_data_length +
1005                                         instructions_and_sizes_length +
1006                                         addresses_length)) {
1007      return RESULT_END_OF_DATA;
1008    }
1009    data_for_add_and_run_.Init(header_parser->UnparsedData(),
1010                               add_and_run_data_length);
1011    instructions_and_sizes_.Init(data_for_add_and_run_.End(),
1012                                 instructions_and_sizes_length);
1013    addresses_for_copy_.Init(instructions_and_sizes_.End(), addresses_length);
1014    if (addresses_for_copy_.End() != header_parser->EndOfDeltaWindow()) {
1015      LOG(ERROR) << "The end of the instructions section "
1016                     "does not match the end of the delta window" << LOG_ENDL;
1017      return RESULT_ERROR;
1018    }
1019  }
1020  reader_.Init(instructions_and_sizes_.UnparsedDataAddr(),
1021               instructions_and_sizes_.End());
1022  return RESULT_SUCCESS;
1023}
1024
1025// Here are the elements of the delta window header to be parsed,
1026// from section 4 of the RFC:
1027//
1028//     Window1
1029//         Win_Indicator                            - byte
1030//         [Source segment size]                    - integer
1031//         [Source segment position]                - integer
1032//         The delta encoding of the target window
1033//             Length of the delta encoding         - integer
1034//             The delta encoding
1035//                 Size of the target window        - integer
1036//                 Delta_Indicator                  - byte
1037//                 Length of data for ADDs and RUNs - integer
1038//                 Length of instructions and sizes - integer
1039//                 Length of addresses for COPYs    - integer
1040//                 Data section for ADDs and RUNs   - array of bytes
1041//                 Instructions and sizes section   - array of bytes
1042//                 Addresses section for COPYs      - array of bytes
1043//
1044VCDiffResult VCDiffDeltaFileWindow::ReadHeader(
1045    ParseableChunk* parseable_chunk) {
1046  std::string* decoded_target = parent_->decoded_target();
1047  VCDiffHeaderParser header_parser(parseable_chunk->UnparsedData(),
1048                                   parseable_chunk->End());
1049  size_t source_segment_position = 0;
1050  unsigned char win_indicator = 0;
1051  if (!header_parser.ParseWinIndicatorAndSourceSegment(
1052          parent_->dictionary_size(),
1053          decoded_target->size(),
1054          parent_->allow_vcd_target(),
1055          &win_indicator,
1056          &source_segment_length_,
1057          &source_segment_position)) {
1058    return header_parser.GetResult();
1059  }
1060  has_checksum_ = parent_->AllowChecksum() && (win_indicator & VCD_CHECKSUM);
1061  if (!header_parser.ParseWindowLengths(&target_window_length_)) {
1062    return header_parser.GetResult();
1063  }
1064  if (parent_->TargetWindowWouldExceedSizeLimits(target_window_length_)) {
1065    // An error has been logged by TargetWindowWouldExceedSizeLimits().
1066    return RESULT_ERROR;
1067  }
1068  header_parser.ParseDeltaIndicator();
1069  VCDiffResult setup_return_code = SetUpWindowSections(&header_parser);
1070  if (RESULT_SUCCESS != setup_return_code) {
1071    return setup_return_code;
1072  }
1073  // Reserve enough space in the output string for the current target window.
1074  decoded_target->reserve(target_window_start_pos_ + target_window_length_);
1075  // Get a pointer to the start of the source segment.
1076  if (win_indicator & VCD_SOURCE) {
1077    source_segment_ptr_ = parent_->dictionary_ptr() + source_segment_position;
1078  } else if (win_indicator & VCD_TARGET) {
1079    // This assignment must happen after the reserve().
1080    // decoded_target should not be resized again while processing this window,
1081    // so source_segment_ptr_ should remain valid.
1082    source_segment_ptr_ = decoded_target->data() + source_segment_position;
1083  }
1084  // The whole window header was found and parsed successfully.
1085  found_header_ = true;
1086  parseable_chunk->Advance(header_parser.ParsedSize());
1087  parent_->AddToTotalTargetWindowSize(target_window_length_);
1088  return RESULT_SUCCESS;
1089}
1090
1091void VCDiffDeltaFileWindow::UpdateInstructionPointer(
1092    ParseableChunk* parseable_chunk) {
1093  if (IsInterleaved()) {
1094    size_t bytes_parsed = instructions_and_sizes_.ParsedSize();
1095    // Reduce expected instruction segment length by bytes parsed
1096    interleaved_bytes_expected_ -= static_cast<int>(bytes_parsed);
1097    parseable_chunk->Advance(bytes_parsed);
1098  }
1099}
1100
1101inline size_t VCDiffDeltaFileWindow::TargetBytesDecoded() {
1102  return parent_->decoded_target()->size() - target_window_start_pos_;
1103}
1104
1105size_t VCDiffDeltaFileWindow::TargetBytesRemaining() {
1106  if (target_window_length_ == 0) {
1107    // There is no window being decoded at present
1108    return 0;
1109  } else {
1110    return target_window_length_ - TargetBytesDecoded();
1111  }
1112}
1113
1114inline void VCDiffDeltaFileWindow::CopyBytes(const char* data, size_t size) {
1115  parent_->decoded_target()->append(data, size);
1116}
1117
1118inline void VCDiffDeltaFileWindow::RunByte(unsigned char byte, size_t size) {
1119  parent_->decoded_target()->append(size, byte);
1120}
1121
1122VCDiffResult VCDiffDeltaFileWindow::DecodeAdd(size_t size) {
1123  if (size > data_for_add_and_run_.UnparsedSize()) {
1124    return RESULT_END_OF_DATA;
1125  }
1126  // Write the next "size" data bytes
1127  CopyBytes(data_for_add_and_run_.UnparsedData(), size);
1128  data_for_add_and_run_.Advance(size);
1129  return RESULT_SUCCESS;
1130}
1131
1132VCDiffResult VCDiffDeltaFileWindow::DecodeRun(size_t size) {
1133  if (data_for_add_and_run_.Empty()) {
1134    return RESULT_END_OF_DATA;
1135  }
1136  // Write "size" copies of the next data byte
1137  RunByte(*data_for_add_and_run_.UnparsedData(), size);
1138  data_for_add_and_run_.Advance(1);
1139  return RESULT_SUCCESS;
1140}
1141
1142VCDiffResult VCDiffDeltaFileWindow::DecodeCopy(size_t size,
1143                                               unsigned char mode) {
1144  // Keep track of the number of target bytes decoded as a local variable
1145  // to avoid recalculating it each time it is needed.
1146  size_t target_bytes_decoded = TargetBytesDecoded();
1147  const VCDAddress here_address =
1148      static_cast<VCDAddress>(source_segment_length_ + target_bytes_decoded);
1149  const VCDAddress decoded_address = parent_->addr_cache()->DecodeAddress(
1150      here_address,
1151      mode,
1152      addresses_for_copy_.UnparsedDataAddr(),
1153      addresses_for_copy_.End());
1154  switch (decoded_address) {
1155    case RESULT_ERROR:
1156      LOG(ERROR) << "Unable to decode address for COPY" << LOG_ENDL;
1157      return RESULT_ERROR;
1158    case RESULT_END_OF_DATA:
1159      return RESULT_END_OF_DATA;
1160    default:
1161      if ((decoded_address < 0) || (decoded_address > here_address)) {
1162        LOG(DFATAL) << "Internal error: unexpected address " << decoded_address
1163                    << " returned from DecodeAddress, with here_address = "
1164                    << here_address << LOG_ENDL;
1165        return RESULT_ERROR;
1166      }
1167      break;
1168  }
1169  size_t address = static_cast<size_t>(decoded_address);
1170  if ((address + size) <= source_segment_length_) {
1171    // Copy all data from source segment
1172    CopyBytes(&source_segment_ptr_[address], size);
1173    return RESULT_SUCCESS;
1174  }
1175  // Copy some data from target window...
1176  if (address < source_segment_length_) {
1177    // ... plus some data from source segment
1178    const size_t partial_copy_size = source_segment_length_ - address;
1179    CopyBytes(&source_segment_ptr_[address], partial_copy_size);
1180    target_bytes_decoded += partial_copy_size;
1181    address += partial_copy_size;
1182    size -= partial_copy_size;
1183  }
1184  address -= source_segment_length_;
1185  // address is now based at start of target window
1186  const char* const target_segment_ptr = parent_->decoded_target()->data() +
1187                                         target_window_start_pos_;
1188  while (size > (target_bytes_decoded - address)) {
1189    // Recursive copy that extends into the yet-to-be-copied target data
1190    const size_t partial_copy_size = target_bytes_decoded - address;
1191    CopyBytes(&target_segment_ptr[address], partial_copy_size);
1192    target_bytes_decoded += partial_copy_size;
1193    address += partial_copy_size;
1194    size -= partial_copy_size;
1195  }
1196  CopyBytes(&target_segment_ptr[address], size);
1197  return RESULT_SUCCESS;
1198}
1199
1200int VCDiffDeltaFileWindow::DecodeBody(ParseableChunk* parseable_chunk) {
1201  if (IsInterleaved() && (instructions_and_sizes_.UnparsedData()
1202                              != parseable_chunk->UnparsedData())) {
1203    LOG(DFATAL) << "Internal error: interleaved format is used, but the"
1204                   " input pointer does not point to the instructions section"
1205                << LOG_ENDL;
1206    return RESULT_ERROR;
1207  }
1208  while (TargetBytesDecoded() < target_window_length_) {
1209    int32_t decoded_size = VCD_INSTRUCTION_ERROR;
1210    unsigned char mode = 0;
1211    VCDiffInstructionType instruction =
1212        reader_.GetNextInstruction(&decoded_size, &mode);
1213    switch (instruction) {
1214      case VCD_INSTRUCTION_END_OF_DATA:
1215        UpdateInstructionPointer(parseable_chunk);
1216        return RESULT_END_OF_DATA;
1217      case VCD_INSTRUCTION_ERROR:
1218        return RESULT_ERROR;
1219      default:
1220        break;
1221    }
1222    const size_t size = static_cast<size_t>(decoded_size);
1223    // The value of "size" itself could be enormous (say, INT32_MAX)
1224    // so check it individually against the limit to protect against
1225    // overflow when adding it to something else.
1226    if ((size > target_window_length_) ||
1227        ((size + TargetBytesDecoded()) > target_window_length_)) {
1228      LOG(ERROR) << VCDiffInstructionName(instruction)
1229                 << " with size " << size
1230                 << " plus existing " << TargetBytesDecoded()
1231                 << " bytes of target data exceeds length of target"
1232                    " window (" << target_window_length_ << " bytes)"
1233                 << LOG_ENDL;
1234      return RESULT_ERROR;
1235    }
1236    VCDiffResult result = RESULT_SUCCESS;
1237    switch (instruction) {
1238      case VCD_ADD:
1239        result = DecodeAdd(size);
1240        break;
1241      case VCD_RUN:
1242        result = DecodeRun(size);
1243        break;
1244      case VCD_COPY:
1245        result = DecodeCopy(size, mode);
1246        break;
1247      default:
1248        LOG(DFATAL) << "Unexpected instruction type " << instruction
1249                    << "in opcode stream" << LOG_ENDL;
1250        return RESULT_ERROR;
1251    }
1252    switch (result) {
1253      case RESULT_END_OF_DATA:
1254        reader_.UnGetInstruction();
1255        UpdateInstructionPointer(parseable_chunk);
1256        return RESULT_END_OF_DATA;
1257      case RESULT_ERROR:
1258        return RESULT_ERROR;
1259      case RESULT_SUCCESS:
1260        break;
1261    }
1262  }
1263  if (TargetBytesDecoded() != target_window_length_) {
1264    LOG(ERROR) << "Decoded target window size (" << TargetBytesDecoded()
1265               << " bytes) does not match expected size ("
1266               << target_window_length_ << " bytes)" << LOG_ENDL;
1267    return RESULT_ERROR;
1268  }
1269  const char* const target_window_start =
1270      parent_->decoded_target()->data() + target_window_start_pos_;
1271  if (has_checksum_ &&
1272      (ComputeAdler32(target_window_start, target_window_length_)
1273           != expected_checksum_)) {
1274    LOG(ERROR) << "Target data does not match checksum; this could mean "
1275                  "that the wrong dictionary was used" << LOG_ENDL;
1276    return RESULT_ERROR;
1277  }
1278  if (!instructions_and_sizes_.Empty()) {
1279    LOG(ERROR) << "Excess instructions and sizes left over "
1280                  "after decoding target window" << LOG_ENDL;
1281      return RESULT_ERROR;
1282  }
1283  if (!IsInterleaved()) {
1284    // Standard format is being used, with three separate sections for the
1285    // instructions, data, and addresses.
1286    if (!data_for_add_and_run_.Empty()) {
1287      LOG(ERROR) << "Excess ADD/RUN data left over "
1288                    "after decoding target window" << LOG_ENDL;
1289        return RESULT_ERROR;
1290    }
1291    if (!addresses_for_copy_.Empty()) {
1292      LOG(ERROR) << "Excess COPY addresses left over "
1293                    "after decoding target window" << LOG_ENDL;
1294        return RESULT_ERROR;
1295    }
1296    // Reached the end of the window.  Update the ParseableChunk to point to the
1297    // end of the addresses section, which is the last section in the window.
1298    parseable_chunk->SetPosition(addresses_for_copy_.End());
1299  } else {
1300    // Interleaved format is being used.
1301    UpdateInstructionPointer(parseable_chunk);
1302  }
1303  return RESULT_SUCCESS;
1304}
1305
1306VCDiffResult VCDiffDeltaFileWindow::DecodeWindows(
1307    ParseableChunk* parseable_chunk) {
1308  if (!parent_) {
1309    LOG(DFATAL) << "Internal error: VCDiffDeltaFileWindow::DecodeWindows() "
1310                   "called before VCDiffDeltaFileWindow::Init()" << LOG_ENDL;
1311    return RESULT_ERROR;
1312  }
1313  while (!parseable_chunk->Empty()) {
1314    if (!found_header_) {
1315      switch (ReadHeader(parseable_chunk)) {
1316        case RESULT_END_OF_DATA:
1317          return RESULT_END_OF_DATA;
1318        case RESULT_ERROR:
1319          return RESULT_ERROR;
1320        default:
1321          // Reset address cache between windows (RFC section 5.1)
1322          if (!parent_->addr_cache()->Init()) {
1323            LOG(DFATAL) << "Error initializing address cache" << LOG_ENDL;
1324            return RESULT_ERROR;
1325          }
1326      }
1327    } else {
1328      // We are resuming a window that was partially decoded before a
1329      // RESULT_END_OF_DATA was returned.  This can only happen on the first
1330      // loop iteration, and only if the interleaved format is enabled and used.
1331      if (!IsInterleaved()) {
1332        LOG(DFATAL) << "Internal error: Resumed decoding of a delta file window"
1333                       " when interleaved format is not being used" << LOG_ENDL;
1334        return RESULT_ERROR;
1335      }
1336      UpdateInterleavedSectionPointers(parseable_chunk->UnparsedData(),
1337                                       parseable_chunk->End());
1338      reader_.UpdatePointers(instructions_and_sizes_.UnparsedDataAddr(),
1339                             instructions_and_sizes_.End());
1340    }
1341    switch (DecodeBody(parseable_chunk)) {
1342      case RESULT_END_OF_DATA:
1343        if (MoreDataExpected()) {
1344          return RESULT_END_OF_DATA;
1345        } else {
1346          LOG(ERROR) << "End of data reached while decoding VCDIFF delta file"
1347                     << LOG_ENDL;
1348          // fall through to RESULT_ERROR case
1349        }
1350      case RESULT_ERROR:
1351        return RESULT_ERROR;
1352      default:
1353        break;  // DecodeBody succeeded
1354    }
1355    // Get ready to read a new delta window
1356    Reset();
1357    if (parent_->ReachedPlannedTargetFileSize()) {
1358      // Found exactly the length we expected.  Stop decoding.
1359      return RESULT_SUCCESS;
1360    }
1361  }
1362  return RESULT_SUCCESS;
1363}
1364
1365// *** Methods for VCDiffStreamingDecoder
1366
1367VCDiffStreamingDecoder::VCDiffStreamingDecoder()
1368: impl_(new VCDiffStreamingDecoderImpl) { }
1369
1370VCDiffStreamingDecoder::~VCDiffStreamingDecoder() { delete impl_; }
1371
1372void VCDiffStreamingDecoder::StartDecoding(const char* source, size_t len) {
1373  impl_->StartDecoding(source, len);
1374}
1375
1376bool VCDiffStreamingDecoder::DecodeChunkToInterface(
1377    const char* data,
1378    size_t len,
1379    OutputStringInterface* output_string) {
1380  return impl_->DecodeChunk(data, len, output_string);
1381}
1382
1383bool VCDiffStreamingDecoder::FinishDecoding() {
1384  return impl_->FinishDecoding();
1385}
1386
1387bool VCDiffStreamingDecoder::SetMaximumTargetFileSize(
1388    size_t new_maximum_target_file_size) {
1389  return impl_->SetMaximumTargetFileSize(new_maximum_target_file_size);
1390}
1391
1392bool VCDiffStreamingDecoder::SetMaximumTargetWindowSize(
1393    size_t new_maximum_target_window_size) {
1394  return impl_->SetMaximumTargetWindowSize(new_maximum_target_window_size);
1395}
1396
1397void VCDiffStreamingDecoder::SetAllowVcdTarget(bool allow_vcd_target) {
1398  impl_->SetAllowVcdTarget(allow_vcd_target);
1399}
1400
1401bool VCDiffDecoder::DecodeToInterface(const char* dictionary_ptr,
1402                                      size_t dictionary_size,
1403                                      const string& encoding,
1404                                      OutputStringInterface* target) {
1405  target->clear();
1406  decoder_.StartDecoding(dictionary_ptr, dictionary_size);
1407  if (!decoder_.DecodeChunkToInterface(encoding.data(),
1408                                       encoding.size(),
1409                                       target)) {
1410    return false;
1411  }
1412  return decoder_.FinishDecoding();
1413}
1414
1415}  // namespace open_vcdiff
1416