1c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Copyright 2006 Google Inc.
2c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Authors: Sanjay Ghemawat, Jeff Dean, Chandra Chereddi, Lincoln Smith
3c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
4c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Licensed under the Apache License, Version 2.0 (the "License");
5c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// you may not use this file except in compliance with the License.
6c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// You may obtain a copy of the License at
7c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
8c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//      http://www.apache.org/licenses/LICENSE-2.0
9c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
10c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Unless required by applicable law or agreed to in writing, software
11c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// distributed under the License is distributed on an "AS IS" BASIS,
12c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// See the License for the specific language governing permissions and
14c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// limitations under the License.
15c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
16c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#ifndef OPEN_VCDIFF_VCDIFFENGINE_H_
17c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#define OPEN_VCDIFF_VCDIFFENGINE_H_
18c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
19c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <config.h>
20c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <stddef.h>  // size_t
21c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <stdint.h>  // uint32_t
22c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
23c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace open_vcdiff {
24c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
25c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass BlockHash;
26c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass OutputStringInterface;
27c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass CodeTableWriterInterface;
28c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
29c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// The VCDiffEngine class is used to find the optimal encoding (in terms of COPY
30c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// and ADD instructions) for a given dictionary and target window.  To write the
31c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// instructions for this encoding, it calls the Copy() and Add() methods of the
32c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// code table writer object which is passed as an argument to Encode().
33c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass VCDiffEngine {
34c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott public:
35c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // The minimum size of a string match that is worth putting into a COPY
36c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // instruction.  Since this value is more than twice the block size, the
37c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // encoder will always discover a match of this size, no matter whether it is
38c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // aligned on block boundaries in the dictionary text.
39c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static const size_t kMinimumMatchSize = 32;
40c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
41c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  VCDiffEngine(const char* dictionary, size_t dictionary_size);
42c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
43c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  ~VCDiffEngine();
44c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
45c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Initializes the object before use.
46c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // This method must be called after constructing a VCDiffEngine object,
47c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // and before any other method may be called.  It should not be called
48c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // twice on the same object.
49c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Returns true if initialization succeeded, or false if an error occurred,
50c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // in which case no other method except the destructor may then be used
51c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // on the object.
52c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // The Init() method is the only one allowed to treat hashed_dictionary_
53c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // as non-const.
54c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool Init();
55c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
56c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  size_t dictionary_size() const { return dictionary_size_; }
57c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
58c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Main worker function.  Finds the best matches between the dictionary
59c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // (source) and target data, and uses the coder to write a
60c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // delta file window into *diff.
61c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Because it is a const function, many threads
62c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // can call Encode() at once for the same VCDiffEngine object.
63c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // All thread-specific data will be stored in the coder and diff arguments.
64c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // The coder object must have been fully initialized (by calling its Init()
65c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // method, if any) before calling this function.
66c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //
67c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // look_for_target_matches determines whether to look for matches
68c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // within the previously encoded target data, or just within the source
69c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // (dictionary) data.  Please see vcencoder.h for a full explanation
70c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // of this parameter.
71c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  void Encode(const char* target_data,
72c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott              size_t target_size,
73c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott              bool look_for_target_matches,
74c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott              OutputStringInterface* diff,
75c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott              CodeTableWriterInterface* coder) const;
76c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
77c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott private:
78c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static bool ShouldGenerateCopyInstructionForMatchOfSize(size_t size) {
79c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return size >= kMinimumMatchSize;
80c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
81c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
82c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // The following two functions use templates to produce two different
83c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // versions of the code depending on the value of the option
84c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // look_for_target_matches.  This approach saves a test-and-branch instruction
85c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // within the inner loop of EncodeCopyForBestMatch.
86c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  template<bool look_for_target_matches>
87c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  void EncodeInternal(const char* target_data,
88c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                      size_t target_size,
89c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                      OutputStringInterface* diff,
90c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                      CodeTableWriterInterface* coder) const;
91c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
92c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // If look_for_target_matches is true, then target_hash must point to a valid
93c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // BlockHash object, and cannot be NULL.  If look_for_target_matches is
94c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // false, then the value of target_hash is ignored.
95c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  template<bool look_for_target_matches>
96c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  size_t EncodeCopyForBestMatch(uint32_t hash_value,
97c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                const char* target_candidate_start,
98c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                const char* unencoded_target_start,
99c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                size_t unencoded_target_size,
100c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                const BlockHash* target_hash,
101c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                CodeTableWriterInterface* coder) const;
102c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
103c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  void AddUnmatchedRemainder(const char* unencoded_target_start,
104c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                             size_t unencoded_target_size,
105c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                             CodeTableWriterInterface* coder) const;
106c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
107c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  void FinishEncoding(size_t target_size,
108c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                      OutputStringInterface* diff,
109c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                      CodeTableWriterInterface* coder) const;
110c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
111c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const char* dictionary_;  // A copy of the dictionary contents
112c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
113c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const size_t dictionary_size_;
114c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
115c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // A hash that contains one element for every kBlockSize bytes of dictionary_.
116c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // This can be reused to encode many different target strings using the
117c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // same dictionary, without the need to compute the hash values each time.
118c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const BlockHash* hashed_dictionary_;
119c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
120c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Making these private avoids implicit copy constructor & assignment operator
121c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  VCDiffEngine(const VCDiffEngine&);
122c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  void operator=(const VCDiffEngine&);
123c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott};
124c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
125c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}  // namespace open_vcdiff
126c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
127c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#endif  // OPEN_VCDIFF_VCDIFFENGINE_H_
128