1// Copyright 2007 Google Inc.
2// Author: Lincoln Smith
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8//      http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16// Classes to implement the Address Cache and Address Encoding
17// algorithms described in sections 5.1 - 5.4 of RFC 3284 -
18// The VCDIFF Generic Differencing and Compression Data Format.
19// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
20
21#ifndef OPEN_VCDIFF_ADDRCACHE_H_
22#define OPEN_VCDIFF_ADDRCACHE_H_
23
24#include <config.h>
25#include <vector>
26#include "vcdiff_defs.h"  // VCDAddress
27
28namespace open_vcdiff {
29
30// Implements the "same" and "near" caches
31// as described in RFC 3284, section 5.  The "near" cache allows
32// efficient reuse of one of the last four referenced addresses
33// plus a small offset, and the "same" cache allows efficient reuse
34// of an exact recent address distinguished by its lowest-order bits.
35//
36// NOT threadsafe.
37//
38class VCDiffAddressCache {
39 public:
40  // The default cache sizes specified in the RFC
41  static const int kDefaultNearCacheSize = 4;
42  static const int kDefaultSameCacheSize = 3;
43
44  VCDiffAddressCache(int near_cache_size, int same_cache_size);
45
46  // This version of the constructor uses the default values
47  // kDefaultNearCacheSize and kDefaultSameCacheSize.
48  VCDiffAddressCache();
49
50  // Initializes the object before use.  This method must be called after
51  // constructing a VCDiffAddressCache/ object, before any other method may be
52  // called.  This is because Init() validates near_cache_size_ and
53  // same_cache_size_ before initializing the same and near caches.  After the
54  // object has been initialized and used, Init() can be called again to reset
55  // it to its initial state.
56  //
57  bool Init();
58
59  int near_cache_size() const { return near_cache_size_; }
60
61  int same_cache_size() const { return same_cache_size_; }
62
63  // Returns the first mode number that represents one of the NEAR modes.
64  // The number of NEAR modes is near_cache_size.  Each NEAR mode refers to
65  // an element of the near_addresses_ array, where a recently-referenced
66  // address is stored.
67  //
68  static unsigned char FirstNearMode() {
69    return VCD_FIRST_NEAR_MODE;
70  }
71
72  // Returns the first mode number that represents one of the SAME modes.
73  // The number of SAME modes is same_cache_size.  Each SAME mode refers to
74  // a block of 256 elements of the same_addresses_ array; the lowest-order
75  // 8 bits of the address are used to find the element of this block that
76  // may match the desired address value.
77  //
78  unsigned char FirstSameMode() const {
79    return VCD_FIRST_NEAR_MODE + near_cache_size();
80  }
81
82  // Returns the maximum valid mode number, which happens to be
83  // the last SAME mode.
84  //
85  unsigned char LastMode() const {
86    return FirstSameMode() + same_cache_size() - 1;
87  }
88
89  static unsigned char DefaultLastMode() {
90    return VCD_FIRST_NEAR_MODE
91        + kDefaultNearCacheSize + kDefaultSameCacheSize - 1;
92  }
93
94  // See the definition of enum VCDiffModes in vcdiff_defs.h,
95  // as well as section 5.3 of the RFC, for a description of
96  // each address mode type (SELF, HERE, NEAR, and SAME).
97  static bool IsSelfMode(unsigned char mode) {
98    return mode == VCD_SELF_MODE;
99  }
100
101  static bool IsHereMode(unsigned char mode) {
102    return mode == VCD_HERE_MODE;
103  }
104
105  bool IsNearMode(unsigned char mode) const {
106    return (mode >= FirstNearMode()) && (mode < FirstSameMode());
107  }
108
109  bool IsSameMode(unsigned char mode) const {
110    return (mode >= FirstSameMode()) && (mode <= LastMode());
111  }
112
113  static VCDAddress DecodeSelfAddress(int32_t encoded_address) {
114    return encoded_address;
115  }
116
117  static VCDAddress DecodeHereAddress(int32_t encoded_address,
118                                      VCDAddress here_address) {
119    return here_address - encoded_address;
120  }
121
122  VCDAddress DecodeNearAddress(unsigned char mode,
123                               int32_t encoded_address) const {
124    return NearAddress(mode - FirstNearMode()) + encoded_address;
125  }
126
127  VCDAddress DecodeSameAddress(unsigned char mode,
128                               unsigned char encoded_address) const {
129    return SameAddress(((mode - FirstSameMode()) * 256) + encoded_address);
130  }
131
132  // Returns true if, when using the given mode, an encoded address
133  // should be written to the delta file as a variable-length integer;
134  // returns false if the encoded address should be written
135  // as a byte value (unsigned char).
136  bool WriteAddressAsVarintForMode(unsigned char mode) const {
137    return !IsSameMode(mode);
138  }
139
140  // An accessor for an element of the near_addresses_ array.
141  // No bounds checking is performed; the caller must ensure that
142  // Init() has already been called, and that
143  //     0 <= pos < near_cache_size_
144  //
145  VCDAddress NearAddress(int pos) const {
146    return near_addresses_[pos];
147  }
148
149  // An accessor for an element of the same_addresses_ array.
150  // No bounds checking is performed; the caller must ensure that
151  // Init() has already been called, and that
152  //     0 <= pos < (same_cache_size_ * 256)
153  //
154  VCDAddress SameAddress(int pos) const {
155    return same_addresses_[pos];
156  }
157
158  // This method will be called whenever an address is calculated for an
159  // encoded or decoded COPY instruction, and will update the contents
160  // of the SAME and NEAR caches.
161  //
162  void UpdateCache(VCDAddress address);
163
164  // Determines the address mode that yields the most compact encoding
165  // of the given address value.  The most compact encoding
166  // is found by looking for the numerically lowest encoded address.
167  // Sets *encoded_addr to the encoded representation of the address
168  // and returns the mode used.
169  //
170  // The caller should pass the return value to the method
171  // WriteAddressAsVarintForMode() to determine whether encoded_addr
172  // should be written to the delta file as a variable-length integer
173  // or as a byte (unsigned char).
174  //
175  unsigned char EncodeAddress(VCDAddress address,
176                              VCDAddress here_address,
177                              VCDAddress* encoded_addr);
178
179  // Interprets the next value in the address_stream using the provided mode,
180  // which may need to access the SAME or NEAR address cache.  Returns the
181  // decoded address, or one of the following values:
182  //   RESULT_ERROR: An invalid address value was found in address_stream.
183  //   RESULT_END_OF_DATA: The limit address_stream_end was reached before
184  //       the address could be decoded.  If more streamed data is expected,
185  //       this means that the consumer should block and wait for more data
186  //       before continuing to decode.  If no more data is expected, this
187  //       return value signals an error condition.
188  //
189  // If successful, *address_stream will be incremented past the decoded address
190  // position.  If RESULT_ERROR or RESULT_END_OF_DATA is returned,
191  // then the value of *address_stream will not have changed.
192  //
193  VCDAddress DecodeAddress(VCDAddress here_address,
194                           unsigned char mode,
195                           const char** address_stream,
196                           const char* address_stream_end);
197
198 private:
199  // The number of addresses to be kept in the NEAR cache.
200  const int near_cache_size_;
201  // The number of 256-byte blocks to store in the SAME cache.
202  const int same_cache_size_;
203  // The next position in the NEAR cache to which an address will be written.
204  int       next_slot_;
205  // NEAR cache contents
206  std::vector<VCDAddress> near_addresses_;
207  // SAME cache contents
208  std::vector<VCDAddress> same_addresses_;
209
210  // Making these private avoids implicit copy constructor & assignment operator
211  VCDiffAddressCache(const VCDiffAddressCache&);  // NOLINT
212  void operator=(const VCDiffAddressCache&);
213};
214
215}  // namespace open_vcdiff
216
217#endif  // OPEN_VCDIFF_ADDRCACHE_H_
218