1311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Copyright 2007 Google Inc.
2311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Author: Lincoln Smith
3311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
4311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Licensed under the Apache License, Version 2.0 (the "License");
5311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// you may not use this file except in compliance with the License.
6311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// You may obtain a copy of the License at
7311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
8311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//      http://www.apache.org/licenses/LICENSE-2.0
9311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
10311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Unless required by applicable law or agreed to in writing, software
11311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// distributed under the License is distributed on an "AS IS" BASIS,
12311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// See the License for the specific language governing permissions and
14311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// limitations under the License.
15311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
16311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Implementation of the Address Cache and Address Encoding
17311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// algorithms described in sections 5.1 - 5.4 of RFC 3284 -
18311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// The VCDIFF Generic Differencing and Compression Data Format.
19311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
20311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
21311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Assumptions:
22311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//   * The VCDAddress type is large enough to hold any offset within
23311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//     the source and target windows.  The limit (for int32_t) is 2^31-1 bytes.
24311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//     The source (dictionary) should not approach this size limit;
25311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//     to compress a target file that is larger than
26311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//     INT_MAX - (dictionary size) bytes, the encoder must
27311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//     break it up into multiple target windows.
28311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
29311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#include <config.h>
30311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#include "addrcache.h"
31311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#include "logging.h"
32311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#include "varint_bigendian.h"
33311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#include "vcdiff_defs.h"  // RESULT_ERROR
34311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
35311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffnamespace open_vcdiff {
36311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
37311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// The constructor does not initialize near_addresses_ and same_addresses_.
38311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Therefore, Init() must be called before any other method can be used.
39311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
40311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Arguments:
41311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//   near_cache_size: Size of the NEAR cache (number of 4-byte integers)
42311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//   same_cache_size: Size of the SAME cache (number of blocks of
43311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//                                            256 4-byte integers per block)
44311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//     Because the mode is expressed as a byte value,
45311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//     near_cache_size + same_cache_size should not exceed 254.
46311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
47311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffVCDiffAddressCache::VCDiffAddressCache(int near_cache_size,
48311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                       int same_cache_size)
49311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    : near_cache_size_(near_cache_size),
50311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      same_cache_size_(same_cache_size),
51311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      next_slot_(0) { }
52311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
53311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffVCDiffAddressCache::VCDiffAddressCache()
54311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    : near_cache_size_(kDefaultNearCacheSize),
55311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      same_cache_size_(kDefaultSameCacheSize),
56311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      next_slot_(0) { }
57311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
58311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Sets up data structures needed to call other methods.  Operations that may
59311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// fail at runtime (for example, validating the provided near_cache_size_ and
60311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// same_cache_size_ parameters against their maximum allowed values) are
61311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// confined to this routine in order to guarantee that the class constructor
62311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// will never fail.  Other methods (except the destructor) cannot be invoked
63311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// until this method has been called successfully.  After the object has been
64311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// initialized and used, Init() can be called again to reset it to its initial
65311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// state.
66311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
67311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Return value: "true" if initialization succeeded, "false" if it failed.
68311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//     No other method except the destructor may be invoked if this function
69311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//     returns false.  The caller is responsible for checking the return value
70311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//     and providing an exit path in case of error.
71311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
72311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffbool VCDiffAddressCache::Init() {
73311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // The mode is expressed as a byte value, so there is only room for 256 modes,
74311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // including the two non-cached modes (SELF and HERE).  Do not allow a larger
75311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // number of modes to be defined.  We do a separate sanity check for
76311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // near_cache_size_ and same_cache_size_ because adding them together can
77311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // cause an integer overflow if each is set to, say, INT_MAX.
78311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if ((near_cache_size_ > (VCD_MAX_MODES - 2)) || (near_cache_size_ < 0)) {
79732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com    VCD_ERROR << "Near cache size " << near_cache_size_ << " is invalid"
80732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com              << VCD_ENDL;
81311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return false;
82311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
83311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if ((same_cache_size_ > (VCD_MAX_MODES - 2)) || (same_cache_size_ < 0)) {
84732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com    VCD_ERROR << "Same cache size " << same_cache_size_ << " is invalid"
85732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com              << VCD_ENDL;
86311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return false;
87311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
88311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if ((near_cache_size_ + same_cache_size_) > VCD_MAX_MODES - 2) {
89732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com    VCD_ERROR << "Using near cache size " << near_cache_size_
90732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com              << " and same cache size " << same_cache_size_
91732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com              << " would exceed maximum number of COPY modes ("
92732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com              << VCD_MAX_MODES << ")" << VCD_ENDL;
93311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return false;
94311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
95311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (near_cache_size_ > 0) {
96311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    near_addresses_.assign(near_cache_size_, 0);
97311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
98311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (same_cache_size_ > 0) {
99311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    same_addresses_.assign(same_cache_size_ * 256, 0);
100311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
101311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  next_slot_ = 0;  // in case Init() is called a second time to reinit
102311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  return true;
103311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
104311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
105311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// This method will be called whenever an address is calculated for an
106311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// encoded or decoded COPY instruction, and will update the contents
107311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// of the SAME and NEAR caches.  It is vital that the use of
108311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// UpdateCache (called cache_update in the RFC examples) exactly match
109311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// the RFC standard, and that the same caching logic be used in the
110311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// decoder as in the encoder, in order for the decoded addresses to
111311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// match.
112311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
113311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Argument:
114311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//   address: This must be a valid address between 0 and
115311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       (source window size + target window size).  It is assumed that
116311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       these bounds have been checked before calling UpdateCache.
117311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
118311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffvoid VCDiffAddressCache::UpdateCache(VCDAddress address) {
119311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (near_cache_size_ > 0) {
120311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    near_addresses_[next_slot_] = address;
121311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    next_slot_ = (next_slot_ + 1) % near_cache_size_;
122311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
123311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (same_cache_size_ > 0) {
124311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    same_addresses_[address % (same_cache_size_ * 256)] = address;
125311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
126311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
127311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
128311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Determines the address mode that yields the most compact encoding
129311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// of the given address value, writes the encoded address into the
130311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// address stream, and returns the mode used.  The most compact encoding
131311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// is found by looking for the numerically lowest encoded address.
132311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// The Init() function must already have been called.
133311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
134311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Arguments:
135311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//   address: The address to be encoded.  Must be a non-negative integer
136311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       between 0 and (here_address - 1).
137311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//   here_address: The current location in the target data (i.e., the
138311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       position just after the last encoded value.)  Must be non-negative.
139311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//   encoded_addr: Points to an VCDAddress that will be replaced
140311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       with the encoded representation of address.
141311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       If WriteAddressAsVarintForMode returns true when passed
142311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       the return value, then encoded_addr should be written
143311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       into the delta file as a variable-length integer (Varint);
144311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       otherwise, it should be written as a byte (unsigned char).
145311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
146311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Return value: A mode value between 0 and 255.  The mode will tell
147311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       how to interpret the next value in the address stream.
148311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       The values 0 and 1 correspond to SELF and HERE addressing.
149311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
150311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// The function is guaranteed to succeed unless the conditions on the arguments
151732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com// have not been met, in which case a VCD_DFATAL message will be produced,
152311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 0 will be returned, and *encoded_addr will be replaced with 0.
153311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
154311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffunsigned char VCDiffAddressCache::EncodeAddress(VCDAddress address,
155311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                                VCDAddress here_address,
156311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                                VCDAddress* encoded_addr) {
157311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (address < 0) {
158732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com    VCD_DFATAL << "EncodeAddress was passed a negative address: "
159732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com               << address << VCD_ENDL;
160311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    *encoded_addr = 0;
161311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return 0;
162311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
163311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (address >= here_address) {
164732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com    VCD_DFATAL << "EncodeAddress was called with address (" << address
165732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com               << ") < here_address (" << here_address << ")" << VCD_ENDL;
166311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    *encoded_addr = 0;
167311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return 0;
168311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
169311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Try using the SAME cache.  This method, if available, always
170311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // results in the smallest encoding and takes priority over other modes.
171311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (same_cache_size() > 0) {
172311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    const VCDAddress same_cache_pos =
173311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff        address % (same_cache_size() * 256);
174311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    if (SameAddress(same_cache_pos) == address) {
175311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      // This is the only mode for which an single byte will be written
176311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      // to the address stream instead of a variable-length integer.
177311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      UpdateCache(address);
178311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      *encoded_addr = same_cache_pos % 256;
179311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      return FirstSameMode() + (same_cache_pos / 256);  // SAME mode
180311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    }
181311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
182311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
183311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Try SELF mode
184311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  unsigned char best_mode = VCD_SELF_MODE;
185311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  VCDAddress best_encoded_address = address;
186311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
187311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Try HERE mode
188311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  {
189311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    const VCDAddress here_encoded_address = here_address - address;
190311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    if (here_encoded_address < best_encoded_address) {
191311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      best_mode = VCD_HERE_MODE;
192311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      best_encoded_address = here_encoded_address;
193311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    }
194311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
195311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
196311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Try using the NEAR cache
197311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  for (int i = 0; i < near_cache_size(); ++i) {
198311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    const VCDAddress near_encoded_address = address - NearAddress(i);
199311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    if ((near_encoded_address >= 0) &&
200311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff        (near_encoded_address < best_encoded_address)) {
201311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      best_mode = FirstNearMode() + i;
202311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      best_encoded_address = near_encoded_address;
203311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    }
204311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
205311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
206311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  UpdateCache(address);
207311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  *encoded_addr = best_encoded_address;
208311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  return best_mode;
209311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
210311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
211311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Increments *byte_pointer and returns the byte it pointed to before the
212311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// increment.  The caller must check bounds to ensure that *byte_pointer
213311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// points to a valid address in memory.
214311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffstatic unsigned char ParseByte(const char** byte_pointer) {
215311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  unsigned char byte_value = static_cast<unsigned char>(**byte_pointer);
216311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  ++(*byte_pointer);
217311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  return byte_value;
218311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
219311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
220311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Checks the given decoded address for validity.  Returns true if the
221311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// address is valid; otherwise, prints an error message to the log and
222311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// returns false.
223311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffstatic bool IsDecodedAddressValid(VCDAddress decoded_address,
224311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                  VCDAddress here_address) {
225311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (decoded_address < 0) {
226732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com    VCD_ERROR << "Decoded address " << decoded_address << " is invalid"
227732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com              << VCD_ENDL;
228311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return false;
229311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  } else if (decoded_address >= here_address) {
230732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com    VCD_ERROR << "Decoded address (" << decoded_address
231732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com              << ") is beyond location in target file (" << here_address
232732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com              << ")" << VCD_ENDL;
233311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return false;
234311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
235311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  return true;
236311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
237311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
238311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Interprets the next value in the address_stream using the provided mode,
239311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// which may need to access the SAME or NEAR address cache.  Returns the
240311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// decoded address.
241311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// The Init() function must already have been called.
242311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
243311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Arguments:
244311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//   here_address: The current location in the source + target data (i.e., the
245311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       location into which the COPY instruction will copy.)  By definition,
246311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       all addresses between 0 and (here_address - 1) are valid, and
247311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       any other address is invalid.
248311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//   mode: A byte value between 0 and (near_cache_size_ + same_cache_size_ + 1)
249311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       which tells how to interpret the next value in the address stream.
250311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       The values 0 and 1 correspond to SELF and HERE addressing.
251311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       The validity of "mode" should already have been checked before
252311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       calling this function.
253311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//   address_stream: Points to a pointer holding the position
254311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       in the "Addresses section for COPYs" part of the input data.
255311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       That section must already have been uncompressed
256311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       using a secondary decompressor (if necessary.)
257311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       This is an IN/OUT argument; the value of *address_stream will be
258311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       incremented by the size of an integer, or (if the SAME cache
259311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       was used) by the size of a byte (1).
260311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//   address_stream_end: Points to the position just after the end of
261311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       the address stream buffer.  All addresses between *address_stream
262311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//       and address_stream_end should contain valid address data.
263311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
264311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Return value: If the input conditions were met, and the address section
265311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//     of the input data contains properly encoded addresses that match
266311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//     the instructions section, then an integer between 0 and here_address - 1
267311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//     will be returned, representing the address from which data should
268311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//     be copied from the source or target window into the output stream.
269311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//     If an invalid address value is found in address_stream, then
270311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//     RESULT_ERROR will be returned.  If the limit address_stream_end
271311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//     is reached before the address can be decoded, then
272311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//     RESULT_END_OF_DATA will be returned.  If more streamed data
273311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//     is expected, this means that the consumer should block and wait
274311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//     for more data before continuing to decode.  If no more data is expected,
275311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//     this return value signals an error condition.
276311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff//
277311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffVCDAddress VCDiffAddressCache::DecodeAddress(VCDAddress here_address,
278311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                             unsigned char mode,
279311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                             const char** address_stream,
280311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                             const char* address_stream_end) {
281311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (here_address < 0) {
282732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com    VCD_DFATAL << "DecodeAddress was passed a negative value"
283732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com                  " for here_address: " << here_address << VCD_ENDL;
284311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return RESULT_ERROR;
285311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
286311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  const char* new_address_pos = *address_stream;
287311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (new_address_pos >= address_stream_end) {
288311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return RESULT_END_OF_DATA;
289311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
290311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  VCDAddress decoded_address;
291311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (IsSameMode(mode)) {
292311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // SAME mode expects a byte value as the encoded address
293311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    unsigned char encoded_address = ParseByte(&new_address_pos);
294311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    decoded_address = DecodeSameAddress(mode, encoded_address);
295311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  } else {
296311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    // All modes except SAME mode expect a VarintBE as the encoded address
297311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    int32_t encoded_address = VarintBE<int32_t>::Parse(address_stream_end,
298311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff                                                       &new_address_pos);
299311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    switch (encoded_address) {
300311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      case RESULT_ERROR:
301732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com        VCD_ERROR << "Found invalid variable-length integer "
302732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com                     "as encoded address value" << VCD_ENDL;
303311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff        return RESULT_ERROR;
304311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      case RESULT_END_OF_DATA:
305311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff        return RESULT_END_OF_DATA;
306311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      default:
307311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff        break;
308311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    }
309311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    if (IsSelfMode(mode)) {
310311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      decoded_address = DecodeSelfAddress(encoded_address);
311311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    } else if (IsHereMode(mode)) {
312311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      decoded_address = DecodeHereAddress(encoded_address, here_address);
313311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    } else if (IsNearMode(mode)) {
314311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      decoded_address = DecodeNearAddress(mode, encoded_address);
315311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    } else {
316732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com      VCD_DFATAL << "Invalid mode value (" << static_cast<int>(mode)
317732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com                 << ") passed to DecodeAddress; maximum mode value = "
318732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com                 << static_cast<int>(LastMode()) << VCD_ENDL;
319311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff      return RESULT_ERROR;
320311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    }
321311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
322311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  // Check for an out-of-bounds address (corrupt/malicious data)
323311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  if (!IsDecodedAddressValid(decoded_address, here_address)) {
324311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff    return RESULT_ERROR;
325311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  }
326311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  *address_stream = new_address_pos;
327311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  UpdateCache(decoded_address);
328311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff  return decoded_address;
329311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}
330311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff
331311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff}  // namespace open_vcdiff
332