1311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Copyright 2007 Google Inc. 2311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Author: Lincoln Smith 3311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 4311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Licensed under the Apache License, Version 2.0 (the "License"); 5311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// you may not use this file except in compliance with the License. 6311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// You may obtain a copy of the License at 7311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 8311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// http://www.apache.org/licenses/LICENSE-2.0 9311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 10311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Unless required by applicable law or agreed to in writing, software 11311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// distributed under the License is distributed on an "AS IS" BASIS, 12311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// See the License for the specific language governing permissions and 14311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// limitations under the License. 15311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 16311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Implementation of the Address Cache and Address Encoding 17311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// algorithms described in sections 5.1 - 5.4 of RFC 3284 - 18311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// The VCDIFF Generic Differencing and Compression Data Format. 19311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html 20311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 21311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Assumptions: 22311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// * The VCDAddress type is large enough to hold any offset within 23311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// the source and target windows. The limit (for int32_t) is 2^31-1 bytes. 24311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// The source (dictionary) should not approach this size limit; 25311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// to compress a target file that is larger than 26311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// INT_MAX - (dictionary size) bytes, the encoder must 27311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// break it up into multiple target windows. 28311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 29311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#include <config.h> 30311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#include "addrcache.h" 31311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#include "logging.h" 32311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#include "varint_bigendian.h" 33311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff#include "vcdiff_defs.h" // RESULT_ERROR 34311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 35311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffnamespace open_vcdiff { 36311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 37311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// The constructor does not initialize near_addresses_ and same_addresses_. 38311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Therefore, Init() must be called before any other method can be used. 39311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 40311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Arguments: 41311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// near_cache_size: Size of the NEAR cache (number of 4-byte integers) 42311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// same_cache_size: Size of the SAME cache (number of blocks of 43311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 256 4-byte integers per block) 44311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Because the mode is expressed as a byte value, 45311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// near_cache_size + same_cache_size should not exceed 254. 46311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 47311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffVCDiffAddressCache::VCDiffAddressCache(int near_cache_size, 48311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff int same_cache_size) 49311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff : near_cache_size_(near_cache_size), 50311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff same_cache_size_(same_cache_size), 51311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff next_slot_(0) { } 52311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 53311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffVCDiffAddressCache::VCDiffAddressCache() 54311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff : near_cache_size_(kDefaultNearCacheSize), 55311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff same_cache_size_(kDefaultSameCacheSize), 56311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff next_slot_(0) { } 57311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 58311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Sets up data structures needed to call other methods. Operations that may 59311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// fail at runtime (for example, validating the provided near_cache_size_ and 60311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// same_cache_size_ parameters against their maximum allowed values) are 61311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// confined to this routine in order to guarantee that the class constructor 62311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// will never fail. Other methods (except the destructor) cannot be invoked 63311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// until this method has been called successfully. After the object has been 64311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// initialized and used, Init() can be called again to reset it to its initial 65311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// state. 66311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 67311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Return value: "true" if initialization succeeded, "false" if it failed. 68311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// No other method except the destructor may be invoked if this function 69311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// returns false. The caller is responsible for checking the return value 70311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// and providing an exit path in case of error. 71311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 72311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffbool VCDiffAddressCache::Init() { 73311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // The mode is expressed as a byte value, so there is only room for 256 modes, 74311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // including the two non-cached modes (SELF and HERE). Do not allow a larger 75311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // number of modes to be defined. We do a separate sanity check for 76311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // near_cache_size_ and same_cache_size_ because adding them together can 77311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // cause an integer overflow if each is set to, say, INT_MAX. 78311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if ((near_cache_size_ > (VCD_MAX_MODES - 2)) || (near_cache_size_ < 0)) { 79732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com VCD_ERROR << "Near cache size " << near_cache_size_ << " is invalid" 80732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << VCD_ENDL; 81311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return false; 82311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 83311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if ((same_cache_size_ > (VCD_MAX_MODES - 2)) || (same_cache_size_ < 0)) { 84732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com VCD_ERROR << "Same cache size " << same_cache_size_ << " is invalid" 85732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << VCD_ENDL; 86311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return false; 87311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 88311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if ((near_cache_size_ + same_cache_size_) > VCD_MAX_MODES - 2) { 89732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com VCD_ERROR << "Using near cache size " << near_cache_size_ 90732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << " and same cache size " << same_cache_size_ 91732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << " would exceed maximum number of COPY modes (" 92732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << VCD_MAX_MODES << ")" << VCD_ENDL; 93311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return false; 94311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 95311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (near_cache_size_ > 0) { 96311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff near_addresses_.assign(near_cache_size_, 0); 97311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 98311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (same_cache_size_ > 0) { 99311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff same_addresses_.assign(same_cache_size_ * 256, 0); 100311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 101311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff next_slot_ = 0; // in case Init() is called a second time to reinit 102311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return true; 103311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 104311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 105311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// This method will be called whenever an address is calculated for an 106311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// encoded or decoded COPY instruction, and will update the contents 107311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// of the SAME and NEAR caches. It is vital that the use of 108311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// UpdateCache (called cache_update in the RFC examples) exactly match 109311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// the RFC standard, and that the same caching logic be used in the 110311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// decoder as in the encoder, in order for the decoded addresses to 111311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// match. 112311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 113311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Argument: 114311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// address: This must be a valid address between 0 and 115311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// (source window size + target window size). It is assumed that 116311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// these bounds have been checked before calling UpdateCache. 117311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 118311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffvoid VCDiffAddressCache::UpdateCache(VCDAddress address) { 119311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (near_cache_size_ > 0) { 120311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff near_addresses_[next_slot_] = address; 121311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff next_slot_ = (next_slot_ + 1) % near_cache_size_; 122311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 123311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (same_cache_size_ > 0) { 124311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff same_addresses_[address % (same_cache_size_ * 256)] = address; 125311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 126311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 127311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 128311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Determines the address mode that yields the most compact encoding 129311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// of the given address value, writes the encoded address into the 130311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// address stream, and returns the mode used. The most compact encoding 131311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// is found by looking for the numerically lowest encoded address. 132311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// The Init() function must already have been called. 133311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 134311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Arguments: 135311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// address: The address to be encoded. Must be a non-negative integer 136311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// between 0 and (here_address - 1). 137311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// here_address: The current location in the target data (i.e., the 138311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// position just after the last encoded value.) Must be non-negative. 139311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// encoded_addr: Points to an VCDAddress that will be replaced 140311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// with the encoded representation of address. 141311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// If WriteAddressAsVarintForMode returns true when passed 142311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// the return value, then encoded_addr should be written 143311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// into the delta file as a variable-length integer (Varint); 144311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// otherwise, it should be written as a byte (unsigned char). 145311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 146311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Return value: A mode value between 0 and 255. The mode will tell 147311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// how to interpret the next value in the address stream. 148311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// The values 0 and 1 correspond to SELF and HERE addressing. 149311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 150311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// The function is guaranteed to succeed unless the conditions on the arguments 151732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com// have not been met, in which case a VCD_DFATAL message will be produced, 152311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 0 will be returned, and *encoded_addr will be replaced with 0. 153311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 154311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffunsigned char VCDiffAddressCache::EncodeAddress(VCDAddress address, 155311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff VCDAddress here_address, 156311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff VCDAddress* encoded_addr) { 157311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (address < 0) { 158732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com VCD_DFATAL << "EncodeAddress was passed a negative address: " 159732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << address << VCD_ENDL; 160311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff *encoded_addr = 0; 161311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return 0; 162311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 163311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (address >= here_address) { 164732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com VCD_DFATAL << "EncodeAddress was called with address (" << address 165732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << ") < here_address (" << here_address << ")" << VCD_ENDL; 166311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff *encoded_addr = 0; 167311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return 0; 168311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 169311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // Try using the SAME cache. This method, if available, always 170311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // results in the smallest encoding and takes priority over other modes. 171311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (same_cache_size() > 0) { 172311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const VCDAddress same_cache_pos = 173311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff address % (same_cache_size() * 256); 174311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (SameAddress(same_cache_pos) == address) { 175311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // This is the only mode for which an single byte will be written 176311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // to the address stream instead of a variable-length integer. 177311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff UpdateCache(address); 178311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff *encoded_addr = same_cache_pos % 256; 179311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return FirstSameMode() + (same_cache_pos / 256); // SAME mode 180311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 181311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 182311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 183311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // Try SELF mode 184311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff unsigned char best_mode = VCD_SELF_MODE; 185311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff VCDAddress best_encoded_address = address; 186311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 187311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // Try HERE mode 188311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff { 189311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const VCDAddress here_encoded_address = here_address - address; 190311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (here_encoded_address < best_encoded_address) { 191311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff best_mode = VCD_HERE_MODE; 192311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff best_encoded_address = here_encoded_address; 193311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 194311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 195311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 196311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // Try using the NEAR cache 197311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff for (int i = 0; i < near_cache_size(); ++i) { 198311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const VCDAddress near_encoded_address = address - NearAddress(i); 199311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if ((near_encoded_address >= 0) && 200311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff (near_encoded_address < best_encoded_address)) { 201311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff best_mode = FirstNearMode() + i; 202311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff best_encoded_address = near_encoded_address; 203311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 204311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 205311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 206311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff UpdateCache(address); 207311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff *encoded_addr = best_encoded_address; 208311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return best_mode; 209311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 210311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 211311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Increments *byte_pointer and returns the byte it pointed to before the 212311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// increment. The caller must check bounds to ensure that *byte_pointer 213311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// points to a valid address in memory. 214311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffstatic unsigned char ParseByte(const char** byte_pointer) { 215311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff unsigned char byte_value = static_cast<unsigned char>(**byte_pointer); 216311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff ++(*byte_pointer); 217311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return byte_value; 218311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 219311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 220311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Checks the given decoded address for validity. Returns true if the 221311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// address is valid; otherwise, prints an error message to the log and 222311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// returns false. 223311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffstatic bool IsDecodedAddressValid(VCDAddress decoded_address, 224311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff VCDAddress here_address) { 225311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (decoded_address < 0) { 226732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com VCD_ERROR << "Decoded address " << decoded_address << " is invalid" 227732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << VCD_ENDL; 228311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return false; 229311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } else if (decoded_address >= here_address) { 230732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com VCD_ERROR << "Decoded address (" << decoded_address 231732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << ") is beyond location in target file (" << here_address 232732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << ")" << VCD_ENDL; 233311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return false; 234311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 235311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return true; 236311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 237311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 238311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Interprets the next value in the address_stream using the provided mode, 239311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// which may need to access the SAME or NEAR address cache. Returns the 240311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// decoded address. 241311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// The Init() function must already have been called. 242311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 243311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Arguments: 244311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// here_address: The current location in the source + target data (i.e., the 245311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// location into which the COPY instruction will copy.) By definition, 246311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// all addresses between 0 and (here_address - 1) are valid, and 247311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// any other address is invalid. 248311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// mode: A byte value between 0 and (near_cache_size_ + same_cache_size_ + 1) 249311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// which tells how to interpret the next value in the address stream. 250311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// The values 0 and 1 correspond to SELF and HERE addressing. 251311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// The validity of "mode" should already have been checked before 252311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// calling this function. 253311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// address_stream: Points to a pointer holding the position 254311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// in the "Addresses section for COPYs" part of the input data. 255311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// That section must already have been uncompressed 256311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// using a secondary decompressor (if necessary.) 257311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// This is an IN/OUT argument; the value of *address_stream will be 258311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// incremented by the size of an integer, or (if the SAME cache 259311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// was used) by the size of a byte (1). 260311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// address_stream_end: Points to the position just after the end of 261311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// the address stream buffer. All addresses between *address_stream 262311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// and address_stream_end should contain valid address data. 263311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 264311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// Return value: If the input conditions were met, and the address section 265311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// of the input data contains properly encoded addresses that match 266311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// the instructions section, then an integer between 0 and here_address - 1 267311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// will be returned, representing the address from which data should 268311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// be copied from the source or target window into the output stream. 269311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// If an invalid address value is found in address_stream, then 270311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// RESULT_ERROR will be returned. If the limit address_stream_end 271311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// is reached before the address can be decoded, then 272311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// RESULT_END_OF_DATA will be returned. If more streamed data 273311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// is expected, this means that the consumer should block and wait 274311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// for more data before continuing to decode. If no more data is expected, 275311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// this return value signals an error condition. 276311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff// 277311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiffVCDAddress VCDiffAddressCache::DecodeAddress(VCDAddress here_address, 278311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff unsigned char mode, 279311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const char** address_stream, 280311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const char* address_stream_end) { 281311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (here_address < 0) { 282732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com VCD_DFATAL << "DecodeAddress was passed a negative value" 283732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com " for here_address: " << here_address << VCD_ENDL; 284311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return RESULT_ERROR; 285311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 286311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff const char* new_address_pos = *address_stream; 287311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (new_address_pos >= address_stream_end) { 288311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return RESULT_END_OF_DATA; 289311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 290311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff VCDAddress decoded_address; 291311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (IsSameMode(mode)) { 292311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // SAME mode expects a byte value as the encoded address 293311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff unsigned char encoded_address = ParseByte(&new_address_pos); 294311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff decoded_address = DecodeSameAddress(mode, encoded_address); 295311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } else { 296311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // All modes except SAME mode expect a VarintBE as the encoded address 297311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff int32_t encoded_address = VarintBE<int32_t>::Parse(address_stream_end, 298311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff &new_address_pos); 299311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff switch (encoded_address) { 300311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff case RESULT_ERROR: 301732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com VCD_ERROR << "Found invalid variable-length integer " 302732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com "as encoded address value" << VCD_ENDL; 303311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return RESULT_ERROR; 304311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff case RESULT_END_OF_DATA: 305311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return RESULT_END_OF_DATA; 306311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff default: 307311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff break; 308311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 309311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (IsSelfMode(mode)) { 310311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff decoded_address = DecodeSelfAddress(encoded_address); 311311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } else if (IsHereMode(mode)) { 312311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff decoded_address = DecodeHereAddress(encoded_address, here_address); 313311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } else if (IsNearMode(mode)) { 314311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff decoded_address = DecodeNearAddress(mode, encoded_address); 315311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } else { 316732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com VCD_DFATAL << "Invalid mode value (" << static_cast<int>(mode) 317732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << ") passed to DecodeAddress; maximum mode value = " 318732fff248e662ec47aa27c124632f406f27b6c8dopenvcdiff@gmail.com << static_cast<int>(LastMode()) << VCD_ENDL; 319311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return RESULT_ERROR; 320311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 321311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 322311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff // Check for an out-of-bounds address (corrupt/malicious data) 323311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff if (!IsDecodedAddressValid(decoded_address, here_address)) { 324311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return RESULT_ERROR; 325311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff } 326311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff *address_stream = new_address_pos; 327311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff UpdateCache(decoded_address); 328311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff return decoded_address; 329311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} 330311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff 331311c71486f5f6074e5ba62a7f4c5397c8700b868openvcdiff} // namespace open_vcdiff 332