1// Copyright 2015 Google Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15////////////////////////////////////////////////////////////////////////////////
16
17#include "src/binary_parse/range_checked_byte_ptr.h"
18
19#include <assert.h>
20#include <cstddef>
21#include <cstring>
22
23namespace piex {
24namespace binary_parse {
25
26#ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE
27#define BREAK_IF_DEBUGGING() assert(false)
28#else
29#define BREAK_IF_DEBUGGING() assert(true)
30#endif
31
32namespace {
33class MemoryPagedByteArray : public PagedByteArray {
34 public:
35  MemoryPagedByteArray(const unsigned char *buffer, const size_t len);
36
37  virtual size_t length() const;
38  virtual size_t pageSize() const;
39  virtual void getPage(size_t page_index, const unsigned char **begin,
40                       const unsigned char **end, PagePtr *page) const;
41
42 private:
43  const unsigned char *buffer_;
44  const size_t len_;
45};
46
47MemoryPagedByteArray::MemoryPagedByteArray(const unsigned char *buffer,
48                                           const size_t len)
49    : buffer_(buffer), len_(len) {}
50
51size_t MemoryPagedByteArray::length() const { return len_; }
52
53size_t MemoryPagedByteArray::pageSize() const { return len_; }
54
55void MemoryPagedByteArray::getPage(size_t /* page_index */,
56                                   const unsigned char **begin,
57                                   const unsigned char **end,
58                                   PagePtr *page) const {
59  *begin = buffer_;
60  *end = buffer_ + len_;
61  *page = PagePtr();
62}
63
64// A functor that does nothing. This is used as a no-op shared pointer
65// deallocator below.
66class NullFunctor {
67 public:
68  void operator()() {}
69  void operator()(PagedByteArray * /* p */) const {}
70};
71}  // namespace
72
73PagedByteArray::~PagedByteArray() {}
74
75RangeCheckedBytePtr::RangeCheckedBytePtr()
76    : array_(),
77      page_data_(NULL),
78      current_pos_(0),
79      sub_array_begin_(0),
80      sub_array_end_(0),
81      page_begin_offset_(0),
82      current_page_len_(0),
83      error_flag_(RANGE_CHECKED_BYTE_ERROR) {}
84
85RangeCheckedBytePtr::RangeCheckedBytePtr(const unsigned char *array,
86                                         const size_t len)
87    : array_(new MemoryPagedByteArray(array, len)),
88      page_data_(NULL),
89      current_pos_(0),
90      sub_array_begin_(0),
91      sub_array_end_(len),
92      page_begin_offset_(0),
93      current_page_len_(0),
94      error_flag_(RANGE_CHECKED_BYTE_SUCCESS) {
95  assert(array);
96  if (array == NULL) {
97    error_flag_ = RANGE_CHECKED_BYTE_ERROR;
98  }
99}
100
101RangeCheckedBytePtr::RangeCheckedBytePtr(PagedByteArray *array)
102    : array_(array, NullFunctor()),
103      page_data_(NULL),
104      current_pos_(0),
105      sub_array_begin_(0),
106      sub_array_end_(array->length()),
107      page_begin_offset_(0),
108      current_page_len_(0),
109      error_flag_(RANGE_CHECKED_BYTE_SUCCESS) {}
110
111RangeCheckedBytePtr RangeCheckedBytePtr::invalidPointer() {
112  return RangeCheckedBytePtr();
113}
114
115RangeCheckedBytePtr RangeCheckedBytePtr::pointerToSubArray(
116    size_t pos, size_t length) const {
117  RangeCheckedBytePtr sub_result = (*this) + pos;
118  if (!sub_result.errorOccurred() && length <= sub_result.remainingLength()) {
119    sub_result.sub_array_begin_ = sub_result.current_pos_;
120    sub_result.sub_array_end_ = sub_result.sub_array_begin_ + length;
121
122    // Restrict the boundaries of the current page to the newly set sub-array.
123    sub_result.restrictPageToSubArray();
124
125    return sub_result;
126  } else {
127    error_flag_ = RANGE_CHECKED_BYTE_ERROR;
128    return invalidPointer();
129  }
130}
131
132size_t RangeCheckedBytePtr::offsetInArray() const {
133  // sub_array_begin_ <= current_pos_ is a class invariant, but protect
134  // against violations of this invariant.
135  if (sub_array_begin_ <= current_pos_) {
136    return current_pos_ - sub_array_begin_;
137  } else {
138    assert(false);
139    return 0;
140  }
141}
142
143std::string RangeCheckedBytePtr::substr(size_t pos, size_t length) const {
144  std::vector<unsigned char> bytes = extractBytes(pos, length);
145  std::string result;
146  result.reserve(bytes.size());
147  for (size_t i = 0; i < bytes.size(); ++i) {
148    result.push_back(static_cast<char>(bytes[i]));
149  }
150  return result;
151}
152
153std::vector<unsigned char> RangeCheckedBytePtr::extractBytes(
154    size_t pos, size_t length) const {
155  std::vector<unsigned char> result;
156  if (pos + length < pos /* overflow */ || remainingLength() < pos + length) {
157    BREAK_IF_DEBUGGING();
158    error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW;
159    return result;
160  }
161  result.reserve(length);
162  for (size_t i = 0; i < length; ++i) {
163    result.push_back((*this)[pos + i]);
164  }
165  return result;
166}
167
168bool operator==(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) {
169  if (x.array_ != y.array_) {
170    assert(false);
171    return false;
172  }
173
174  return x.current_pos_ == y.current_pos_;
175}
176
177bool operator!=(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) {
178  return !(x == y);
179}
180
181void RangeCheckedBytePtr::loadPageForOffset(size_t offset) const {
182  // The offset should always lie within the bounds of the sub-array (this
183  // condition is enforced at the callsite). However, even if the offset lies
184  // outside the sub-array, the restrictPageToSubArray() call at the end
185  // ensures that the object is left in a consistent state that maintains the
186  // class invariants.
187  assert(offset >= sub_array_begin_ && offset < sub_array_end_);
188
189  // Ensure that offset lies within the array.
190  if (offset >= array_->length()) {
191    assert(false);
192    return;
193  }
194
195  // Determine the index of the page to request.
196  size_t page_index = offset / array_->pageSize();
197
198  // Get the page.
199  const unsigned char *page_begin;
200  const unsigned char *page_end;
201  array_->getPage(page_index, &page_begin, &page_end, &page_);
202
203  // Ensure that the page has the expected length (as specified in the
204  // PagedByteArray interface).
205  size_t expected_page_size = array_->pageSize();
206  if (page_index == (array_->length() - 1) / array_->pageSize()) {
207    expected_page_size = array_->length() - array_->pageSize() * page_index;
208  }
209  if ((page_end < page_begin) ||
210      (static_cast<size_t>(page_end - page_begin) != expected_page_size)) {
211    assert(false);
212    return;
213  }
214
215  // Remember information about page.
216  page_data_ = page_begin;
217  page_begin_offset_ = page_index * array_->pageSize();
218  current_page_len_ = static_cast<size_t>(page_end - page_begin);
219
220  // Restrict the boundaries of the page to lie within the sub-array.
221  restrictPageToSubArray();
222}
223
224void RangeCheckedBytePtr::restrictPageToSubArray() const {
225  // Restrict the current page's boundaries so that it is always contained
226  // completely within the extent of the sub-array.
227  // This function is purposely designed to work correctly in the following
228  // two special cases:
229  // a) The current page lies entirely outside the sub-array. In this case,
230  //    current_page_len_ will be set to zero. page_data_ may either remain
231  //    unchanged or may be changed to point one element beyond the end of the
232  //    page, depending on whether the current page lies before or after the
233  //    sub-array.
234  // b) The current page is in the state as initialized by the constructor
235  //    (i.e. page_data_ is NULL and current_page_len_ is zero). In this case,
236  //    page_data_ and current_page_len_ will remain unchanged.
237
238  // Does the beginning of the page lie before the beginning of the sub-array?
239  if (page_begin_offset_ < sub_array_begin_) {
240    // Compute amount by which to shorten page.
241    size_t amount_to_shorten = sub_array_begin_ - page_begin_offset_;
242    if (amount_to_shorten > current_page_len_) {
243      amount_to_shorten = current_page_len_;
244    }
245
246    // Adjust beginning of page accordingly.
247    page_begin_offset_ += amount_to_shorten;
248    page_data_ += amount_to_shorten;
249    current_page_len_ -= amount_to_shorten;
250  }
251
252  // Does the end of the page lie beyond the end of the sub-array?
253  if (page_begin_offset_ + current_page_len_ > sub_array_end_) {
254    // Reduce length of page accordingly.
255    size_t new_len = sub_array_end_ - page_begin_offset_;
256    if (new_len > current_page_len_) {
257      new_len = current_page_len_;
258    }
259    current_page_len_ = new_len;
260  }
261}
262
263int memcmp(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y,
264           size_t num) {
265  std::vector<unsigned char> x_vec = x.extractBytes(0, num);
266  std::vector<unsigned char> y_vec = y.extractBytes(0, num);
267
268  if (!x.errorOccurred() && !y.errorOccurred()) {
269    return ::memcmp(&x_vec[0], &y_vec[0], num);
270  } else {
271    // return an arbitrary value
272    return -1;
273  }
274}
275
276int strcmp(const RangeCheckedBytePtr &x, const std::string &y) {
277  std::vector<unsigned char> x_vec = x.extractBytes(0, y.length());
278
279  if (!x.errorOccurred()) {
280    return ::memcmp(&x_vec[0], y.c_str(), y.length());
281  } else {
282    // return an arbitrary value
283    return -1;
284  }
285}
286
287size_t strlen(const RangeCheckedBytePtr &src) {
288  size_t len = 0;
289  RangeCheckedBytePtr str = src;
290  while (!str.errorOccurred() && (str[0] != '\0')) {
291    str++;
292    len++;
293  }
294  return len;
295}
296
297int16 Get16s(const RangeCheckedBytePtr &input, const bool big_endian,
298             MemoryStatus *status) {
299  const uint16 unsigned_value = Get16u(input, big_endian, status);
300  if (*status != RANGE_CHECKED_BYTE_SUCCESS) {
301    // Return an arbitrary value.
302    return 0;
303  }
304
305  // Convert the two's-complement signed integer encoded in 'unsigned_value'
306  // into a signed representation in the implementation's native representation
307  // for signed integers. An optimized Blaze build (x64) compiles all of the
308  // following code to a no-op (as of this writing).
309  // For further details, see the corresponding comment in Get32s().
310  if (unsigned_value == 0x8000u) {
311    return static_cast<int16>(-0x8000);
312  } else if (unsigned_value > 0x8000u) {
313    return -static_cast<int16>(0x10000u - unsigned_value);
314  } else {
315    return static_cast<int16>(unsigned_value);
316  }
317}
318
319uint16 Get16u(const RangeCheckedBytePtr &input, const bool big_endian,
320              MemoryStatus *status) {
321  if (input.remainingLength() < 2) {
322    if (status && *status == RANGE_CHECKED_BYTE_SUCCESS) {
323      *status = RANGE_CHECKED_BYTE_ERROR;
324    }
325    // Return an arbitrary value.
326    return 0;
327  }
328  if (big_endian) {
329    return (static_cast<uint16>(input[0]) << 8) | static_cast<uint16>(input[1]);
330  } else {
331    return (static_cast<uint16>(input[1]) << 8) | static_cast<uint16>(input[0]);
332  }
333}
334
335int32 Get32s(const RangeCheckedBytePtr &input, const bool big_endian,
336             MemoryStatus *status) {
337  const uint32 unsigned_value = Get32u(input, big_endian, status);
338  if (*status != RANGE_CHECKED_BYTE_SUCCESS) {
339    // Return an arbitrary value.
340    return 0;
341  }
342
343  // Convert the two's-complement signed integer encoded in 'unsigned_value'
344  // into a signed representation in the implementation's native representation
345  // for signed integers.
346  // For all practical purposes, the same result could be obtained simply by
347  // casting unsigned_value to int32; the result of this is
348  // implementation-defined, but on all of the platforms we care about, it does
349  // what we want.
350  // The code below, however, arguably has the aesthetic advantage of being
351  // independent of the representation for signed integers chosen by the
352  // implementation, as long as 'int' and 'unsigned' have the required range to
353  // represent all of the required values.
354  // An optimized Blaze build (x64) compiles all of the following code to a
355  // no-op (as of this writing); i.e. the value that Get32u() returned in %eax
356  // is left unchanged.
357  if (unsigned_value == 0x80000000u) {
358    // Read here on why the constant expression is written this way:
359    // http://stackoverflow.com/questions/14695118
360    return -0x7fffffff - 1;
361  } else if (unsigned_value > 0x80000000u) {
362    // The expression
363    //   0xffffffffu - unsigned_value + 1
364    // is a portable way of flipping the sign of a twos-complement signed
365    // integer whose binary representation is stored in an unsigned integer.
366    // '0xffffffffu + 1' is used in preference to simply '0' because it makes
367    // it clearer that the correct result will be obtained even if an int is
368    // greater than 32 bits. The '0xffffffffu + 1' is "spread out" around
369    // 'unsigned_value' to prevent the compiler from warning about an
370    // integral constant overflow. ('0' would produce the correct result in
371    // this case too but would rely in a more subtle way on the rules for
372    // unsigned wraparound.)
373    return -static_cast<int32>(0xffffffffu - unsigned_value + 1);
374  } else {
375    return static_cast<int32>(unsigned_value);
376  }
377}
378
379uint32 Get32u(const RangeCheckedBytePtr &input, const bool big_endian,
380              MemoryStatus *status) {
381  if (input.remainingLength() < 4) {
382    if (status && *status == RANGE_CHECKED_BYTE_SUCCESS) {
383      *status = RANGE_CHECKED_BYTE_ERROR;
384    }
385    // Return an arbitrary value.
386    return 0;
387  }
388  if (big_endian) {
389    return (static_cast<uint32>(input[0]) << 24) |
390           (static_cast<uint32>(input[1]) << 16) |
391           (static_cast<uint32>(input[2]) << 8) |
392           (static_cast<uint32>(input[3]) << 0);
393  } else {
394    return (static_cast<uint32>(input[3]) << 24) |
395           (static_cast<uint32>(input[2]) << 16) |
396           (static_cast<uint32>(input[1]) << 8) |
397           (static_cast<uint32>(input[0]) << 0);
398  }
399}
400
401}  // namespace binary_parse
402}  // namespace piex
403