1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_RUNTIME_LEB128_H_
18#define ART_RUNTIME_LEB128_H_
19
20#include <vector>
21
22#include "base/bit_utils.h"
23#include "base/logging.h"
24#include "globals.h"
25
26namespace art {
27
// Reads an unsigned LEB128 value and advances `*data` to point just past the
// last byte consumed. At most five bytes are read and no bounds checking is
// performed, so the caller must guarantee the whole encoding is readable.
//
// The fifth byte is used as-is: a set continuation bit or non-zero high-order
// bits in it are tolerated, and bits that do not fit in 32 bits are discarded.
static inline uint32_t DecodeUnsignedLeb128(const uint8_t** data) {
  const uint8_t* ptr = *data;
  // Accumulate in an unsigned type. Shifting the fifth byte left by 28 may not
  // fit in 32 bits, which would be undefined behavior for a signed int but is
  // a well-defined truncation for uint32_t.
  uint32_t result = *(ptr++);
  if (UNLIKELY(result > 0x7f)) {
    // Continuation bit set: strip it and merge the following 7-bit groups.
    uint32_t cur = *(ptr++);
    result = (result & 0x7f) | ((cur & 0x7f) << 7);
    if (cur > 0x7f) {
      cur = *(ptr++);
      result |= (cur & 0x7f) << 14;
      if (cur > 0x7f) {
        cur = *(ptr++);
        result |= (cur & 0x7f) << 21;
        if (cur > 0x7f) {
          // Note: We don't check to see if cur is out of range here,
          // meaning we tolerate garbage in the four high-order bits.
          cur = *(ptr++);
          result |= cur << 28;
        }
      }
    }
  }
  *data = ptr;
  return result;
}
55
// Bounds-checked variant of unsigned LEB128 decoding.
//
// Reads an unsigned LEB128 value starting at `*data`, never reading at or
// beyond `end`. On success, stores the decoded value in `*out`, advances
// `*data` just past the last byte consumed and returns true. If the encoding
// would run into `end`, returns false and leaves both `*data` and `*out`
// untouched.
//
// As with the unchecked decoder, non-zero high-order bits (including the
// continuation bit) in the fifth encoded byte are tolerated and discarded.
static inline bool DecodeUnsignedLeb128Checked(const uint8_t** data,
                                               const void* end,
                                               uint32_t* out) {
  const uint8_t* ptr = *data;
  const uint8_t* const limit = static_cast<const uint8_t*>(end);
  if (ptr >= limit) {
    return false;
  }
  // Accumulate in an unsigned type. Shifting the fifth byte left by 28 may not
  // fit in 32 bits, which would be undefined behavior for a signed int but is
  // a well-defined truncation for uint32_t.
  uint32_t result = *(ptr++);
  if (UNLIKELY(result > 0x7f)) {
    if (ptr >= limit) {
      return false;
    }
    uint32_t cur = *(ptr++);
    result = (result & 0x7f) | ((cur & 0x7f) << 7);
    if (cur > 0x7f) {
      if (ptr >= limit) {
        return false;
      }
      cur = *(ptr++);
      result |= (cur & 0x7f) << 14;
      if (cur > 0x7f) {
        if (ptr >= limit) {
          return false;
        }
        cur = *(ptr++);
        result |= (cur & 0x7f) << 21;
        if (cur > 0x7f) {
          if (ptr >= limit) {
            return false;
          }
          // Note: We don't check to see if cur is out of range here,
          // meaning we tolerate garbage in the four high-order bits.
          cur = *(ptr++);
          result |= cur << 28;
        }
      }
    }
  }
  *data = ptr;
  *out = result;
  return true;
}
98
99// Reads an unsigned LEB128 + 1 value. updating the given pointer to point
100// just past the end of the read value. This function tolerates
101// non-zero high-order bits in the fifth encoded byte.
102// It is possible for this function to return -1.
103static inline int32_t DecodeUnsignedLeb128P1(const uint8_t** data) {
104  return DecodeUnsignedLeb128(data) - 1;
105}
106
// Reads a signed LEB128 value and advances `*data` to point just past the last
// byte consumed. At most five bytes are read and no bounds checking is
// performed, so the caller must guarantee the whole encoding is readable.
//
// For encodings shorter than five bytes the value is sign-extended from the
// highest payload bit of the last byte. The fifth byte is used as-is: a set
// continuation bit or non-zero high-order bits in it are tolerated, and bits
// that do not fit in 32 bits are discarded.
static inline int32_t DecodeSignedLeb128(const uint8_t** data) {
  const uint8_t* ptr = *data;
  // Collect the payload in an unsigned accumulator so that none of the left
  // shifts can overflow a signed int (notably `cur << 28` on the fifth byte).
  uint32_t bits = *(ptr++);
  int32_t result;
  if (bits <= 0x7f) {
    // Sign-extend by moving the payload's top bit into bit 31 and shifting
    // back; this relies on arithmetic right shift of negative values.
    result = static_cast<int32_t>(bits << 25) >> 25;
  } else {
    uint32_t cur = *(ptr++);
    bits = (bits & 0x7f) | ((cur & 0x7f) << 7);
    if (cur <= 0x7f) {
      result = static_cast<int32_t>(bits << 18) >> 18;
    } else {
      cur = *(ptr++);
      bits |= (cur & 0x7f) << 14;
      if (cur <= 0x7f) {
        result = static_cast<int32_t>(bits << 11) >> 11;
      } else {
        cur = *(ptr++);
        bits |= (cur & 0x7f) << 21;
        if (cur <= 0x7f) {
          result = static_cast<int32_t>(bits << 4) >> 4;
        } else {
          // Note: We don't check to see if cur is out of range here,
          // meaning we tolerate garbage in the four high-order bits.
          cur = *(ptr++);
          bits |= cur << 28;
          // All 32 bits are populated; no sign extension is needed.
          result = static_cast<int32_t>(bits);
        }
      }
    }
  }
  *data = ptr;
  return result;
}
142
// Bounds-checked variant of signed LEB128 decoding.
//
// Reads a signed LEB128 value starting at `*data`, never reading at or beyond
// `end`. On success, stores the decoded value in `*out`, advances `*data` just
// past the last byte consumed and returns true. If the encoding would run into
// `end`, returns false and leaves both `*data` and `*out` untouched.
//
// As with the unchecked decoder, non-zero high-order bits (including the
// continuation bit) in the fifth encoded byte are tolerated and discarded.
static inline bool DecodeSignedLeb128Checked(const uint8_t** data,
                                             const void* end,
                                             int32_t* out) {
  const uint8_t* ptr = *data;
  const uint8_t* const limit = static_cast<const uint8_t*>(end);
  if (ptr >= limit) {
    return false;
  }
  // Collect the payload in an unsigned accumulator so that none of the left
  // shifts can overflow a signed int (notably `cur << 28` on the fifth byte).
  uint32_t bits = *(ptr++);
  int32_t result;
  if (bits <= 0x7f) {
    // Sign-extend by moving the payload's top bit into bit 31 and shifting
    // back; this relies on arithmetic right shift of negative values.
    result = static_cast<int32_t>(bits << 25) >> 25;
  } else {
    if (ptr >= limit) {
      return false;
    }
    uint32_t cur = *(ptr++);
    bits = (bits & 0x7f) | ((cur & 0x7f) << 7);
    if (cur <= 0x7f) {
      result = static_cast<int32_t>(bits << 18) >> 18;
    } else {
      if (ptr >= limit) {
        return false;
      }
      cur = *(ptr++);
      bits |= (cur & 0x7f) << 14;
      if (cur <= 0x7f) {
        result = static_cast<int32_t>(bits << 11) >> 11;
      } else {
        if (ptr >= limit) {
          return false;
        }
        cur = *(ptr++);
        bits |= (cur & 0x7f) << 21;
        if (cur <= 0x7f) {
          result = static_cast<int32_t>(bits << 4) >> 4;
        } else {
          if (ptr >= limit) {
            return false;
          }
          // Note: We don't check to see if cur is out of range here,
          // meaning we tolerate garbage in the four high-order bits.
          cur = *(ptr++);
          bits |= cur << 28;
          // All 32 bits are populated; no sign extension is needed.
          result = static_cast<int32_t>(bits);
        }
      }
    }
  }
  *data = ptr;
  *out = result;
  return true;
}
193
194// Returns the number of bytes needed to encode the value in unsigned LEB128.
195static inline uint32_t UnsignedLeb128Size(uint32_t data) {
196  // bits_to_encode = (data != 0) ? 32 - CLZ(x) : 1  // 32 - CLZ(data | 1)
197  // bytes = ceil(bits_to_encode / 7.0);             // (6 + bits_to_encode) / 7
198  uint32_t x = 6 + 32 - CLZ(data | 1U);
199  // Division by 7 is done by (x * 37) >> 8 where 37 = ceil(256 / 7).
200  // This works for 0 <= x < 256 / (7 * 37 - 256), i.e. 0 <= x <= 85.
201  return (x * 37) >> 8;
202}
203
204// Returns the number of bytes needed to encode the value in unsigned LEB128.
205static inline uint32_t SignedLeb128Size(int32_t data) {
206  // Like UnsignedLeb128Size(), but we need one bit beyond the highest bit that differs from sign.
207  data = data ^ (data >> 31);
208  uint32_t x = 1 /* we need to encode the sign bit */ + 6 + 32 - CLZ(data | 1U);
209  return (x * 37) >> 8;
210}
211
// Writes `value` to `dest` in unsigned LEB128 form and returns a pointer just
// past the last byte written. The caller must provide enough room for the
// encoding; no bounds checking is performed.
static inline uint8_t* EncodeUnsignedLeb128(uint8_t* dest, uint32_t value) {
  // Emit the low seven bits at a time; every byte except the final one
  // carries the continuation flag (0x80).
  for (; value > 0x7f; value >>= 7) {
    *dest++ = static_cast<uint8_t>((value & 0x7f) | 0x80);
  }
  // What remains fits in seven bits and terminates the sequence.
  *dest++ = static_cast<uint8_t>(value);
  return dest;
}
223
// Appends `value` in unsigned LEB128 form to the byte container `dest`.
// `Vector` must have `uint8_t` as its value_type and provide push_back().
template <typename Vector>
static inline void EncodeUnsignedLeb128(Vector* dest, uint32_t value) {
  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
  // Emit the low seven bits at a time; every byte except the final one
  // carries the continuation flag (0x80).
  for (; value > 0x7f; value >>= 7) {
    dest->push_back(static_cast<uint8_t>((value & 0x7f) | 0x80));
  }
  // What remains fits in seven bits and terminates the sequence.
  dest->push_back(static_cast<uint8_t>(value));
}
236
237// Overwrite encoded Leb128 with a new value. The new value must be less than
238// or equal to the old value to ensure that it fits the allocated space.
239static inline void UpdateUnsignedLeb128(uint8_t* dest, uint32_t value) {
240  const uint8_t* old_end = dest;
241  uint32_t old_value = DecodeUnsignedLeb128(&old_end);
242  DCHECK_LE(value, old_value);
243  for (uint8_t* end = EncodeUnsignedLeb128(dest, value); end < old_end; end++) {
244    // Use longer encoding than necessary to fill the allocated space.
245    end[-1] |= 0x80;
246    end[0] = 0;
247  }
248}
249
// Writes `value` to `dest` in signed LEB128 form and returns a pointer just
// past the last byte written. The caller must provide enough room for the
// encoding; no bounds checking is performed.
static inline uint8_t* EncodeSignedLeb128(uint8_t* dest, int32_t value) {
  // XOR with the sign mask inverts negative values so that only bits that
  // differ from the sign remain set. The final byte can carry six such bits
  // next to the sign bit, so whatever lies above them needs more bytes.
  uint32_t pending = static_cast<uint32_t>(value ^ (value >> 31)) >> 6;
  for (; pending != 0u; pending >>= 7) {
    // More bytes follow: emit seven payload bits with the continuation flag.
    *dest++ = static_cast<uint8_t>((value & 0x7f) | 0x80);
    value >>= 7;
  }
  // Final byte: remaining payload including the sign bit, no continuation flag.
  *dest++ = static_cast<uint8_t>(value & 0x7f);
  return dest;
}
262
// Appends `value` in signed LEB128 form to the byte container `dest`.
// `Vector` must have `uint8_t` as its value_type and provide push_back().
template<typename Vector>
static inline void EncodeSignedLeb128(Vector* dest, int32_t value) {
  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
  // XOR with the sign mask inverts negative values so that only bits that
  // differ from the sign remain set. The final byte can carry six such bits
  // next to the sign bit, so whatever lies above them needs more bytes.
  uint32_t pending = static_cast<uint32_t>(value ^ (value >> 31)) >> 6;
  for (; pending != 0u; pending >>= 7) {
    // More bytes follow: emit seven payload bits with the continuation flag.
    dest->push_back(static_cast<uint8_t>((value & 0x7f) | 0x80));
    value >>= 7;
  }
  // Final byte: remaining payload including the sign bit, no continuation flag.
  dest->push_back(static_cast<uint8_t>(value & 0x7f));
}
276
277// An encoder that pushes int32_t/uint32_t data onto the given std::vector.
278template <typename Vector = std::vector<uint8_t>>
279class Leb128Encoder {
280  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
281
282 public:
283  explicit Leb128Encoder(Vector* data) : data_(data) {
284    DCHECK(data != nullptr);
285  }
286
287  void Reserve(uint32_t size) {
288    data_->reserve(size);
289  }
290
291  void PushBackUnsigned(uint32_t value) {
292    EncodeUnsignedLeb128(data_, value);
293  }
294
295  template<typename It>
296  void InsertBackUnsigned(It cur, It end) {
297    for (; cur != end; ++cur) {
298      PushBackUnsigned(*cur);
299    }
300  }
301
302  void PushBackSigned(int32_t value) {
303    EncodeSignedLeb128(data_, value);
304  }
305
306  template<typename It>
307  void InsertBackSigned(It cur, It end) {
308    for (; cur != end; ++cur) {
309      PushBackSigned(*cur);
310    }
311  }
312
313  const Vector& GetData() const {
314    return *data_;
315  }
316
317 protected:
318  Vector* const data_;
319
320 private:
321  DISALLOW_COPY_AND_ASSIGN(Leb128Encoder);
322};
323
324// An encoder with an API similar to vector<uint32_t> where the data is captured in ULEB128 format.
325template <typename Vector = std::vector<uint8_t>>
326class Leb128EncodingVector FINAL : private Vector,
327                                   public Leb128Encoder<Vector> {
328  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
329
330 public:
331  Leb128EncodingVector() : Leb128Encoder<Vector>(this) { }
332
333  explicit Leb128EncodingVector(const typename Vector::allocator_type& alloc)
334    : Vector(alloc),
335      Leb128Encoder<Vector>(this) { }
336
337 private:
338  DISALLOW_COPY_AND_ASSIGN(Leb128EncodingVector);
339};
340
341}  // namespace art
342
343#endif  // ART_RUNTIME_LEB128_H_
344