1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_RUNTIME_LEB128_H_
18#define ART_RUNTIME_LEB128_H_
19
20#include <vector>
21
22#include "base/bit_utils.h"
23#include "base/logging.h"
24#include "globals.h"
25
26namespace art {
27
28// Reads an unsigned LEB128 value, updating the given pointer to point
29// just past the end of the read value. This function tolerates
30// non-zero high-order bits in the fifth encoded byte.
31static inline uint32_t DecodeUnsignedLeb128(const uint8_t** data) {
32  const uint8_t* ptr = *data;
33  int result = *(ptr++);
34  if (UNLIKELY(result > 0x7f)) {
35    int cur = *(ptr++);
36    result = (result & 0x7f) | ((cur & 0x7f) << 7);
37    if (cur > 0x7f) {
38      cur = *(ptr++);
39      result |= (cur & 0x7f) << 14;
40      if (cur > 0x7f) {
41        cur = *(ptr++);
42        result |= (cur & 0x7f) << 21;
43        if (cur > 0x7f) {
44          // Note: We don't check to see if cur is out of range here,
45          // meaning we tolerate garbage in the four high-order bits.
46          cur = *(ptr++);
47          result |= cur << 28;
48        }
49      }
50    }
51  }
52  *data = ptr;
53  return static_cast<uint32_t>(result);
54}
55
56// Reads an unsigned LEB128 + 1 value. updating the given pointer to point
57// just past the end of the read value. This function tolerates
58// non-zero high-order bits in the fifth encoded byte.
59// It is possible for this function to return -1.
60static inline int32_t DecodeUnsignedLeb128P1(const uint8_t** data) {
61  return DecodeUnsignedLeb128(data) - 1;
62}
63
64// Reads a signed LEB128 value, updating the given pointer to point
65// just past the end of the read value. This function tolerates
66// non-zero high-order bits in the fifth encoded byte.
67static inline int32_t DecodeSignedLeb128(const uint8_t** data) {
68  const uint8_t* ptr = *data;
69  int32_t result = *(ptr++);
70  if (result <= 0x7f) {
71    result = (result << 25) >> 25;
72  } else {
73    int cur = *(ptr++);
74    result = (result & 0x7f) | ((cur & 0x7f) << 7);
75    if (cur <= 0x7f) {
76      result = (result << 18) >> 18;
77    } else {
78      cur = *(ptr++);
79      result |= (cur & 0x7f) << 14;
80      if (cur <= 0x7f) {
81        result = (result << 11) >> 11;
82      } else {
83        cur = *(ptr++);
84        result |= (cur & 0x7f) << 21;
85        if (cur <= 0x7f) {
86          result = (result << 4) >> 4;
87        } else {
88          // Note: We don't check to see if cur is out of range here,
89          // meaning we tolerate garbage in the four high-order bits.
90          cur = *(ptr++);
91          result |= cur << 28;
92        }
93      }
94    }
95  }
96  *data = ptr;
97  return result;
98}
99
100// Returns the number of bytes needed to encode the value in unsigned LEB128.
101static inline uint32_t UnsignedLeb128Size(uint32_t data) {
102  // bits_to_encode = (data != 0) ? 32 - CLZ(x) : 1  // 32 - CLZ(data | 1)
103  // bytes = ceil(bits_to_encode / 7.0);             // (6 + bits_to_encode) / 7
104  uint32_t x = 6 + 32 - CLZ(data | 1U);
105  // Division by 7 is done by (x * 37) >> 8 where 37 = ceil(256 / 7).
106  // This works for 0 <= x < 256 / (7 * 37 - 256), i.e. 0 <= x <= 85.
107  return (x * 37) >> 8;
108}
109
110// Returns the number of bytes needed to encode the value in unsigned LEB128.
111static inline uint32_t SignedLeb128Size(int32_t data) {
112  // Like UnsignedLeb128Size(), but we need one bit beyond the highest bit that differs from sign.
113  data = data ^ (data >> 31);
114  uint32_t x = 1 /* we need to encode the sign bit */ + 6 + 32 - CLZ(data | 1U);
115  return (x * 37) >> 8;
116}
117
118static inline uint8_t* EncodeUnsignedLeb128(uint8_t* dest, uint32_t value) {
119  uint8_t out = value & 0x7f;
120  value >>= 7;
121  while (value != 0) {
122    *dest++ = out | 0x80;
123    out = value & 0x7f;
124    value >>= 7;
125  }
126  *dest++ = out;
127  return dest;
128}
129
130template <typename Vector>
131static inline void EncodeUnsignedLeb128(Vector* dest, uint32_t value) {
132  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
133  uint8_t out = value & 0x7f;
134  value >>= 7;
135  while (value != 0) {
136    dest->push_back(out | 0x80);
137    out = value & 0x7f;
138    value >>= 7;
139  }
140  dest->push_back(out);
141}
142
143// Overwrite encoded Leb128 with a new value. The new value must be less than
144// or equal to the old value to ensure that it fits the allocated space.
145static inline void UpdateUnsignedLeb128(uint8_t* dest, uint32_t value) {
146  const uint8_t* old_end = dest;
147  uint32_t old_value = DecodeUnsignedLeb128(&old_end);
148  DCHECK_LE(value, old_value);
149  for (uint8_t* end = EncodeUnsignedLeb128(dest, value); end < old_end; end++) {
150    // Use longer encoding than necessary to fill the allocated space.
151    end[-1] |= 0x80;
152    end[0] = 0;
153  }
154}
155
156static inline uint8_t* EncodeSignedLeb128(uint8_t* dest, int32_t value) {
157  uint32_t extra_bits = static_cast<uint32_t>(value ^ (value >> 31)) >> 6;
158  uint8_t out = value & 0x7f;
159  while (extra_bits != 0u) {
160    *dest++ = out | 0x80;
161    value >>= 7;
162    out = value & 0x7f;
163    extra_bits >>= 7;
164  }
165  *dest++ = out;
166  return dest;
167}
168
169template<typename Vector>
170static inline void EncodeSignedLeb128(Vector* dest, int32_t value) {
171  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
172  uint32_t extra_bits = static_cast<uint32_t>(value ^ (value >> 31)) >> 6;
173  uint8_t out = value & 0x7f;
174  while (extra_bits != 0u) {
175    dest->push_back(out | 0x80);
176    value >>= 7;
177    out = value & 0x7f;
178    extra_bits >>= 7;
179  }
180  dest->push_back(out);
181}
182
183// An encoder that pushes int32_t/uint32_t data onto the given std::vector.
184template <typename Vector = std::vector<uint8_t>>
185class Leb128Encoder {
186  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
187
188 public:
189  explicit Leb128Encoder(Vector* data) : data_(data) {
190    DCHECK(data != nullptr);
191  }
192
193  void Reserve(uint32_t size) {
194    data_->reserve(size);
195  }
196
197  void PushBackUnsigned(uint32_t value) {
198    EncodeUnsignedLeb128(data_, value);
199  }
200
201  template<typename It>
202  void InsertBackUnsigned(It cur, It end) {
203    for (; cur != end; ++cur) {
204      PushBackUnsigned(*cur);
205    }
206  }
207
208  void PushBackSigned(int32_t value) {
209    EncodeSignedLeb128(data_, value);
210  }
211
212  template<typename It>
213  void InsertBackSigned(It cur, It end) {
214    for (; cur != end; ++cur) {
215      PushBackSigned(*cur);
216    }
217  }
218
219  const Vector& GetData() const {
220    return *data_;
221  }
222
223 protected:
224  Vector* const data_;
225
226 private:
227  DISALLOW_COPY_AND_ASSIGN(Leb128Encoder);
228};
229
230// An encoder with an API similar to vector<uint32_t> where the data is captured in ULEB128 format.
231template <typename Vector = std::vector<uint8_t>>
232class Leb128EncodingVector FINAL : private Vector,
233                                   public Leb128Encoder<Vector> {
234  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
235
236 public:
237  Leb128EncodingVector() : Leb128Encoder<Vector>(this) { }
238
239  explicit Leb128EncodingVector(const typename Vector::allocator_type& alloc)
240    : Vector(alloc),
241      Leb128Encoder<Vector>(this) { }
242
243 private:
244  DISALLOW_COPY_AND_ASSIGN(Leb128EncodingVector);
245};
246
247}  // namespace art
248
249#endif  // ART_RUNTIME_LEB128_H_
250