1// Copyright 2015 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_WASM_DECODER_H_
6#define V8_WASM_DECODER_H_
7
8#include <memory>
9
10#include "src/base/compiler-specific.h"
11#include "src/flags.h"
12#include "src/signature.h"
13#include "src/utils.h"
14#include "src/wasm/wasm-result.h"
15#include "src/zone/zone-containers.h"
16
17namespace v8 {
18namespace internal {
19namespace wasm {
20
// Decoder tracing helper. In DEBUG builds the arguments are forwarded to
// PrintF whenever FLAG_trace_wasm_decoder is set; in every other build the
// macro expands to nothing, so its arguments are never evaluated.
#if !DEBUG
#define TRACE(...)
#else
#define TRACE(...)                 \
  do {                             \
    if (FLAG_trace_wasm_decoder) { \
      PrintF(__VA_ARGS__);         \
    }                              \
  } while (false)
#endif
29
// A helper utility to decode bytes, integers, fields, varints, etc., from
// a buffer of bytes.
32class Decoder {
33 public:
34  Decoder(const byte* start, const byte* end)
35      : start_(start),
36        pc_(start),
37        end_(end),
38        error_pc_(nullptr),
39        error_pt_(nullptr) {}
40  Decoder(const byte* start, const byte* pc, const byte* end)
41      : start_(start),
42        pc_(pc),
43        end_(end),
44        error_pc_(nullptr),
45        error_pt_(nullptr) {}
46
47  virtual ~Decoder() {}
48
49  inline bool check(const byte* base, unsigned offset, unsigned length,
50                    const char* msg) {
51    DCHECK_GE(base, start_);
52    if ((base + offset + length) > end_) {
53      error(base, base + offset, "%s", msg);
54      return false;
55    }
56    return true;
57  }
58
59  // Reads a single 8-bit byte, reporting an error if out of bounds.
60  inline uint8_t checked_read_u8(const byte* base, unsigned offset,
61                                 const char* msg = "expected 1 byte") {
62    return check(base, offset, 1, msg) ? base[offset] : 0;
63  }
64
65  // Reads 16-bit word, reporting an error if out of bounds.
66  inline uint16_t checked_read_u16(const byte* base, unsigned offset,
67                                   const char* msg = "expected 2 bytes") {
68    return check(base, offset, 2, msg) ? read_u16(base + offset) : 0;
69  }
70
71  // Reads 32-bit word, reporting an error if out of bounds.
72  inline uint32_t checked_read_u32(const byte* base, unsigned offset,
73                                   const char* msg = "expected 4 bytes") {
74    return check(base, offset, 4, msg) ? read_u32(base + offset) : 0;
75  }
76
77  // Reads 64-bit word, reporting an error if out of bounds.
78  inline uint64_t checked_read_u64(const byte* base, unsigned offset,
79                                   const char* msg = "expected 8 bytes") {
80    return check(base, offset, 8, msg) ? read_u64(base + offset) : 0;
81  }
82
83  // Reads a variable-length unsigned integer (little endian).
84  uint32_t checked_read_u32v(const byte* base, unsigned offset,
85                             unsigned* length,
86                             const char* msg = "expected LEB32") {
87    return checked_read_leb<uint32_t, false>(base, offset, length, msg);
88  }
89
90  // Reads a variable-length signed integer (little endian).
91  int32_t checked_read_i32v(const byte* base, unsigned offset, unsigned* length,
92                            const char* msg = "expected SLEB32") {
93    uint32_t result =
94        checked_read_leb<uint32_t, true>(base, offset, length, msg);
95    if (*length == 5) return bit_cast<int32_t>(result);
96    if (*length > 0) {
97      int shift = 32 - 7 * *length;
98      // Perform sign extension.
99      return bit_cast<int32_t>(result << shift) >> shift;
100    }
101    return 0;
102  }
103
104  // Reads a variable-length unsigned integer (little endian).
105  uint64_t checked_read_u64v(const byte* base, unsigned offset,
106                             unsigned* length,
107                             const char* msg = "expected LEB64") {
108    return checked_read_leb<uint64_t, false>(base, offset, length, msg);
109  }
110
111  // Reads a variable-length signed integer (little endian).
112  int64_t checked_read_i64v(const byte* base, unsigned offset, unsigned* length,
113                            const char* msg = "expected SLEB64") {
114    uint64_t result =
115        checked_read_leb<uint64_t, true>(base, offset, length, msg);
116    if (*length == 10) return bit_cast<int64_t>(result);
117    if (*length > 0) {
118      int shift = 64 - 7 * *length;
119      // Perform sign extension.
120      return bit_cast<int64_t>(result << shift) >> shift;
121    }
122    return 0;
123  }
124
125  // Reads a single 16-bit unsigned integer (little endian).
126  inline uint16_t read_u16(const byte* ptr) {
127    DCHECK(ptr >= start_ && (ptr + 2) <= end_);
128    return ReadLittleEndianValue<uint16_t>(ptr);
129  }
130
131  // Reads a single 32-bit unsigned integer (little endian).
132  inline uint32_t read_u32(const byte* ptr) {
133    DCHECK(ptr >= start_ && (ptr + 4) <= end_);
134    return ReadLittleEndianValue<uint32_t>(ptr);
135  }
136
137  // Reads a single 64-bit unsigned integer (little endian).
138  inline uint64_t read_u64(const byte* ptr) {
139    DCHECK(ptr >= start_ && (ptr + 8) <= end_);
140    return ReadLittleEndianValue<uint64_t>(ptr);
141  }
142
143  // Reads a 8-bit unsigned integer (byte) and advances {pc_}.
144  uint8_t consume_u8(const char* name = nullptr) {
145    TRACE("  +%d  %-20s: ", static_cast<int>(pc_ - start_),
146          name ? name : "uint8_t");
147    if (checkAvailable(1)) {
148      byte val = *(pc_++);
149      TRACE("%02x = %d\n", val, val);
150      return val;
151    }
152    return traceOffEnd<uint8_t>();
153  }
154
155  // Reads a 16-bit unsigned integer (little endian) and advances {pc_}.
156  uint16_t consume_u16(const char* name = nullptr) {
157    TRACE("  +%d  %-20s: ", static_cast<int>(pc_ - start_),
158          name ? name : "uint16_t");
159    if (checkAvailable(2)) {
160      uint16_t val = read_u16(pc_);
161      TRACE("%02x %02x = %d\n", pc_[0], pc_[1], val);
162      pc_ += 2;
163      return val;
164    }
165    return traceOffEnd<uint16_t>();
166  }
167
168  // Reads a single 32-bit unsigned integer (little endian) and advances {pc_}.
169  uint32_t consume_u32(const char* name = nullptr) {
170    TRACE("  +%d  %-20s: ", static_cast<int>(pc_ - start_),
171          name ? name : "uint32_t");
172    if (checkAvailable(4)) {
173      uint32_t val = read_u32(pc_);
174      TRACE("%02x %02x %02x %02x = %u\n", pc_[0], pc_[1], pc_[2], pc_[3], val);
175      pc_ += 4;
176      return val;
177    }
178    return traceOffEnd<uint32_t>();
179  }
180
181  // Reads a LEB128 variable-length unsigned 32-bit integer and advances {pc_}.
182  uint32_t consume_u32v(const char* name = nullptr) {
183    return consume_leb<uint32_t, false>(name);
184  }
185
186  // Reads a LEB128 variable-length signed 32-bit integer and advances {pc_}.
187  int32_t consume_i32v(const char* name = nullptr) {
188    return consume_leb<int32_t, true>(name);
189  }
190
191  // Consume {size} bytes and send them to the bit bucket, advancing {pc_}.
192  void consume_bytes(uint32_t size, const char* name = "skip") {
193#if DEBUG
194    if (name) {
195      // Only trace if the name is not null.
196      TRACE("  +%d  %-20s: %d bytes\n", static_cast<int>(pc_ - start_), name,
197            size);
198    }
199#endif
200    if (checkAvailable(size)) {
201      pc_ += size;
202    } else {
203      pc_ = end_;
204    }
205  }
206
207  // Check that at least {size} bytes exist between {pc_} and {end_}.
208  bool checkAvailable(int size) {
209    intptr_t pc_overflow_value = std::numeric_limits<intptr_t>::max() - size;
210    if (size < 0 || (intptr_t)pc_ > pc_overflow_value) {
211      error(pc_, nullptr, "reading %d bytes would underflow/overflow", size);
212      return false;
213    } else if (pc_ < start_ || end_ < (pc_ + size)) {
214      error(pc_, nullptr, "expected %d bytes, fell off end", size);
215      return false;
216    } else {
217      return true;
218    }
219  }
220
221  void error(const char* msg) { error(pc_, nullptr, "%s", msg); }
222
223  void error(const byte* pc, const char* msg) { error(pc, nullptr, "%s", msg); }
224
225  // Sets internal error state.
226  void PRINTF_FORMAT(4, 5)
227      error(const byte* pc, const byte* pt, const char* format, ...) {
228    if (ok()) {
229#if DEBUG
230      if (FLAG_wasm_break_on_decoder_error) {
231        base::OS::DebugBreak();
232      }
233#endif
234      const int kMaxErrorMsg = 256;
235      char* buffer = new char[kMaxErrorMsg];
236      va_list arguments;
237      va_start(arguments, format);
238      base::OS::VSNPrintF(buffer, kMaxErrorMsg - 1, format, arguments);
239      va_end(arguments);
240      error_msg_.reset(buffer);
241      error_pc_ = pc;
242      error_pt_ = pt;
243      onFirstError();
244    }
245  }
246
247  // Behavior triggered on first error, overridden in subclasses.
248  virtual void onFirstError() {}
249
250  // Debugging helper to print bytes up to the end.
251  template <typename T>
252  T traceOffEnd() {
253    T t = 0;
254    for (const byte* ptr = pc_; ptr < end_; ptr++) {
255      TRACE("%02x ", *ptr);
256    }
257    TRACE("<end>\n");
258    pc_ = end_;
259    return t;
260  }
261
262  // Converts the given value to a {Result}, copying the error if necessary.
263  template <typename T>
264  Result<T> toResult(T val) {
265    Result<T> result;
266    if (failed()) {
267      TRACE("Result error: %s\n", error_msg_.get());
268      result.error_code = kError;
269      result.start = start_;
270      result.error_pc = error_pc_;
271      result.error_pt = error_pt_;
272      // transfer ownership of the error to the result.
273      result.error_msg.reset(error_msg_.release());
274    } else {
275      result.error_code = kSuccess;
276    }
277    result.val = std::move(val);
278    return result;
279  }
280
281  // Resets the boundaries of this decoder.
282  void Reset(const byte* start, const byte* end) {
283    start_ = start;
284    pc_ = start;
285    end_ = end;
286    error_pc_ = nullptr;
287    error_pt_ = nullptr;
288    error_msg_.reset();
289  }
290
291  bool ok() const { return error_msg_ == nullptr; }
292  bool failed() const { return !ok(); }
293  bool more() const { return pc_ < end_; }
294
295  const byte* start() const { return start_; }
296  const byte* pc() const { return pc_; }
297  uint32_t pc_offset() const { return static_cast<uint32_t>(pc_ - start_); }
298  const byte* end() const { return end_; }
299
300 protected:
  // First byte of the buffer being decoded.
  const byte* start_;
  // Current decoding position; advanced by the consume_* methods.
  const byte* pc_;
  // One past the last byte of the buffer.
  const byte* end_;
  // Location of the first recorded error; nullptr when there is none.
  const byte* error_pc_;
  // Auxiliary pointer stored with the first error (may be nullptr).
  const byte* error_pt_;
  // Message of the first recorded error; null while the decoder is ok().
  std::unique_ptr<char[]> error_msg_;
307
308 private:
309  template <typename IntType, bool is_signed>
310  IntType checked_read_leb(const byte* base, unsigned offset, unsigned* length,
311                           const char* msg) {
312    if (!check(base, offset, 1, msg)) {
313      *length = 0;
314      return 0;
315    }
316
317    const int kMaxLength = (sizeof(IntType) * 8 + 6) / 7;
318    const byte* ptr = base + offset;
319    const byte* end = ptr + kMaxLength;
320    if (end > end_) end = end_;
321    int shift = 0;
322    byte b = 0;
323    IntType result = 0;
324    while (ptr < end) {
325      b = *ptr++;
326      result = result | (static_cast<IntType>(b & 0x7F) << shift);
327      if ((b & 0x80) == 0) break;
328      shift += 7;
329    }
330    DCHECK_LE(ptr - (base + offset), kMaxLength);
331    *length = static_cast<unsigned>(ptr - (base + offset));
332    if (ptr == end) {
333      // Check there are no bits set beyond the bitwidth of {IntType}.
334      const int kExtraBits = (1 + kMaxLength * 7) - (sizeof(IntType) * 8);
335      const byte kExtraBitsMask =
336          static_cast<byte>((0xFF << (8 - kExtraBits)) & 0xFF);
337      int extra_bits_value;
338      if (is_signed) {
339        // A signed-LEB128 must sign-extend the final byte, excluding its
340        // most-signifcant bit. e.g. for a 32-bit LEB128:
341        //   kExtraBits = 4
342        //   kExtraBitsMask = 0xf0
343        // If b is 0x0f, the value is negative, so extra_bits_value is 0x70.
344        // If b is 0x03, the value is positive, so extra_bits_value is 0x00.
345        extra_bits_value = (static_cast<int8_t>(b << kExtraBits) >> 8) &
346                           kExtraBitsMask & ~0x80;
347      } else {
348        extra_bits_value = 0;
349      }
350      if (*length == kMaxLength && (b & kExtraBitsMask) != extra_bits_value) {
351        error(base, ptr, "extra bits in varint");
352        return 0;
353      }
354      if ((b & 0x80) != 0) {
355        error(base, ptr, "%s", msg);
356        return 0;
357      }
358    }
359    return result;
360  }
361
362  template <typename IntType, bool is_signed>
363  IntType consume_leb(const char* name = nullptr) {
364    TRACE("  +%d  %-20s: ", static_cast<int>(pc_ - start_),
365          name ? name : "varint");
366    if (checkAvailable(1)) {
367      const int kMaxLength = (sizeof(IntType) * 8 + 6) / 7;
368      const byte* pos = pc_;
369      const byte* end = pc_ + kMaxLength;
370      if (end > end_) end = end_;
371
372      IntType result = 0;
373      int shift = 0;
374      byte b = 0;
375      while (pc_ < end) {
376        b = *pc_++;
377        TRACE("%02x ", b);
378        result = result | (static_cast<IntType>(b & 0x7F) << shift);
379        shift += 7;
380        if ((b & 0x80) == 0) break;
381      }
382
383      int length = static_cast<int>(pc_ - pos);
384      if (pc_ == end && (b & 0x80)) {
385        TRACE("\n");
386        error(pc_ - 1, "varint too large");
387      } else if (length == 0) {
388        TRACE("\n");
389        error(pc_, "varint of length 0");
390      } else if (is_signed) {
391        if (length < kMaxLength) {
392          int sign_ext_shift = 8 * sizeof(IntType) - shift;
393          // Perform sign extension.
394          result = (result << sign_ext_shift) >> sign_ext_shift;
395        }
396        TRACE("= %" PRIi64 "\n", static_cast<int64_t>(result));
397      } else {
398        TRACE("= %" PRIu64 "\n", static_cast<uint64_t>(result));
399      }
400      return result;
401    }
402    return traceOffEnd<uint32_t>();
403  }
404};
405
406#undef TRACE
407}  // namespace wasm
408}  // namespace internal
409}  // namespace v8
410
411#endif  // V8_WASM_DECODER_H_
412