1// Copyright 2015 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_WASM_DECODER_H_
6#define V8_WASM_DECODER_H_
7
8#include <memory>
9
10#include "src/base/compiler-specific.h"
11#include "src/flags.h"
12#include "src/signature.h"
13#include "src/utils.h"
14#include "src/wasm/wasm-result.h"
15#include "src/zone/zone-containers.h"
16
17namespace v8 {
18namespace internal {
19namespace wasm {
20
// Debug-only tracing helper for the decoder. In DEBUG builds it forwards its
// arguments to PrintF when FLAG_trace_wasm_decoder is set. In all other builds
// it expands to nothing, so its arguments are not evaluated.
#if DEBUG
#define TRACE(...)                                    \
  do {                                                \
    if (FLAG_trace_wasm_decoder) PrintF(__VA_ARGS__); \
  } while (false)
#else
#define TRACE(...)
#endif
29
30// A helper utility to decode bytes, integers, fields, varints, etc, from
31// a buffer of bytes.
32class Decoder {
33 public:
34  Decoder(const byte* start, const byte* end)
35      : start_(start),
36        pc_(start),
37        limit_(end),
38        end_(end),
39        error_pc_(nullptr),
40        error_pt_(nullptr) {}
41
42  virtual ~Decoder() {}
43
44  inline bool check(const byte* base, unsigned offset, unsigned length,
45                    const char* msg) {
46    DCHECK_GE(base, start_);
47    if ((base + offset + length) > limit_) {
48      error(base, base + offset, "%s", msg);
49      return false;
50    }
51    return true;
52  }
53
54  // Reads a single 8-bit byte, reporting an error if out of bounds.
55  inline uint8_t checked_read_u8(const byte* base, unsigned offset,
56                                 const char* msg = "expected 1 byte") {
57    return check(base, offset, 1, msg) ? base[offset] : 0;
58  }
59
60  // Reads 16-bit word, reporting an error if out of bounds.
61  inline uint16_t checked_read_u16(const byte* base, unsigned offset,
62                                   const char* msg = "expected 2 bytes") {
63    return check(base, offset, 2, msg) ? read_u16(base + offset) : 0;
64  }
65
66  // Reads 32-bit word, reporting an error if out of bounds.
67  inline uint32_t checked_read_u32(const byte* base, unsigned offset,
68                                   const char* msg = "expected 4 bytes") {
69    return check(base, offset, 4, msg) ? read_u32(base + offset) : 0;
70  }
71
72  // Reads 64-bit word, reporting an error if out of bounds.
73  inline uint64_t checked_read_u64(const byte* base, unsigned offset,
74                                   const char* msg = "expected 8 bytes") {
75    return check(base, offset, 8, msg) ? read_u64(base + offset) : 0;
76  }
77
78  // Reads a variable-length unsigned integer (little endian).
79  uint32_t checked_read_u32v(const byte* base, unsigned offset,
80                             unsigned* length,
81                             const char* msg = "expected LEB32") {
82    return checked_read_leb<uint32_t, false>(base, offset, length, msg);
83  }
84
85  // Reads a variable-length signed integer (little endian).
86  int32_t checked_read_i32v(const byte* base, unsigned offset, unsigned* length,
87                            const char* msg = "expected SLEB32") {
88    uint32_t result =
89        checked_read_leb<uint32_t, true>(base, offset, length, msg);
90    if (*length == 5) return bit_cast<int32_t>(result);
91    if (*length > 0) {
92      int shift = 32 - 7 * *length;
93      // Perform sign extension.
94      return bit_cast<int32_t>(result << shift) >> shift;
95    }
96    return 0;
97  }
98
99  // Reads a variable-length unsigned integer (little endian).
100  uint64_t checked_read_u64v(const byte* base, unsigned offset,
101                             unsigned* length,
102                             const char* msg = "expected LEB64") {
103    return checked_read_leb<uint64_t, false>(base, offset, length, msg);
104  }
105
106  // Reads a variable-length signed integer (little endian).
107  int64_t checked_read_i64v(const byte* base, unsigned offset, unsigned* length,
108                            const char* msg = "expected SLEB64") {
109    uint64_t result =
110        checked_read_leb<uint64_t, true>(base, offset, length, msg);
111    if (*length == 10) return bit_cast<int64_t>(result);
112    if (*length > 0) {
113      int shift = 64 - 7 * *length;
114      // Perform sign extension.
115      return bit_cast<int64_t>(result << shift) >> shift;
116    }
117    return 0;
118  }
119
120  // Reads a single 16-bit unsigned integer (little endian).
121  inline uint16_t read_u16(const byte* ptr) {
122    DCHECK(ptr >= start_ && (ptr + 2) <= end_);
123    return ReadLittleEndianValue<uint16_t>(ptr);
124  }
125
126  // Reads a single 32-bit unsigned integer (little endian).
127  inline uint32_t read_u32(const byte* ptr) {
128    DCHECK(ptr >= start_ && (ptr + 4) <= end_);
129    return ReadLittleEndianValue<uint32_t>(ptr);
130  }
131
132  // Reads a single 64-bit unsigned integer (little endian).
133  inline uint64_t read_u64(const byte* ptr) {
134    DCHECK(ptr >= start_ && (ptr + 8) <= end_);
135    return ReadLittleEndianValue<uint64_t>(ptr);
136  }
137
138  // Reads a 8-bit unsigned integer (byte) and advances {pc_}.
139  uint8_t consume_u8(const char* name = nullptr) {
140    TRACE("  +%d  %-20s: ", static_cast<int>(pc_ - start_),
141          name ? name : "uint8_t");
142    if (checkAvailable(1)) {
143      byte val = *(pc_++);
144      TRACE("%02x = %d\n", val, val);
145      return val;
146    }
147    return traceOffEnd<uint8_t>();
148  }
149
150  // Reads a 16-bit unsigned integer (little endian) and advances {pc_}.
151  uint16_t consume_u16(const char* name = nullptr) {
152    TRACE("  +%d  %-20s: ", static_cast<int>(pc_ - start_),
153          name ? name : "uint16_t");
154    if (checkAvailable(2)) {
155      uint16_t val = read_u16(pc_);
156      TRACE("%02x %02x = %d\n", pc_[0], pc_[1], val);
157      pc_ += 2;
158      return val;
159    }
160    return traceOffEnd<uint16_t>();
161  }
162
163  // Reads a single 32-bit unsigned integer (little endian) and advances {pc_}.
164  uint32_t consume_u32(const char* name = nullptr) {
165    TRACE("  +%d  %-20s: ", static_cast<int>(pc_ - start_),
166          name ? name : "uint32_t");
167    if (checkAvailable(4)) {
168      uint32_t val = read_u32(pc_);
169      TRACE("%02x %02x %02x %02x = %u\n", pc_[0], pc_[1], pc_[2], pc_[3], val);
170      pc_ += 4;
171      return val;
172    }
173    return traceOffEnd<uint32_t>();
174  }
175
176  // Reads a LEB128 variable-length unsigned 32-bit integer and advances {pc_}.
177  uint32_t consume_u32v(const char* name = nullptr) {
178    return consume_leb<uint32_t, false>(name);
179  }
180
181  // Reads a LEB128 variable-length signed 32-bit integer and advances {pc_}.
182  int32_t consume_i32v(const char* name = nullptr) {
183    return consume_leb<int32_t, true>(name);
184  }
185
186  // Consume {size} bytes and send them to the bit bucket, advancing {pc_}.
187  void consume_bytes(uint32_t size, const char* name = "skip") {
188    TRACE("  +%d  %-20s: %d bytes\n", static_cast<int>(pc_ - start_), name,
189          size);
190    if (checkAvailable(size)) {
191      pc_ += size;
192    } else {
193      pc_ = limit_;
194    }
195  }
196
197  // Check that at least {size} bytes exist between {pc_} and {limit_}.
198  bool checkAvailable(int size) {
199    intptr_t pc_overflow_value = std::numeric_limits<intptr_t>::max() - size;
200    if (size < 0 || (intptr_t)pc_ > pc_overflow_value) {
201      error(pc_, nullptr, "reading %d bytes would underflow/overflow", size);
202      return false;
203    } else if (pc_ < start_ || limit_ < (pc_ + size)) {
204      error(pc_, nullptr, "expected %d bytes, fell off end", size);
205      return false;
206    } else {
207      return true;
208    }
209  }
210
211  void error(const char* msg) { error(pc_, nullptr, "%s", msg); }
212
213  void error(const byte* pc, const char* msg) { error(pc, nullptr, "%s", msg); }
214
215  // Sets internal error state.
216  void PRINTF_FORMAT(4, 5)
217      error(const byte* pc, const byte* pt, const char* format, ...) {
218    if (ok()) {
219#if DEBUG
220      if (FLAG_wasm_break_on_decoder_error) {
221        base::OS::DebugBreak();
222      }
223#endif
224      const int kMaxErrorMsg = 256;
225      char* buffer = new char[kMaxErrorMsg];
226      va_list arguments;
227      va_start(arguments, format);
228      base::OS::VSNPrintF(buffer, kMaxErrorMsg - 1, format, arguments);
229      va_end(arguments);
230      error_msg_.reset(buffer);
231      error_pc_ = pc;
232      error_pt_ = pt;
233      onFirstError();
234    }
235  }
236
  // Behavior triggered on first error, overridden in subclasses.
  // Called by {error} after the error message and positions have been stored;
  // this default implementation does nothing.
  virtual void onFirstError() {}
239
240  // Debugging helper to print bytes up to the end.
241  template <typename T>
242  T traceOffEnd() {
243    T t = 0;
244    for (const byte* ptr = pc_; ptr < limit_; ptr++) {
245      TRACE("%02x ", *ptr);
246    }
247    TRACE("<end>\n");
248    pc_ = limit_;
249    return t;
250  }
251
252  // Converts the given value to a {Result}, copying the error if necessary.
253  template <typename T>
254  Result<T> toResult(T val) {
255    Result<T> result;
256    if (failed()) {
257      TRACE("Result error: %s\n", error_msg_.get());
258      result.error_code = kError;
259      result.start = start_;
260      result.error_pc = error_pc_;
261      result.error_pt = error_pt_;
262      // transfer ownership of the error to the result.
263      result.error_msg.reset(error_msg_.release());
264    } else {
265      result.error_code = kSuccess;
266    }
267    result.val = std::move(val);
268    return result;
269  }
270
271  // Resets the boundaries of this decoder.
272  void Reset(const byte* start, const byte* end) {
273    start_ = start;
274    pc_ = start;
275    limit_ = end;
276    end_ = end;
277    error_pc_ = nullptr;
278    error_pt_ = nullptr;
279    error_msg_.reset();
280  }
281
282  bool ok() const { return error_msg_ == nullptr; }
283  bool failed() const { return !ok(); }
284  bool more() const { return pc_ < limit_; }
285
286  const byte* start() { return start_; }
287  const byte* pc() { return pc_; }
288  uint32_t pc_offset() { return static_cast<uint32_t>(pc_ - start_); }
289
 protected:
  // First byte of the buffer; offsets such as {pc_offset()} are relative to it.
  const byte* start_;
  // Current decoding position; advanced by the consume_* functions.
  const byte* pc_;
  // Bound (exclusive) enforced by {check}, {checkAvailable} and the LEB
  // readers.
  const byte* limit_;
  // Set alongside {limit_}; within this class it is only read by the DCHECKs
  // in read_u16/read_u32/read_u64.
  const byte* end_;
  // Position passed as {pc} to the first recorded error, or nullptr.
  const byte* error_pc_;
  // Position passed as {pt} to the first recorded error, or nullptr.
  const byte* error_pt_;
  // Message of the first recorded error; null while the decoder is {ok()}.
  std::unique_ptr<char[]> error_msg_;
298
299 private:
300  template <typename IntType, bool is_signed>
301  IntType checked_read_leb(const byte* base, unsigned offset, unsigned* length,
302                           const char* msg) {
303    if (!check(base, offset, 1, msg)) {
304      *length = 0;
305      return 0;
306    }
307
308    const int kMaxLength = (sizeof(IntType) * 8 + 6) / 7;
309    const byte* ptr = base + offset;
310    const byte* end = ptr + kMaxLength;
311    if (end > limit_) end = limit_;
312    int shift = 0;
313    byte b = 0;
314    IntType result = 0;
315    while (ptr < end) {
316      b = *ptr++;
317      result = result | (static_cast<IntType>(b & 0x7F) << shift);
318      if ((b & 0x80) == 0) break;
319      shift += 7;
320    }
321    DCHECK_LE(ptr - (base + offset), kMaxLength);
322    *length = static_cast<unsigned>(ptr - (base + offset));
323    if (ptr == end) {
324      // Check there are no bits set beyond the bitwidth of {IntType}.
325      const int kExtraBits = (1 + kMaxLength * 7) - (sizeof(IntType) * 8);
326      const byte kExtraBitsMask =
327          static_cast<byte>((0xFF << (8 - kExtraBits)) & 0xFF);
328      int extra_bits_value;
329      if (is_signed) {
330        // A signed-LEB128 must sign-extend the final byte, excluding its
331        // most-signifcant bit. e.g. for a 32-bit LEB128:
332        //   kExtraBits = 4
333        //   kExtraBitsMask = 0xf0
334        // If b is 0x0f, the value is negative, so extra_bits_value is 0x70.
335        // If b is 0x03, the value is positive, so extra_bits_value is 0x00.
336        extra_bits_value = (static_cast<int8_t>(b << kExtraBits) >> 8) &
337                           kExtraBitsMask & ~0x80;
338      } else {
339        extra_bits_value = 0;
340      }
341      if (*length == kMaxLength && (b & kExtraBitsMask) != extra_bits_value) {
342        error(base, ptr, "extra bits in varint");
343        return 0;
344      }
345      if ((b & 0x80) != 0) {
346        error(base, ptr, "%s", msg);
347        return 0;
348      }
349    }
350    return result;
351  }
352
353  template <typename IntType, bool is_signed>
354  IntType consume_leb(const char* name = nullptr) {
355    TRACE("  +%d  %-20s: ", static_cast<int>(pc_ - start_),
356          name ? name : "varint");
357    if (checkAvailable(1)) {
358      const int kMaxLength = (sizeof(IntType) * 8 + 6) / 7;
359      const byte* pos = pc_;
360      const byte* end = pc_ + kMaxLength;
361      if (end > limit_) end = limit_;
362
363      IntType result = 0;
364      int shift = 0;
365      byte b = 0;
366      while (pc_ < end) {
367        b = *pc_++;
368        TRACE("%02x ", b);
369        result = result | (static_cast<IntType>(b & 0x7F) << shift);
370        shift += 7;
371        if ((b & 0x80) == 0) break;
372      }
373
374      int length = static_cast<int>(pc_ - pos);
375      if (pc_ == end && (b & 0x80)) {
376        error(pc_ - 1, "varint too large");
377      } else if (length == 0) {
378        error(pc_, "varint of length 0");
379      } else if (is_signed) {
380        if (length < kMaxLength) {
381          int sign_ext_shift = 8 * sizeof(IntType) - shift;
382          // Perform sign extension.
383          result = (result << sign_ext_shift) >> sign_ext_shift;
384        }
385        TRACE("= %" PRIi64 "\n", static_cast<int64_t>(result));
386      } else {
387        TRACE("= %" PRIu64 "\n", static_cast<uint64_t>(result));
388      }
389      return result;
390    }
391    return traceOffEnd<uint32_t>();
392  }
393};
394
395#undef TRACE
396}  // namespace wasm
397}  // namespace internal
398}  // namespace v8
399
400#endif  // V8_WASM_DECODER_H_
401