1// Copyright 2014 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6//     * Redistributions of source code must retain the above copyright
7//       notice, this list of conditions and the following disclaimer.
8//     * Redistributions in binary form must reproduce the above
9//       copyright notice, this list of conditions and the following
10//       disclaimer in the documentation and/or other materials provided
11//       with the distribution.
12//     * Neither the name of Google Inc. nor the names of its
13//       contributors may be used to endorse or promote products derived
14//       from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#ifndef V8_AST_AST_VALUE_FACTORY_H_
29#define V8_AST_AST_VALUE_FACTORY_H_
30
31#include "src/base/hashmap.h"
32#include "src/conversions.h"
33#include "src/factory.h"
34#include "src/globals.h"
35#include "src/isolate.h"
36#include "src/utils.h"
37
38// AstString, AstValue and AstValueFactory are for storing strings and values
39// independent of the V8 heap and internalizing them later. During parsing,
40// AstStrings and AstValues are created and stored outside the heap, in
41// AstValueFactory. After parsing, the strings and values are internalized
42// (moved into the V8 heap).
43namespace v8 {
44namespace internal {
45
46class AstString : public ZoneObject {
47 public:
48  explicit AstString(bool is_raw)
49      : next_(nullptr), bit_field_(IsRawStringBits::encode(is_raw)) {}
50
51  int length() const;
52  bool IsEmpty() const { return length() == 0; }
53
54  // Puts the string into the V8 heap.
55  void Internalize(Isolate* isolate);
56
57  // This function can be called after internalizing.
58  V8_INLINE Handle<String> string() const {
59    DCHECK_NOT_NULL(string_);
60    return Handle<String>(string_);
61  }
62
63  AstString* next() { return next_; }
64  AstString** next_location() { return &next_; }
65
66 protected:
67  void set_string(Handle<String> string) { string_ = string.location(); }
68  // {string_} is stored as String** instead of a Handle<String> so it can be
69  // stored in a union with {next_}.
70  union {
71    AstString* next_;
72    String** string_;
73  };
74  // Poor-man's virtual dispatch to AstRawString / AstConsString. Takes less
75  // memory.
76  class IsRawStringBits : public BitField<bool, 0, 1> {};
77  int bit_field_;
78};
79
80
81class AstRawString final : public AstString {
82 public:
83  int length() const {
84    if (is_one_byte()) return literal_bytes_.length();
85    return literal_bytes_.length() / 2;
86  }
87
88  int byte_length() const { return literal_bytes_.length(); }
89
90  void Internalize(Isolate* isolate);
91
92  bool AsArrayIndex(uint32_t* index) const;
93
94  // The string is not null-terminated, use length() to find out the length.
95  const unsigned char* raw_data() const {
96    return literal_bytes_.start();
97  }
98
99  bool is_one_byte() const { return IsOneByteBits::decode(bit_field_); }
100
101  bool IsOneByteEqualTo(const char* data) const;
102  uint16_t FirstCharacter() const {
103    if (is_one_byte()) return literal_bytes_[0];
104    const uint16_t* c =
105        reinterpret_cast<const uint16_t*>(literal_bytes_.start());
106    return *c;
107  }
108
109  static bool Compare(void* a, void* b);
110
111  // For storing AstRawStrings in a hash map.
112  uint32_t hash() const {
113    return hash_;
114  }
115
116 private:
117  friend class AstRawStringInternalizationKey;
118  friend class AstStringConstants;
119  friend class AstValueFactory;
120
121  AstRawString(bool is_one_byte, const Vector<const byte>& literal_bytes,
122               uint32_t hash)
123      : AstString(true), hash_(hash), literal_bytes_(literal_bytes) {
124    bit_field_ |= IsOneByteBits::encode(is_one_byte);
125  }
126
127  AstRawString() : AstString(true), hash_(0) {
128    bit_field_ |= IsOneByteBits::encode(true);
129  }
130
131  class IsOneByteBits : public BitField<bool, IsRawStringBits::kNext, 1> {};
132
133  uint32_t hash_;
134  // Points to memory owned by Zone.
135  Vector<const byte> literal_bytes_;
136};
137
138
139class AstConsString final : public AstString {
140 public:
141  AstConsString(const AstString* left, const AstString* right)
142      : AstString(false),
143        length_(left->length() + right->length()),
144        left_(left),
145        right_(right) {}
146
147  int length() const { return length_; }
148
149  void Internalize(Isolate* isolate);
150
151 private:
152  const int length_;
153  const AstString* left_;
154  const AstString* right_;
155};
156
// Symbols that an AstValue of symbol type can refer to. Stored in one byte.
enum class AstSymbol : uint8_t {
  kHomeObjectSymbol
};
158
159// AstValue is either a string, a symbol, a number, a string array, a boolean,
160// or a special value (null, undefined, the hole).
161class AstValue : public ZoneObject {
162 public:
163  bool IsString() const {
164    return type_ == STRING;
165  }
166
167  bool IsSymbol() const { return type_ == SYMBOL; }
168
169  bool IsNumber() const { return IsSmi() || IsHeapNumber(); }
170
171  bool ContainsDot() const {
172    return type_ == NUMBER_WITH_DOT || type_ == SMI_WITH_DOT;
173  }
174
175  const AstRawString* AsString() const {
176    CHECK_EQ(STRING, type_);
177    return string_;
178  }
179
180  AstSymbol AsSymbol() const {
181    CHECK_EQ(SYMBOL, type_);
182    return symbol_;
183  }
184
185  double AsNumber() const {
186    if (IsHeapNumber()) return number_;
187    if (IsSmi()) return smi_;
188    UNREACHABLE();
189    return 0;
190  }
191
192  Smi* AsSmi() const {
193    CHECK(IsSmi());
194    return Smi::FromInt(smi_);
195  }
196
197  bool ToUint32(uint32_t* value) const {
198    if (IsSmi()) {
199      int num = smi_;
200      if (num < 0) return false;
201      *value = static_cast<uint32_t>(num);
202      return true;
203    }
204    if (IsHeapNumber()) {
205      return DoubleToUint32IfEqualToSelf(number_, value);
206    }
207    return false;
208  }
209
210  bool EqualsString(const AstRawString* string) const {
211    return type_ == STRING && string_ == string;
212  }
213
214  bool IsPropertyName() const;
215
216  bool BooleanValue() const;
217
218  bool IsSmi() const { return type_ == SMI || type_ == SMI_WITH_DOT; }
219  bool IsHeapNumber() const {
220    return type_ == NUMBER || type_ == NUMBER_WITH_DOT;
221  }
222  bool IsFalse() const { return type_ == BOOLEAN && !bool_; }
223  bool IsTrue() const { return type_ == BOOLEAN && bool_; }
224  bool IsUndefined() const { return type_ == UNDEFINED; }
225  bool IsTheHole() const { return type_ == THE_HOLE; }
226  bool IsNull() const { return type_ == NULL_TYPE; }
227
228  void Internalize(Isolate* isolate);
229
230  // Can be called after Internalize has been called.
231  V8_INLINE Handle<Object> value() const {
232    if (type_ == STRING) {
233      return string_->string();
234    }
235    DCHECK_NOT_NULL(value_);
236    return Handle<Object>(value_);
237  }
238  AstValue* next() const { return next_; }
239  void set_next(AstValue* next) { next_ = next; }
240
241 private:
242  void set_value(Handle<Object> object) { value_ = object.location(); }
243  friend class AstValueFactory;
244
245  enum Type {
246    STRING,
247    SYMBOL,
248    NUMBER,
249    NUMBER_WITH_DOT,
250    SMI,
251    SMI_WITH_DOT,
252    BOOLEAN,
253    NULL_TYPE,
254    UNDEFINED,
255    THE_HOLE
256  };
257
258  explicit AstValue(const AstRawString* s) : type_(STRING), next_(nullptr) {
259    string_ = s;
260  }
261
262  explicit AstValue(AstSymbol symbol) : type_(SYMBOL), next_(nullptr) {
263    symbol_ = symbol;
264  }
265
266  explicit AstValue(double n, bool with_dot) : next_(nullptr) {
267    int int_value;
268    if (DoubleToSmiInteger(n, &int_value)) {
269      type_ = with_dot ? SMI_WITH_DOT : SMI;
270      smi_ = int_value;
271    } else {
272      type_ = with_dot ? NUMBER_WITH_DOT : NUMBER;
273      number_ = n;
274    }
275  }
276
277  AstValue(Type t, int i) : type_(t), next_(nullptr) {
278    DCHECK(type_ == SMI);
279    smi_ = i;
280  }
281
282  explicit AstValue(bool b) : type_(BOOLEAN), next_(nullptr) { bool_ = b; }
283
284  explicit AstValue(Type t) : type_(t), next_(nullptr) {
285    DCHECK(t == NULL_TYPE || t == UNDEFINED || t == THE_HOLE);
286  }
287
288  Type type_;
289
290  // {value_} is stored as Object** instead of a Handle<Object> so it can be
291  // stored in a union with {next_}.
292  union {
293    Object** value_;  // if internalized
294    AstValue* next_;  // if !internalized
295  };
296
297  // Uninternalized value.
298  union {
299    const AstRawString* string_;
300    double number_;
301    int smi_;
302    bool bool_;
303    AstSymbol symbol_;
304  };
305};
306
// X-macro listing the constant strings. V(name, str) is invoked once per
// entry, where |name| is an identifier fragment and |str| the string's
// contents. Users in this file expand the list into member declarations,
// accessors and initialization code, so the order of entries is significant.
#define STRING_CONSTANTS(V)                     \
  V(anonymous_function, "(anonymous function)") \
  V(arguments, "arguments")                     \
  V(async, "async")                             \
  V(await, "await")                             \
  V(constructor, "constructor")                 \
  V(default, "default")                         \
  V(done, "done")                               \
  V(dot, ".")                                   \
  V(dot_for, ".for")                            \
  V(dot_generator_object, ".generator_object")  \
  V(dot_iterator, ".iterator")                  \
  V(dot_result, ".result")                      \
  V(dot_switch_tag, ".switch_tag")              \
  V(dot_catch, ".catch")                        \
  V(empty, "")                                  \
  V(eval, "eval")                               \
  V(function, "function")                       \
  V(get_space, "get ")                          \
  V(length, "length")                           \
  V(let, "let")                                 \
  V(name, "name")                               \
  V(native, "native")                           \
  V(new_target, ".new.target")                  \
  V(next, "next")                               \
  V(proto, "__proto__")                         \
  V(prototype, "prototype")                     \
  V(return, "return")                           \
  V(set_space, "set ")                          \
  V(star_default_star, "*default*")             \
  V(this, "this")                               \
  V(this_function, ".this_function")            \
  V(throw, "throw")                             \
  V(undefined, "undefined")                     \
  V(use_asm, "use asm")                         \
  V(use_strict, "use strict")                   \
  V(value, "value")
345
346class AstStringConstants final {
347 public:
348  AstStringConstants(Isolate* isolate, uint32_t hash_seed)
349      : zone_(isolate->allocator(), ZONE_NAME),
350        string_table_(AstRawString::Compare),
351        hash_seed_(hash_seed) {
352    DCHECK(ThreadId::Current().Equals(isolate->thread_id()));
353#define F(name, str)                                                      \
354  {                                                                       \
355    const char* data = str;                                               \
356    Vector<const uint8_t> literal(reinterpret_cast<const uint8_t*>(data), \
357                                  static_cast<int>(strlen(data)));        \
358    uint32_t hash = StringHasher::HashSequentialString<uint8_t>(          \
359        literal.start(), literal.length(), hash_seed_);                   \
360    name##_string_ = new (&zone_) AstRawString(true, literal, hash);      \
361    /* The Handle returned by the factory is located on the roots */      \
362    /* array, not on the temporary HandleScope, so this is safe.  */      \
363    name##_string_->set_string(isolate->factory()->name##_string());      \
364    base::HashMap::Entry* entry =                                         \
365        string_table_.InsertNew(name##_string_, name##_string_->hash());  \
366    DCHECK(entry->value == nullptr);                                      \
367    entry->value = reinterpret_cast<void*>(1);                            \
368  }
369    STRING_CONSTANTS(F)
370#undef F
371  }
372
373#define F(name, str) \
374  const AstRawString* name##_string() const { return name##_string_; }
375  STRING_CONSTANTS(F)
376#undef F
377
378  uint32_t hash_seed() const { return hash_seed_; }
379  const base::CustomMatcherHashMap* string_table() const {
380    return &string_table_;
381  }
382
383 private:
384  Zone zone_;
385  base::CustomMatcherHashMap string_table_;
386  uint32_t hash_seed_;
387
388#define F(name, str) AstRawString* name##_string_;
389  STRING_CONSTANTS(F)
390#undef F
391
392  DISALLOW_COPY_AND_ASSIGN(AstStringConstants);
393};
394
// X-macro listing the non-string constants for which AstValueFactory keeps a
// member. V(name) is invoked once per entry, in this order.
#define OTHER_CONSTANTS(V) \
  V(true_value)            \
  V(false_value)           \
  V(null_value)            \
  V(undefined_value)       \
  V(the_hole_value)
401
402class AstValueFactory {
403 public:
404  AstValueFactory(Zone* zone, const AstStringConstants* string_constants,
405                  uint32_t hash_seed)
406      : string_table_(string_constants->string_table()),
407        values_(nullptr),
408        strings_(nullptr),
409        strings_end_(&strings_),
410        string_constants_(string_constants),
411        zone_(zone),
412        hash_seed_(hash_seed) {
413#define F(name) name##_ = nullptr;
414    OTHER_CONSTANTS(F)
415#undef F
416    DCHECK_EQ(hash_seed, string_constants->hash_seed());
417    std::fill(smis_, smis_ + arraysize(smis_), nullptr);
418    std::fill(one_character_strings_,
419              one_character_strings_ + arraysize(one_character_strings_),
420              nullptr);
421  }
422
423  Zone* zone() const { return zone_; }
424
425  const AstRawString* GetOneByteString(Vector<const uint8_t> literal) {
426    return GetOneByteStringInternal(literal);
427  }
428  const AstRawString* GetOneByteString(const char* string) {
429    return GetOneByteString(Vector<const uint8_t>(
430        reinterpret_cast<const uint8_t*>(string), StrLength(string)));
431  }
432  const AstRawString* GetTwoByteString(Vector<const uint16_t> literal) {
433    return GetTwoByteStringInternal(literal);
434  }
435  const AstRawString* GetString(Handle<String> literal);
436  const AstConsString* NewConsString(const AstString* left,
437                                     const AstString* right);
438
439  V8_EXPORT_PRIVATE void Internalize(Isolate* isolate);
440
441#define F(name, str)                           \
442  const AstRawString* name##_string() {        \
443    return string_constants_->name##_string(); \
444  }
445  STRING_CONSTANTS(F)
446#undef F
447
448  V8_EXPORT_PRIVATE const AstValue* NewString(const AstRawString* string);
449  // A JavaScript symbol (ECMA-262 edition 6).
450  const AstValue* NewSymbol(AstSymbol symbol);
451  V8_EXPORT_PRIVATE const AstValue* NewNumber(double number,
452                                              bool with_dot = false);
453  const AstValue* NewSmi(uint32_t number);
454  const AstValue* NewBoolean(bool b);
455  const AstValue* NewStringList(ZoneList<const AstRawString*>* strings);
456  const AstValue* NewNull();
457  const AstValue* NewUndefined();
458  const AstValue* NewTheHole();
459
460 private:
461  static const uint32_t kMaxCachedSmi = 1 << 10;
462
463  STATIC_ASSERT(kMaxCachedSmi <= Smi::kMaxValue);
464
465  AstValue* AddValue(AstValue* value) {
466    value->set_next(values_);
467    values_ = value;
468    return value;
469  }
470  AstString* AddString(AstString* string) {
471    *strings_end_ = string;
472    strings_end_ = string->next_location();
473    return string;
474  }
475  void ResetStrings() {
476    strings_ = nullptr;
477    strings_end_ = &strings_;
478  }
479  V8_EXPORT_PRIVATE AstRawString* GetOneByteStringInternal(
480      Vector<const uint8_t> literal);
481  AstRawString* GetTwoByteStringInternal(Vector<const uint16_t> literal);
482  AstRawString* GetString(uint32_t hash, bool is_one_byte,
483                          Vector<const byte> literal_bytes);
484
485  // All strings are copied here, one after another (no NULLs inbetween).
486  base::CustomMatcherHashMap string_table_;
487  // For keeping track of all AstValues and AstRawStrings we've created (so that
488  // they can be internalized later).
489  AstValue* values_;
490
491  // We need to keep track of strings_ in order since cons strings require their
492  // members to be internalized first.
493  AstString* strings_;
494  AstString** strings_end_;
495
496  // Holds constant string values which are shared across the isolate.
497  const AstStringConstants* string_constants_;
498
499  // Caches for faster access: small numbers, one character lowercase strings
500  // (for minified code).
501  AstValue* smis_[kMaxCachedSmi + 1];
502  AstRawString* one_character_strings_[26];
503
504  Zone* zone_;
505
506  uint32_t hash_seed_;
507
508#define F(name) AstValue* name##_;
509  OTHER_CONSTANTS(F)
510#undef F
511};
512}  // namespace internal
513}  // namespace v8
514
515#undef STRING_CONSTANTS
516#undef OTHER_CONSTANTS
517
518#endif  // V8_AST_AST_VALUE_FACTORY_H_
519