1// Copyright 2014 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6//     * Redistributions of source code must retain the above copyright
7//       notice, this list of conditions and the following disclaimer.
8//     * Redistributions in binary form must reproduce the above
9//       copyright notice, this list of conditions and the following
10//       disclaimer in the documentation and/or other materials provided
11//       with the distribution.
12//     * Neither the name of Google Inc. nor the names of its
13//       contributors may be used to endorse or promote products derived
14//       from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#ifndef V8_AST_AST_VALUE_FACTORY_H_
29#define V8_AST_AST_VALUE_FACTORY_H_
30
31#include "src/api.h"
32#include "src/base/hashmap.h"
33#include "src/globals.h"
34#include "src/utils.h"
35
36// AstString, AstValue and AstValueFactory are for storing strings and values
37// independent of the V8 heap and internalizing them later. During parsing,
38// AstStrings and AstValues are created and stored outside the heap, in
39// AstValueFactory. After parsing, the strings and values are internalized
40// (moved into the V8 heap).
41namespace v8 {
42namespace internal {
43
44class AstString : public ZoneObject {
45 public:
46  explicit AstString(bool is_raw)
47      : next_(nullptr), bit_field_(IsRawStringBits::encode(is_raw)) {}
48
49  int length() const;
50  bool IsEmpty() const { return length() == 0; }
51
52  // Puts the string into the V8 heap.
53  void Internalize(Isolate* isolate);
54
55  // This function can be called after internalizing.
56  V8_INLINE Handle<String> string() const {
57    DCHECK_NOT_NULL(string_);
58    return Handle<String>(string_);
59  }
60
61  AstString* next() { return next_; }
62  AstString** next_location() { return &next_; }
63
64 protected:
65  void set_string(Handle<String> string) { string_ = string.location(); }
66  // {string_} is stored as String** instead of a Handle<String> so it can be
67  // stored in a union with {next_}.
68  union {
69    AstString* next_;
70    String** string_;
71  };
72  // Poor-man's virtual dispatch to AstRawString / AstConsString. Takes less
73  // memory.
74  class IsRawStringBits : public BitField<bool, 0, 1> {};
75  int bit_field_;
76};
77
78
79class AstRawString final : public AstString {
80 public:
81  int length() const {
82    if (is_one_byte()) return literal_bytes_.length();
83    return literal_bytes_.length() / 2;
84  }
85
86  int byte_length() const { return literal_bytes_.length(); }
87
88  void Internalize(Isolate* isolate);
89
90  bool AsArrayIndex(uint32_t* index) const;
91
92  // The string is not null-terminated, use length() to find out the length.
93  const unsigned char* raw_data() const {
94    return literal_bytes_.start();
95  }
96
97  bool is_one_byte() const { return IsOneByteBits::decode(bit_field_); }
98
99  bool IsOneByteEqualTo(const char* data) const;
100  uint16_t FirstCharacter() const {
101    if (is_one_byte()) return literal_bytes_[0];
102    const uint16_t* c =
103        reinterpret_cast<const uint16_t*>(literal_bytes_.start());
104    return *c;
105  }
106
107  // For storing AstRawStrings in a hash map.
108  uint32_t hash() const {
109    return hash_;
110  }
111
112 private:
113  friend class AstValueFactory;
114  friend class AstRawStringInternalizationKey;
115
116  AstRawString(bool is_one_byte, const Vector<const byte>& literal_bytes,
117               uint32_t hash)
118      : AstString(true), hash_(hash), literal_bytes_(literal_bytes) {
119    bit_field_ |= IsOneByteBits::encode(is_one_byte);
120  }
121
122  AstRawString() : AstString(true), hash_(0) {
123    bit_field_ |= IsOneByteBits::encode(true);
124  }
125
126  class IsOneByteBits : public BitField<bool, IsRawStringBits::kNext, 1> {};
127
128  uint32_t hash_;
129  // Points to memory owned by Zone.
130  Vector<const byte> literal_bytes_;
131};
132
133
134class AstConsString final : public AstString {
135 public:
136  AstConsString(const AstString* left, const AstString* right)
137      : AstString(false),
138        length_(left->length() + right->length()),
139        left_(left),
140        right_(right) {}
141
142  int length() const { return length_; }
143
144  void Internalize(Isolate* isolate);
145
146 private:
147  const int length_;
148  const AstString* left_;
149  const AstString* right_;
150};
151
152
// AstValue is either a string, a symbol, a number, a boolean, or a special
// value (null, undefined, the hole).
155class AstValue : public ZoneObject {
156 public:
157  bool IsString() const {
158    return type_ == STRING;
159  }
160
161  bool IsNumber() const {
162    return type_ == NUMBER || type_ == NUMBER_WITH_DOT || type_ == SMI ||
163           type_ == SMI_WITH_DOT;
164  }
165
166  bool ContainsDot() const {
167    return type_ == NUMBER_WITH_DOT || type_ == SMI_WITH_DOT;
168  }
169
170  const AstRawString* AsString() const {
171    CHECK_EQ(STRING, type_);
172    return string_;
173  }
174
175  double AsNumber() const {
176    if (type_ == NUMBER || type_ == NUMBER_WITH_DOT)
177      return number_;
178    if (type_ == SMI || type_ == SMI_WITH_DOT)
179      return smi_;
180    UNREACHABLE();
181    return 0;
182  }
183
184  Smi* AsSmi() const {
185    CHECK(type_ == SMI || type_ == SMI_WITH_DOT);
186    return Smi::FromInt(smi_);
187  }
188
189  bool EqualsString(const AstRawString* string) const {
190    return type_ == STRING && string_ == string;
191  }
192
193  bool IsPropertyName() const;
194
195  bool BooleanValue() const;
196
197  bool IsSmi() const { return type_ == SMI || type_ == SMI_WITH_DOT; }
198  bool IsFalse() const { return type_ == BOOLEAN && !bool_; }
199  bool IsTrue() const { return type_ == BOOLEAN && bool_; }
200  bool IsUndefined() const { return type_ == UNDEFINED; }
201  bool IsTheHole() const { return type_ == THE_HOLE; }
202  bool IsNull() const { return type_ == NULL_TYPE; }
203
204  void Internalize(Isolate* isolate);
205
206  // Can be called after Internalize has been called.
207  V8_INLINE Handle<Object> value() const {
208    if (type_ == STRING) {
209      return string_->string();
210    }
211    DCHECK_NOT_NULL(value_);
212    return Handle<Object>(value_);
213  }
214  AstValue* next() const { return next_; }
215  void set_next(AstValue* next) { next_ = next; }
216
217 private:
218  void set_value(Handle<Object> object) { value_ = object.location(); }
219  friend class AstValueFactory;
220
221  enum Type {
222    STRING,
223    SYMBOL,
224    NUMBER,
225    NUMBER_WITH_DOT,
226    SMI,
227    SMI_WITH_DOT,
228    BOOLEAN,
229    NULL_TYPE,
230    UNDEFINED,
231    THE_HOLE
232  };
233
234  explicit AstValue(const AstRawString* s) : type_(STRING), next_(nullptr) {
235    string_ = s;
236  }
237
238  explicit AstValue(const char* name) : type_(SYMBOL), next_(nullptr) {
239    symbol_name_ = name;
240  }
241
242  explicit AstValue(double n, bool with_dot) : next_(nullptr) {
243    int int_value;
244    if (DoubleToSmiInteger(n, &int_value)) {
245      type_ = with_dot ? SMI_WITH_DOT : SMI;
246      smi_ = int_value;
247    } else {
248      type_ = with_dot ? NUMBER_WITH_DOT : NUMBER;
249      number_ = n;
250    }
251  }
252
253  AstValue(Type t, int i) : type_(t), next_(nullptr) {
254    DCHECK(type_ == SMI);
255    smi_ = i;
256  }
257
258  explicit AstValue(bool b) : type_(BOOLEAN), next_(nullptr) { bool_ = b; }
259
260  explicit AstValue(Type t) : type_(t), next_(nullptr) {
261    DCHECK(t == NULL_TYPE || t == UNDEFINED || t == THE_HOLE);
262  }
263
264  Type type_;
265
266  // {value_} is stored as Object** instead of a Handle<Object> so it can be
267  // stored in a union with {next_}.
268  union {
269    Object** value_;  // if internalized
270    AstValue* next_;  // if !internalized
271  };
272
273  // Uninternalized value.
274  union {
275    const AstRawString* string_;
276    double number_;
277    int smi_;
278    bool bool_;
279    const char* symbol_name_;
280  };
281};
282
283
// List of predefined strings, for generating constants. STRING_CONSTANTS(V)
// expands to V(name, str) once per entry, in the order listed, where |name|
// is the identifier stem and |str| is the string literal itself. Define V,
// expand the list, then #undef V.
#define STRING_CONSTANTS(V)                     \
  V(anonymous_function, "(anonymous function)") \
  V(arguments, "arguments")                     \
  V(async, "async")                             \
  V(await, "await")                             \
  V(constructor, "constructor")                 \
  V(default, "default")                         \
  V(done, "done")                               \
  V(dot, ".")                                   \
  V(dot_class_field_init, ".class-field-init")  \
  V(dot_for, ".for")                            \
  V(dot_generator_object, ".generator_object")  \
  V(dot_iterator, ".iterator")                  \
  V(dot_result, ".result")                      \
  V(dot_switch_tag, ".switch_tag")              \
  V(dot_catch, ".catch")                        \
  V(empty, "")                                  \
  V(eval, "eval")                               \
  V(function, "function")                       \
  V(get_space, "get ")                          \
  V(length, "length")                           \
  V(let, "let")                                 \
  V(native, "native")                           \
  V(new_target, ".new.target")                  \
  V(next, "next")                               \
  V(proto, "__proto__")                         \
  V(prototype, "prototype")                     \
  V(return, "return")                           \
  V(set_space, "set ")                          \
  V(star_default_star, "*default*")             \
  V(this, "this")                               \
  V(this_function, ".this_function")            \
  V(throw, "throw")                             \
  V(undefined, "undefined")                     \
  V(use_asm, "use asm")                         \
  V(use_strict, "use strict")                   \
  V(value, "value")
322
// List of predefined non-string values. OTHER_CONSTANTS(V) expands to
// V(name) once per entry, in the order listed. Define V, expand the list,
// then #undef V.
#define OTHER_CONSTANTS(V) \
  V(true_value)            \
  V(false_value)           \
  V(null_value)            \
  V(undefined_value)       \
  V(the_hole_value)
329
330class AstValueFactory {
331 public:
332  AstValueFactory(Zone* zone, uint32_t hash_seed)
333      : string_table_(AstRawStringCompare),
334        values_(nullptr),
335        smis_(),
336        strings_(nullptr),
337        strings_end_(&strings_),
338        zone_(zone),
339        hash_seed_(hash_seed) {
340#define F(name, str) name##_string_ = NULL;
341    STRING_CONSTANTS(F)
342#undef F
343#define F(name) name##_ = NULL;
344    OTHER_CONSTANTS(F)
345#undef F
346    std::fill(smis_, smis_ + arraysize(smis_), nullptr);
347  }
348
349  Zone* zone() const { return zone_; }
350
351  const AstRawString* GetOneByteString(Vector<const uint8_t> literal) {
352    return GetOneByteStringInternal(literal);
353  }
354  const AstRawString* GetOneByteString(const char* string) {
355    return GetOneByteString(Vector<const uint8_t>(
356        reinterpret_cast<const uint8_t*>(string), StrLength(string)));
357  }
358  const AstRawString* GetTwoByteString(Vector<const uint16_t> literal) {
359    return GetTwoByteStringInternal(literal);
360  }
361  const AstRawString* GetString(Handle<String> literal);
362  const AstConsString* NewConsString(const AstString* left,
363                                     const AstString* right);
364  const AstRawString* ConcatStrings(const AstRawString* left,
365                                    const AstRawString* right);
366
367  void Internalize(Isolate* isolate);
368
369#define F(name, str)                                                    \
370  const AstRawString* name##_string() {                                 \
371    if (name##_string_ == NULL) {                                       \
372      const char* data = str;                                           \
373      name##_string_ = GetOneByteString(                                \
374          Vector<const uint8_t>(reinterpret_cast<const uint8_t*>(data), \
375                                static_cast<int>(strlen(data))));       \
376    }                                                                   \
377    return name##_string_;                                              \
378  }
379  STRING_CONSTANTS(F)
380#undef F
381
382  const AstValue* NewString(const AstRawString* string);
383  // A JavaScript symbol (ECMA-262 edition 6).
384  const AstValue* NewSymbol(const char* name);
385  const AstValue* NewNumber(double number, bool with_dot = false);
386  const AstValue* NewSmi(uint32_t number);
387  const AstValue* NewBoolean(bool b);
388  const AstValue* NewStringList(ZoneList<const AstRawString*>* strings);
389  const AstValue* NewNull();
390  const AstValue* NewUndefined();
391  const AstValue* NewTheHole();
392
393 private:
394  static const uint32_t kMaxCachedSmi = 1 << 10;
395
396  STATIC_ASSERT(kMaxCachedSmi <= Smi::kMaxValue);
397
398  AstValue* AddValue(AstValue* value) {
399    value->set_next(values_);
400    values_ = value;
401    return value;
402  }
403  AstString* AddString(AstString* string) {
404    *strings_end_ = string;
405    strings_end_ = string->next_location();
406    return string;
407  }
408  void ResetStrings() {
409    strings_ = nullptr;
410    strings_end_ = &strings_;
411  }
412  V8_EXPORT_PRIVATE AstRawString* GetOneByteStringInternal(
413      Vector<const uint8_t> literal);
414  AstRawString* GetTwoByteStringInternal(Vector<const uint16_t> literal);
415  AstRawString* GetString(uint32_t hash, bool is_one_byte,
416                          Vector<const byte> literal_bytes);
417
418  static bool AstRawStringCompare(void* a, void* b);
419
420  // All strings are copied here, one after another (no NULLs inbetween).
421  base::CustomMatcherHashMap string_table_;
422  // For keeping track of all AstValues and AstRawStrings we've created (so that
423  // they can be internalized later).
424  AstValue* values_;
425
426  AstValue* smis_[kMaxCachedSmi + 1];
427  // We need to keep track of strings_ in order since cons strings require their
428  // members to be internalized first.
429  AstString* strings_;
430  AstString** strings_end_;
431  Zone* zone_;
432
433  uint32_t hash_seed_;
434
435#define F(name, str) const AstRawString* name##_string_;
436  STRING_CONSTANTS(F)
437#undef F
438
439#define F(name) AstValue* name##_;
440  OTHER_CONSTANTS(F)
441#undef F
442};
443}  // namespace internal
444}  // namespace v8
445
446#undef STRING_CONSTANTS
447#undef OTHER_CONSTANTS
448
449#endif  // V8_AST_AST_VALUE_FACTORY_H_
450