ast-value-factory.h revision f3b273f5e6ffd2f6ba1c18a27a17db41dfb113c3
1// Copyright 2014 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6//     * Redistributions of source code must retain the above copyright
7//       notice, this list of conditions and the following disclaimer.
8//     * Redistributions in binary form must reproduce the above
9//       copyright notice, this list of conditions and the following
10//       disclaimer in the documentation and/or other materials provided
11//       with the distribution.
12//     * Neither the name of Google Inc. nor the names of its
13//       contributors may be used to endorse or promote products derived
14//       from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#ifndef V8_AST_AST_VALUE_FACTORY_H_
29#define V8_AST_AST_VALUE_FACTORY_H_
30
31#include "src/api.h"
32#include "src/base/hashmap.h"
33#include "src/utils.h"
34
35// AstString, AstValue and AstValueFactory are for storing strings and values
36// independent of the V8 heap and internalizing them later. During parsing,
37// AstStrings and AstValues are created and stored outside the heap, in
38// AstValueFactory. After parsing, the strings and values are internalized
39// (moved into the V8 heap).
40namespace v8 {
41namespace internal {
42
43class AstString : public ZoneObject {
44 public:
45  explicit AstString(bool is_raw)
46      : next_(nullptr), bit_field_(IsRawStringBits::encode(is_raw)) {}
47
48  int length() const;
49  bool IsEmpty() const { return length() == 0; }
50
51  // Puts the string into the V8 heap.
52  void Internalize(Isolate* isolate);
53
54  // This function can be called after internalizing.
55  V8_INLINE Handle<String> string() const {
56    DCHECK(!string_.is_null());
57    return string_;
58  }
59
60  AstString** next_location() { return &next_; }
61  AstString* next() const { return next_; }
62
63 protected:
64  // Handle<String>::null() until internalized.
65  Handle<String> string_;
66  AstString* next_;
67  // Poor-man's virtual dispatch to AstRawString / AstConsString. Takes less
68  // memory.
69  class IsRawStringBits : public BitField<bool, 0, 1> {};
70  int bit_field_;
71};
72
73
74class AstRawString final : public AstString {
75 public:
76  int length() const {
77    if (is_one_byte()) return literal_bytes_.length();
78    return literal_bytes_.length() / 2;
79  }
80
81  int byte_length() const { return literal_bytes_.length(); }
82
83  void Internalize(Isolate* isolate);
84
85  bool AsArrayIndex(uint32_t* index) const;
86
87  // The string is not null-terminated, use length() to find out the length.
88  const unsigned char* raw_data() const {
89    return literal_bytes_.start();
90  }
91
92  bool is_one_byte() const { return IsOneByteBits::decode(bit_field_); }
93
94  bool IsOneByteEqualTo(const char* data) const;
95  uint16_t FirstCharacter() const {
96    if (is_one_byte()) return literal_bytes_[0];
97    const uint16_t* c =
98        reinterpret_cast<const uint16_t*>(literal_bytes_.start());
99    return *c;
100  }
101
102  // For storing AstRawStrings in a hash map.
103  uint32_t hash() const {
104    return hash_;
105  }
106
107 private:
108  friend class AstValueFactory;
109  friend class AstRawStringInternalizationKey;
110
111  AstRawString(bool is_one_byte, const Vector<const byte>& literal_bytes,
112               uint32_t hash)
113      : AstString(true), hash_(hash), literal_bytes_(literal_bytes) {
114    bit_field_ |= IsOneByteBits::encode(is_one_byte);
115  }
116
117  AstRawString() : AstString(true), hash_(0) {
118    bit_field_ |= IsOneByteBits::encode(true);
119  }
120
121  class IsOneByteBits : public BitField<bool, IsRawStringBits::kNext, 1> {};
122
123  uint32_t hash_;
124  // Points to memory owned by Zone.
125  Vector<const byte> literal_bytes_;
126};
127
128
129class AstConsString final : public AstString {
130 public:
131  AstConsString(const AstString* left, const AstString* right)
132      : AstString(false),
133        length_(left->length() + right->length()),
134        left_(left),
135        right_(right) {}
136
137  int length() const { return length_; }
138
139  void Internalize(Isolate* isolate);
140
141 private:
142  const int length_;
143  const AstString* left_;
144  const AstString* right_;
145};
146
147
148// AstValue is either a string, a number, a string array, a boolean, or a
149// special value (null, undefined, the hole).
150class AstValue : public ZoneObject {
151 public:
152  bool IsString() const {
153    return type_ == STRING;
154  }
155
156  bool IsNumber() const {
157    return type_ == NUMBER || type_ == NUMBER_WITH_DOT || type_ == SMI ||
158           type_ == SMI_WITH_DOT;
159  }
160
161  bool ContainsDot() const {
162    return type_ == NUMBER_WITH_DOT || type_ == SMI_WITH_DOT;
163  }
164
165  const AstRawString* AsString() const {
166    CHECK_EQ(STRING, type_);
167    return string_;
168  }
169
170  double AsNumber() const {
171    if (type_ == NUMBER || type_ == NUMBER_WITH_DOT)
172      return number_;
173    if (type_ == SMI || type_ == SMI_WITH_DOT)
174      return smi_;
175    UNREACHABLE();
176    return 0;
177  }
178
179  Smi* AsSmi() const {
180    CHECK(type_ == SMI || type_ == SMI_WITH_DOT);
181    return Smi::FromInt(smi_);
182  }
183
184  bool EqualsString(const AstRawString* string) const {
185    return type_ == STRING && string_ == string;
186  }
187
188  bool IsPropertyName() const;
189
190  bool BooleanValue() const;
191
192  bool IsSmi() const { return type_ == SMI || type_ == SMI_WITH_DOT; }
193  bool IsFalse() const { return type_ == BOOLEAN && !bool_; }
194  bool IsTrue() const { return type_ == BOOLEAN && bool_; }
195  bool IsUndefined() const { return type_ == UNDEFINED; }
196  bool IsTheHole() const { return type_ == THE_HOLE; }
197  bool IsNull() const { return type_ == NULL_TYPE; }
198
199  void Internalize(Isolate* isolate);
200
201  // Can be called after Internalize has been called.
202  V8_INLINE Handle<Object> value() const {
203    if (type_ == STRING) {
204      return string_->string();
205    }
206    DCHECK(!value_.is_null());
207    return value_;
208  }
209  AstValue* next() const { return next_; }
210  void set_next(AstValue* next) { next_ = next; }
211
212 private:
213  friend class AstValueFactory;
214
215  enum Type {
216    STRING,
217    SYMBOL,
218    NUMBER,
219    NUMBER_WITH_DOT,
220    SMI,
221    SMI_WITH_DOT,
222    BOOLEAN,
223    NULL_TYPE,
224    UNDEFINED,
225    THE_HOLE
226  };
227
228  explicit AstValue(const AstRawString* s) : type_(STRING), next_(nullptr) {
229    string_ = s;
230  }
231
232  explicit AstValue(const char* name) : type_(SYMBOL), next_(nullptr) {
233    symbol_name_ = name;
234  }
235
236  explicit AstValue(double n, bool with_dot) : next_(nullptr) {
237    int int_value;
238    if (DoubleToSmiInteger(n, &int_value)) {
239      type_ = with_dot ? SMI_WITH_DOT : SMI;
240      smi_ = int_value;
241    } else {
242      type_ = with_dot ? NUMBER_WITH_DOT : NUMBER;
243      number_ = n;
244    }
245  }
246
247  AstValue(Type t, int i) : type_(t), next_(nullptr) {
248    DCHECK(type_ == SMI);
249    smi_ = i;
250  }
251
252  explicit AstValue(bool b) : type_(BOOLEAN), next_(nullptr) { bool_ = b; }
253
254  explicit AstValue(Type t) : type_(t), next_(nullptr) {
255    DCHECK(t == NULL_TYPE || t == UNDEFINED || t == THE_HOLE);
256  }
257
258  Type type_;
259
260  // Uninternalized value.
261  union {
262    const AstRawString* string_;
263    double number_;
264    int smi_;
265    bool bool_;
266    const AstRawString* strings_;
267    const char* symbol_name_;
268  };
269
270  // Handle<String>::null() until internalized.
271  Handle<Object> value_;
272  AstValue* next_;
273};
274
275
// For generating constants.
//
// X-macro over the predefined one-byte strings. V is invoked once per entry
// as V(name, literal), in the order listed. AstValueFactory expands the list
// to declare one cached name##_string_ member and one name##_string()
// accessor per entry, so the order of the entries determines the order of
// those members.
#define STRING_CONSTANTS(V)                     \
  V(anonymous_function, "(anonymous function)") \
  V(arguments, "arguments")                     \
  V(async, "async")                             \
  V(await, "await")                             \
  V(constructor, "constructor")                 \
  V(default, "default")                         \
  V(done, "done")                               \
  V(dot, ".")                                   \
  V(dot_class_field_init, ".class-field-init")  \
  V(dot_for, ".for")                            \
  V(dot_generator_object, ".generator_object")  \
  V(dot_iterator, ".iterator")                  \
  V(dot_result, ".result")                      \
  V(dot_switch_tag, ".switch_tag")              \
  V(dot_catch, ".catch")                        \
  V(empty, "")                                  \
  V(eval, "eval")                               \
  V(function, "function")                       \
  V(get_space, "get ")                          \
  V(length, "length")                           \
  V(let, "let")                                 \
  V(native, "native")                           \
  V(new_target, ".new.target")                  \
  V(next, "next")                               \
  V(proto, "__proto__")                         \
  V(prototype, "prototype")                     \
  V(return, "return")                           \
  V(set_space, "set ")                          \
  V(star_default_star, "*default*")             \
  V(this, "this")                               \
  V(this_function, ".this_function")            \
  V(throw, "throw")                             \
  V(undefined, "undefined")                     \
  V(use_asm, "use asm")                         \
  V(use_strict, "use strict")                   \
  V(value, "value")
314
// X-macro over the non-string singleton values. V is invoked once per entry
// as V(name), in the order listed; AstValueFactory expands the list to
// declare one cached AstValue* name##_ member per entry.
#define OTHER_CONSTANTS(V) \
  V(true_value)            \
  V(false_value)           \
  V(null_value)            \
  V(undefined_value)       \
  V(the_hole_value)
321
322class AstValueFactory {
323 public:
324  AstValueFactory(Zone* zone, uint32_t hash_seed)
325      : string_table_(AstRawStringCompare),
326        values_(nullptr),
327        strings_end_(&strings_),
328        zone_(zone),
329        hash_seed_(hash_seed) {
330    ResetStrings();
331#define F(name, str) name##_string_ = NULL;
332    STRING_CONSTANTS(F)
333#undef F
334#define F(name) name##_ = NULL;
335    OTHER_CONSTANTS(F)
336#undef F
337  }
338
339  Zone* zone() const { return zone_; }
340
341  const AstRawString* GetOneByteString(Vector<const uint8_t> literal) {
342    return GetOneByteStringInternal(literal);
343  }
344  const AstRawString* GetOneByteString(const char* string) {
345    return GetOneByteString(Vector<const uint8_t>(
346        reinterpret_cast<const uint8_t*>(string), StrLength(string)));
347  }
348  const AstRawString* GetTwoByteString(Vector<const uint16_t> literal) {
349    return GetTwoByteStringInternal(literal);
350  }
351  const AstRawString* GetString(Handle<String> literal);
352  const AstConsString* NewConsString(const AstString* left,
353                                     const AstString* right);
354  const AstRawString* ConcatStrings(const AstRawString* left,
355                                    const AstRawString* right);
356
357  void Internalize(Isolate* isolate);
358
359#define F(name, str)                                                    \
360  const AstRawString* name##_string() {                                 \
361    if (name##_string_ == NULL) {                                       \
362      const char* data = str;                                           \
363      name##_string_ = GetOneByteString(                                \
364          Vector<const uint8_t>(reinterpret_cast<const uint8_t*>(data), \
365                                static_cast<int>(strlen(data))));       \
366    }                                                                   \
367    return name##_string_;                                              \
368  }
369  STRING_CONSTANTS(F)
370#undef F
371
372  const AstValue* NewString(const AstRawString* string);
373  // A JavaScript symbol (ECMA-262 edition 6).
374  const AstValue* NewSymbol(const char* name);
375  const AstValue* NewNumber(double number, bool with_dot = false);
376  const AstValue* NewSmi(int number);
377  const AstValue* NewBoolean(bool b);
378  const AstValue* NewStringList(ZoneList<const AstRawString*>* strings);
379  const AstValue* NewNull();
380  const AstValue* NewUndefined();
381  const AstValue* NewTheHole();
382
383 private:
384  AstValue* AddValue(AstValue* value) {
385    value->set_next(values_);
386    values_ = value;
387    return value;
388  }
389  AstString* AddString(AstString* string) {
390    *strings_end_ = string;
391    strings_end_ = string->next_location();
392    return string;
393  }
394  void ResetStrings() {
395    strings_ = nullptr;
396    strings_end_ = &strings_;
397  }
398  AstRawString* GetOneByteStringInternal(Vector<const uint8_t> literal);
399  AstRawString* GetTwoByteStringInternal(Vector<const uint16_t> literal);
400  AstRawString* GetString(uint32_t hash, bool is_one_byte,
401                          Vector<const byte> literal_bytes);
402
403  static bool AstRawStringCompare(void* a, void* b);
404
405  // All strings are copied here, one after another (no NULLs inbetween).
406  base::CustomMatcherHashMap string_table_;
407  // For keeping track of all AstValues and AstRawStrings we've created (so that
408  // they can be internalized later).
409  AstValue* values_;
410  // We need to keep track of strings_ in order, since cons strings require
411  // their members to be internalized first.
412  AstString* strings_;
413  AstString** strings_end_;
414  Zone* zone_;
415
416  uint32_t hash_seed_;
417
418#define F(name, str) const AstRawString* name##_string_;
419  STRING_CONSTANTS(F)
420#undef F
421
422#define F(name) AstValue* name##_;
423  OTHER_CONSTANTS(F)
424#undef F
425};
426}  // namespace internal
427}  // namespace v8
428
429#undef STRING_CONSTANTS
430#undef OTHER_CONSTANTS
431
432#endif  // V8_AST_AST_VALUE_FACTORY_H_
433