1// Copyright 2012 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6//     * Redistributions of source code must retain the above copyright
7//       notice, this list of conditions and the following disclaimer.
8//     * Redistributions in binary form must reproduce the above
9//       copyright notice, this list of conditions and the following
10//       disclaimer in the documentation and/or other materials provided
11//       with the distribution.
12//     * Neither the name of Google Inc. nor the names of its
13//       contributors may be used to endorse or promote products derived
14//       from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#ifndef V8_PREPARSER_H
29#define V8_PREPARSER_H
30
31#include "hashmap.h"
32#include "token.h"
33#include "scanner.h"
34
35namespace v8 {
36
37namespace internal {
38class UnicodeCache;
39}
40
41namespace preparser {
42
43typedef uint8_t byte;
44
45// Preparsing checks a JavaScript program and emits preparse-data that helps
46// a later parsing to be faster.
47// See preparse-data-format.h for the data format.
48
49// The PreParser checks that the syntax follows the grammar for JavaScript,
50// and collects some information about the program along the way.
51// The grammar check is only performed in order to understand the program
52// sufficiently to deduce some information about it, that can be used
53// to speed up later parsing. Finding errors is not the goal of pre-parsing,
54// rather it is to speed up properly written and correct programs.
55// That means that contextual checks (like a label being declared where
56// it is used) are generally omitted.
57
58namespace i = v8::internal;
59
60class DuplicateFinder {
61 public:
62  explicit DuplicateFinder(i::UnicodeCache* constants)
63      : unicode_constants_(constants),
64        backing_store_(16),
65        map_(&Match) { }
66
67  int AddAsciiSymbol(i::Vector<const char> key, int value);
68  int AddUtf16Symbol(i::Vector<const uint16_t> key, int value);
69  // Add a a number literal by converting it (if necessary)
70  // to the string that ToString(ToNumber(literal)) would generate.
71  // and then adding that string with AddAsciiSymbol.
72  // This string is the actual value used as key in an object literal,
73  // and the one that must be different from the other keys.
74  int AddNumber(i::Vector<const char> key, int value);
75
76 private:
77  int AddSymbol(i::Vector<const byte> key, bool is_ascii, int value);
78  // Backs up the key and its length in the backing store.
79  // The backup is stored with a base 127 encoding of the
80  // length (plus a bit saying whether the string is ASCII),
81  // followed by the bytes of the key.
82  byte* BackupKey(i::Vector<const byte> key, bool is_ascii);
83
84  // Compare two encoded keys (both pointing into the backing store)
85  // for having the same base-127 encoded lengths and ASCII-ness,
86  // and then having the same 'length' bytes following.
87  static bool Match(void* first, void* second);
88  // Creates a hash from a sequence of bytes.
89  static uint32_t Hash(i::Vector<const byte> key, bool is_ascii);
90  // Checks whether a string containing a JS number is its canonical
91  // form.
92  static bool IsNumberCanonical(i::Vector<const char> key);
93
94  // Size of buffer. Sufficient for using it to call DoubleToCString in
95  // from conversions.h.
96  static const int kBufferSize = 100;
97
98  i::UnicodeCache* unicode_constants_;
99  // Backing store used to store strings used as hashmap keys.
100  i::SequenceCollector<unsigned char> backing_store_;
101  i::HashMap map_;
102  // Buffer used for string->number->canonical string conversions.
103  char number_buffer_[kBufferSize];
104};
105
106
107class PreParser {
108 public:
109  enum PreParseResult {
110    kPreParseStackOverflow,
111    kPreParseSuccess
112  };
113
114
115  PreParser(i::Scanner* scanner,
116            i::ParserRecorder* log,
117            uintptr_t stack_limit,
118            bool allow_lazy,
119            bool allow_natives_syntax,
120            bool allow_modules)
121      : scanner_(scanner),
122        log_(log),
123        scope_(NULL),
124        stack_limit_(stack_limit),
125        strict_mode_violation_location_(i::Scanner::Location::invalid()),
126        strict_mode_violation_type_(NULL),
127        stack_overflow_(false),
128        allow_lazy_(allow_lazy),
129        allow_modules_(allow_modules),
130        allow_natives_syntax_(allow_natives_syntax),
131        parenthesized_function_(false),
132        harmony_scoping_(scanner->HarmonyScoping()) { }
133
134  ~PreParser() {}
135
136  // Pre-parse the program from the character stream; returns true on
137  // success (even if parsing failed, the pre-parse data successfully
138  // captured the syntax error), and false if a stack-overflow happened
139  // during parsing.
140  static PreParseResult PreParseProgram(i::Scanner* scanner,
141                                        i::ParserRecorder* log,
142                                        int flags,
143                                        uintptr_t stack_limit) {
144    bool allow_lazy = (flags & i::kAllowLazy) != 0;
145    bool allow_natives_syntax = (flags & i::kAllowNativesSyntax) != 0;
146    bool allow_modules = (flags & i::kAllowModules) != 0;
147    return PreParser(scanner, log, stack_limit, allow_lazy,
148                     allow_natives_syntax, allow_modules).PreParse();
149  }
150
151  // Parses a single function literal, from the opening parentheses before
152  // parameters to the closing brace after the body.
153  // Returns a FunctionEntry describing the body of the funciton in enough
154  // detail that it can be lazily compiled.
155  // The scanner is expected to have matched the "function" keyword and
156  // parameters, and have consumed the initial '{'.
157  // At return, unless an error occured, the scanner is positioned before the
158  // the final '}'.
159  PreParseResult PreParseLazyFunction(i::LanguageMode mode,
160                                      i::ParserRecorder* log);
161
162 private:
163  // Used to detect duplicates in object literals. Each of the values
164  // kGetterProperty, kSetterProperty and kValueProperty represents
165  // a type of object literal property. When parsing a property, its
166  // type value is stored in the DuplicateFinder for the property name.
167  // Values are chosen so that having intersection bits means the there is
168  // an incompatibility.
169  // I.e., you can add a getter to a property that already has a setter, since
170  // kGetterProperty and kSetterProperty doesn't intersect, but not if it
171  // already has a getter or a value. Adding the getter to an existing
172  // setter will store the value (kGetterProperty | kSetterProperty), which
173  // is incompatible with adding any further properties.
174  enum PropertyType {
175    kNone = 0,
176    // Bit patterns representing different object literal property types.
177    kGetterProperty = 1,
178    kSetterProperty = 2,
179    kValueProperty = 7,
180    // Helper constants.
181    kValueFlag = 4
182  };
183
184  // Checks the type of conflict based on values coming from PropertyType.
185  bool HasConflict(int type1, int type2) { return (type1 & type2) != 0; }
186  bool IsDataDataConflict(int type1, int type2) {
187    return ((type1 & type2) & kValueFlag) != 0;
188  }
189  bool IsDataAccessorConflict(int type1, int type2) {
190    return ((type1 ^ type2) & kValueFlag) != 0;
191  }
192  bool IsAccessorAccessorConflict(int type1, int type2) {
193    return ((type1 | type2) & kValueFlag) == 0;
194  }
195
196
197  void CheckDuplicate(DuplicateFinder* finder,
198                      i::Token::Value property,
199                      int type,
200                      bool* ok);
201
202  // These types form an algebra over syntactic categories that is just
203  // rich enough to let us recognize and propagate the constructs that
204  // are either being counted in the preparser data, or is important
205  // to throw the correct syntax error exceptions.
206
207  enum ScopeType {
208    kTopLevelScope,
209    kFunctionScope
210  };
211
212  enum VariableDeclarationContext {
213    kSourceElement,
214    kStatement,
215    kForStatement
216  };
217
218  // If a list of variable declarations includes any initializers.
219  enum VariableDeclarationProperties {
220    kHasInitializers,
221    kHasNoInitializers
222  };
223
224  class Expression;
225
226  class Identifier {
227   public:
228    static Identifier Default() {
229      return Identifier(kUnknownIdentifier);
230    }
231    static Identifier Eval()  {
232      return Identifier(kEvalIdentifier);
233    }
234    static Identifier Arguments()  {
235      return Identifier(kArgumentsIdentifier);
236    }
237    static Identifier FutureReserved()  {
238      return Identifier(kFutureReservedIdentifier);
239    }
240    static Identifier FutureStrictReserved()  {
241      return Identifier(kFutureStrictReservedIdentifier);
242    }
243    bool IsEval() { return type_ == kEvalIdentifier; }
244    bool IsArguments() { return type_ == kArgumentsIdentifier; }
245    bool IsEvalOrArguments() { return type_ >= kEvalIdentifier; }
246    bool IsFutureReserved() { return type_ == kFutureReservedIdentifier; }
247    bool IsFutureStrictReserved() {
248      return type_ == kFutureStrictReservedIdentifier;
249    }
250    bool IsValidStrictVariable() { return type_ == kUnknownIdentifier; }
251
252   private:
253    enum Type {
254      kUnknownIdentifier,
255      kFutureReservedIdentifier,
256      kFutureStrictReservedIdentifier,
257      kEvalIdentifier,
258      kArgumentsIdentifier
259    };
260    explicit Identifier(Type type) : type_(type) { }
261    Type type_;
262
263    friend class Expression;
264  };
265
266  // Bits 0 and 1 are used to identify the type of expression:
267  // If bit 0 is set, it's an identifier.
268  // if bit 1 is set, it's a string literal.
269  // If neither is set, it's no particular type, and both set isn't
270  // use yet.
271  // Bit 2 is used to mark the expression as being parenthesized,
272  // so "(foo)" isn't recognized as a pure identifier (and possible label).
273  class Expression {
274   public:
275    static Expression Default() {
276      return Expression(kUnknownExpression);
277    }
278
279    static Expression FromIdentifier(Identifier id) {
280      return Expression(kIdentifierFlag | (id.type_ << kIdentifierShift));
281    }
282
283    static Expression StringLiteral() {
284      return Expression(kUnknownStringLiteral);
285    }
286
287    static Expression UseStrictStringLiteral() {
288      return Expression(kUseStrictString);
289    }
290
291    static Expression This() {
292      return Expression(kThisExpression);
293    }
294
295    static Expression ThisProperty() {
296      return Expression(kThisPropertyExpression);
297    }
298
299    static Expression StrictFunction() {
300      return Expression(kStrictFunctionExpression);
301    }
302
303    bool IsIdentifier() {
304      return (code_ & kIdentifierFlag) != 0;
305    }
306
307    // Only works corretly if it is actually an identifier expression.
308    PreParser::Identifier AsIdentifier() {
309      return PreParser::Identifier(
310          static_cast<PreParser::Identifier::Type>(code_ >> kIdentifierShift));
311    }
312
313    bool IsParenthesized() {
314      // If bit 0 or 1 is set, we interpret bit 2 as meaning parenthesized.
315      return (code_ & 7) > 4;
316    }
317
318    bool IsRawIdentifier() {
319      return !IsParenthesized() && IsIdentifier();
320    }
321
322    bool IsStringLiteral() { return (code_ & kStringLiteralFlag) != 0; }
323
324    bool IsRawStringLiteral() {
325      return !IsParenthesized() && IsStringLiteral();
326    }
327
328    bool IsUseStrictLiteral() {
329      return (code_ & kStringLiteralMask) == kUseStrictString;
330    }
331
332    bool IsThis() {
333      return code_ == kThisExpression;
334    }
335
336    bool IsThisProperty() {
337      return code_ == kThisPropertyExpression;
338    }
339
340    bool IsStrictFunction() {
341      return code_ == kStrictFunctionExpression;
342    }
343
344    Expression Parenthesize() {
345      int type = code_ & 3;
346      if (type != 0) {
347        // Identifiers and string literals can be parenthesized.
348        // They no longer work as labels or directive prologues,
349        // but are still recognized in other contexts.
350        return Expression(code_ | kParentesizedExpressionFlag);
351      }
352      // For other types of expressions, it's not important to remember
353      // the parentheses.
354      return *this;
355    }
356
357   private:
358    // First two/three bits are used as flags.
359    // Bit 0 and 1 represent identifiers or strings literals, and are
360    // mutually exclusive, but can both be absent.
361    // If bit 0 or 1 are set, bit 2 marks that the expression has
362    // been wrapped in parentheses (a string literal can no longer
363    // be a directive prologue, and an identifier can no longer be
364    // a label.
365    enum  {
366      kUnknownExpression = 0,
367      // Identifiers
368      kIdentifierFlag = 1,  // Used to detect labels.
369      kIdentifierShift = 3,
370
371      kStringLiteralFlag = 2,  // Used to detect directive prologue.
372      kUnknownStringLiteral = kStringLiteralFlag,
373      kUseStrictString = kStringLiteralFlag | 8,
374      kStringLiteralMask = kUseStrictString,
375
376      kParentesizedExpressionFlag = 4,  // Only if identifier or string literal.
377
378      // Below here applies if neither identifier nor string literal.
379      kThisExpression = 4,
380      kThisPropertyExpression = 8,
381      kStrictFunctionExpression = 12
382    };
383
384    explicit Expression(int expression_code) : code_(expression_code) { }
385
386    int code_;
387  };
388
389  class Statement {
390   public:
391    static Statement Default() {
392      return Statement(kUnknownStatement);
393    }
394
395    static Statement FunctionDeclaration() {
396      return Statement(kFunctionDeclaration);
397    }
398
399    // Creates expression statement from expression.
400    // Preserves being an unparenthesized string literal, possibly
401    // "use strict".
402    static Statement ExpressionStatement(Expression expression) {
403      if (!expression.IsParenthesized()) {
404        if (expression.IsUseStrictLiteral()) {
405          return Statement(kUseStrictExpressionStatement);
406        }
407        if (expression.IsStringLiteral()) {
408          return Statement(kStringLiteralExpressionStatement);
409        }
410      }
411      return Default();
412    }
413
414    bool IsStringLiteral() {
415      return code_ != kUnknownStatement;
416    }
417
418    bool IsUseStrictLiteral() {
419      return code_ == kUseStrictExpressionStatement;
420    }
421
422    bool IsFunctionDeclaration() {
423      return code_ == kFunctionDeclaration;
424    }
425
426   private:
427    enum Type {
428      kUnknownStatement,
429      kStringLiteralExpressionStatement,
430      kUseStrictExpressionStatement,
431      kFunctionDeclaration
432    };
433
434    explicit Statement(Type code) : code_(code) {}
435    Type code_;
436  };
437
438  enum SourceElements {
439    kUnknownSourceElements
440  };
441
442  typedef int Arguments;
443
444  class Scope {
445   public:
446    Scope(Scope** variable, ScopeType type)
447        : variable_(variable),
448          prev_(*variable),
449          type_(type),
450          materialized_literal_count_(0),
451          expected_properties_(0),
452          with_nesting_count_(0),
453          language_mode_(
454              (prev_ != NULL) ? prev_->language_mode() : i::CLASSIC_MODE) {
455      *variable = this;
456    }
457    ~Scope() { *variable_ = prev_; }
458    void NextMaterializedLiteralIndex() { materialized_literal_count_++; }
459    void AddProperty() { expected_properties_++; }
460    ScopeType type() { return type_; }
461    int expected_properties() { return expected_properties_; }
462    int materialized_literal_count() { return materialized_literal_count_; }
463    bool IsInsideWith() { return with_nesting_count_ != 0; }
464    bool is_classic_mode() {
465      return language_mode_ == i::CLASSIC_MODE;
466    }
467    i::LanguageMode language_mode() {
468      return language_mode_;
469    }
470    void set_language_mode(i::LanguageMode language_mode) {
471      language_mode_ = language_mode;
472    }
473    void EnterWith() { with_nesting_count_++; }
474    void LeaveWith() { with_nesting_count_--; }
475
476   private:
477    Scope** const variable_;
478    Scope* const prev_;
479    const ScopeType type_;
480    int materialized_literal_count_;
481    int expected_properties_;
482    int with_nesting_count_;
483    i::LanguageMode language_mode_;
484  };
485
486  // Preparse the program. Only called in PreParseProgram after creating
487  // the instance.
488  PreParseResult PreParse() {
489    Scope top_scope(&scope_, kTopLevelScope);
490    bool ok = true;
491    int start_position = scanner_->peek_location().beg_pos;
492    ParseSourceElements(i::Token::EOS, &ok);
493    if (stack_overflow_) return kPreParseStackOverflow;
494    if (!ok) {
495      ReportUnexpectedToken(scanner_->current_token());
496    } else if (!scope_->is_classic_mode()) {
497      CheckOctalLiteral(start_position, scanner_->location().end_pos, &ok);
498    }
499    return kPreParseSuccess;
500  }
501
502  // Report syntax error
503  void ReportUnexpectedToken(i::Token::Value token);
504  void ReportMessageAt(i::Scanner::Location location,
505                       const char* type,
506                       const char* name_opt) {
507    log_->LogMessage(location.beg_pos, location.end_pos, type, name_opt);
508  }
509  void ReportMessageAt(int start_pos,
510                       int end_pos,
511                       const char* type,
512                       const char* name_opt) {
513    log_->LogMessage(start_pos, end_pos, type, name_opt);
514  }
515
516  void CheckOctalLiteral(int beg_pos, int end_pos, bool* ok);
517
518  // All ParseXXX functions take as the last argument an *ok parameter
519  // which is set to false if parsing failed; it is unchanged otherwise.
520  // By making the 'exception handling' explicit, we are forced to check
521  // for failure at the call sites.
522  Statement ParseSourceElement(bool* ok);
523  SourceElements ParseSourceElements(int end_token, bool* ok);
524  Statement ParseStatement(bool* ok);
525  Statement ParseFunctionDeclaration(bool* ok);
526  Statement ParseBlock(bool* ok);
527  Statement ParseVariableStatement(VariableDeclarationContext var_context,
528                                   bool* ok);
529  Statement ParseVariableDeclarations(VariableDeclarationContext var_context,
530                                      VariableDeclarationProperties* decl_props,
531                                      int* num_decl,
532                                      bool* ok);
533  Statement ParseExpressionOrLabelledStatement(bool* ok);
534  Statement ParseIfStatement(bool* ok);
535  Statement ParseContinueStatement(bool* ok);
536  Statement ParseBreakStatement(bool* ok);
537  Statement ParseReturnStatement(bool* ok);
538  Statement ParseWithStatement(bool* ok);
539  Statement ParseSwitchStatement(bool* ok);
540  Statement ParseDoWhileStatement(bool* ok);
541  Statement ParseWhileStatement(bool* ok);
542  Statement ParseForStatement(bool* ok);
543  Statement ParseThrowStatement(bool* ok);
544  Statement ParseTryStatement(bool* ok);
545  Statement ParseDebuggerStatement(bool* ok);
546
547  Expression ParseExpression(bool accept_IN, bool* ok);
548  Expression ParseAssignmentExpression(bool accept_IN, bool* ok);
549  Expression ParseConditionalExpression(bool accept_IN, bool* ok);
550  Expression ParseBinaryExpression(int prec, bool accept_IN, bool* ok);
551  Expression ParseUnaryExpression(bool* ok);
552  Expression ParsePostfixExpression(bool* ok);
553  Expression ParseLeftHandSideExpression(bool* ok);
554  Expression ParseNewExpression(bool* ok);
555  Expression ParseMemberExpression(bool* ok);
556  Expression ParseMemberWithNewPrefixesExpression(unsigned new_count, bool* ok);
557  Expression ParsePrimaryExpression(bool* ok);
558  Expression ParseArrayLiteral(bool* ok);
559  Expression ParseObjectLiteral(bool* ok);
560  Expression ParseRegExpLiteral(bool seen_equal, bool* ok);
561  Expression ParseV8Intrinsic(bool* ok);
562
563  Arguments ParseArguments(bool* ok);
564  Expression ParseFunctionLiteral(bool* ok);
565  void ParseLazyFunctionLiteralBody(bool* ok);
566
567  Identifier ParseIdentifier(bool* ok);
568  Identifier ParseIdentifierName(bool* ok);
569  Identifier ParseIdentifierNameOrGetOrSet(bool* is_get,
570                                           bool* is_set,
571                                           bool* ok);
572
573  // Logs the currently parsed literal as a symbol in the preparser data.
574  void LogSymbol();
575  // Log the currently parsed identifier.
576  Identifier GetIdentifierSymbol();
577  // Log the currently parsed string literal.
578  Expression GetStringSymbol();
579
580  i::Token::Value peek() {
581    if (stack_overflow_) return i::Token::ILLEGAL;
582    return scanner_->peek();
583  }
584
585  i::Token::Value Next() {
586    if (stack_overflow_) return i::Token::ILLEGAL;
587    {
588      int marker;
589      if (reinterpret_cast<uintptr_t>(&marker) < stack_limit_) {
590        // Further calls to peek/Next will return illegal token.
591        // The current one will still be returned. It might already
592        // have been seen using peek.
593        stack_overflow_ = true;
594      }
595    }
596    return scanner_->Next();
597  }
598
599  bool peek_any_identifier();
600
601  void set_language_mode(i::LanguageMode language_mode) {
602    scope_->set_language_mode(language_mode);
603  }
604
605  bool is_classic_mode() {
606    return scope_->language_mode() == i::CLASSIC_MODE;
607  }
608
609  bool is_extended_mode() {
610    return scope_->language_mode() == i::EXTENDED_MODE;
611  }
612
613  i::LanguageMode language_mode() { return scope_->language_mode(); }
614
615  void Consume(i::Token::Value token) { Next(); }
616
617  void Expect(i::Token::Value token, bool* ok) {
618    if (Next() != token) {
619      *ok = false;
620    }
621  }
622
623  bool Check(i::Token::Value token) {
624    i::Token::Value next = peek();
625    if (next == token) {
626      Consume(next);
627      return true;
628    }
629    return false;
630  }
631  void ExpectSemicolon(bool* ok);
632
633  static int Precedence(i::Token::Value tok, bool accept_IN);
634
635  void SetStrictModeViolation(i::Scanner::Location,
636                              const char* type,
637                              bool* ok);
638
639  void CheckDelayedStrictModeViolation(int beg_pos, int end_pos, bool* ok);
640
641  void StrictModeIdentifierViolation(i::Scanner::Location,
642                                     const char* eval_args_type,
643                                     Identifier identifier,
644                                     bool* ok);
645
646  i::Scanner* scanner_;
647  i::ParserRecorder* log_;
648  Scope* scope_;
649  uintptr_t stack_limit_;
650  i::Scanner::Location strict_mode_violation_location_;
651  const char* strict_mode_violation_type_;
652  bool stack_overflow_;
653  bool allow_lazy_;
654  bool allow_modules_;
655  bool allow_natives_syntax_;
656  bool parenthesized_function_;
657  bool harmony_scoping_;
658};
659} }  // v8::preparser
660
661#endif  // V8_PREPARSER_H
662