1014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch// Copyright 2016 the V8 project authors. All rights reserved. 2014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch// Use of this source code is governed by a BSD-style license that can be 3014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch// found in the LICENSE file. 4014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 5014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#ifndef V8_REGEXP_REGEXP_PARSER_H_ 6014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#define V8_REGEXP_REGEXP_PARSER_H_ 7014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 8014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#include "src/objects.h" 9014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#include "src/regexp/regexp-ast.h" 10f3b273f5e6ffd2f6ba1c18a27a17db41dfb113c3Ben Murdoch#include "src/zone/zone.h" 11014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 12014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochnamespace v8 { 13014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochnamespace internal { 14014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 15014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochstruct RegExpCompileData; 16014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 17014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 18014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch// A BufferedZoneList is an automatically growing list, just like (and backed 19014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch// by) a ZoneList, that is optimized for the case of adding and removing 20014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch// a single element. The last element added is stored outside the backing list, 21014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch// and if no more than one element is ever added, the ZoneList isn't even 22014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch// allocated. 23014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch// Elements must not be NULL pointers. 24014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochtemplate <typename T, int initial_size> 25014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochclass BufferedZoneList { 26014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch public: 27014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch BufferedZoneList() : list_(NULL), last_(NULL) {} 28014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 29014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Adds element at end of list. This element is buffered and can 30014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // be read using last() or removed using RemoveLast until a new Add or until 31014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // RemoveLast or GetList has been called. 32014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch void Add(T* value, Zone* zone) { 33014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (last_ != NULL) { 34014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (list_ == NULL) { 35014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch list_ = new (zone) ZoneList<T*>(initial_size, zone); 36014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 37014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch list_->Add(last_, zone); 38014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 39014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch last_ = value; 40014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 41014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 42014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch T* last() { 43014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK(last_ != NULL); 44014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return last_; 45014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 46014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 47014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch T* RemoveLast() { 48014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK(last_ != NULL); 49014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch T* result = last_; 50014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if ((list_ != NULL) && (list_->length() > 0)) 51014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch last_ = list_->RemoveLast(); 52014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch else 53014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch last_ = NULL; 54014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return result; 55014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 56014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 57014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch T* Get(int i) { 58014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK((0 <= i) && (i < length())); 59014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (list_ == NULL) { 60014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK_EQ(0, i); 61014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return last_; 62014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } else { 63014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (i == list_->length()) { 64014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK(last_ != NULL); 65014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return last_; 66014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } else { 67014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return list_->at(i); 68014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 69014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 70014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 71014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 72014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch void Clear() { 73014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch list_ = NULL; 74014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch last_ = NULL; 75014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 76014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 77014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int length() { 78014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int length = (list_ == NULL) ? 0 : list_->length(); 79014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return length + ((last_ == NULL) ? 0 : 1); 80014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 81014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 82014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch ZoneList<T*>* GetList(Zone* zone) { 83014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (list_ == NULL) { 84014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch list_ = new (zone) ZoneList<T*>(initial_size, zone); 85014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 86014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (last_ != NULL) { 87014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch list_->Add(last_, zone); 88014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch last_ = NULL; 89014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 90014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return list_; 91014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 92014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 93014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch private: 94014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch ZoneList<T*>* list_; 95014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch T* last_; 96014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch}; 97014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 98014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 99014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch// Accumulates RegExp atoms and assertions into lists of terms and alternatives. 100014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochclass RegExpBuilder : public ZoneObject { 101014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch public: 102109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpBuilder(Zone* zone, bool ignore_case, bool unicode); 103014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch void AddCharacter(uc16 character); 104014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch void AddUnicodeCharacter(uc32 character); 105109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch void AddEscapedUnicodeCharacter(uc32 character); 106014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // "Adds" an empty expression. Does nothing except consume a 107014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // following quantifier 108014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch void AddEmpty(); 109109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch void AddCharacterClass(RegExpCharacterClass* cc); 110109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch void AddCharacterClassForDesugaring(uc32 c); 111014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch void AddAtom(RegExpTree* tree); 112109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch void AddTerm(RegExpTree* tree); 113014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch void AddAssertion(RegExpTree* tree); 114014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch void NewAlternative(); // '|' 115109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch bool AddQuantifierToAtom(int min, int max, 116014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpQuantifier::QuantifierType type); 117014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpTree* ToRegExp(); 118014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 119014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch private: 120109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch static const uc16 kNoPendingSurrogate = 0; 121109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch void AddLeadSurrogate(uc16 lead_surrogate); 122109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch void AddTrailSurrogate(uc16 trail_surrogate); 123109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch void FlushPendingSurrogate(); 124014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch void FlushCharacters(); 125014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch void FlushText(); 126014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch void FlushTerms(); 127109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch bool NeedsDesugaringForUnicode(RegExpCharacterClass* cc); 128109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch bool NeedsDesugaringForIgnoreCase(uc32 c); 129014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Zone* zone() const { return zone_; } 130109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch bool ignore_case() const { return ignore_case_; } 131109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch bool unicode() const { return unicode_; } 132014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 133014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Zone* zone_; 134014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool pending_empty_; 135109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch bool ignore_case_; 136109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch bool unicode_; 137014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch ZoneList<uc16>* characters_; 138109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch uc16 pending_surrogate_; 139014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch BufferedZoneList<RegExpTree, 2> terms_; 140014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch BufferedZoneList<RegExpTree, 2> text_; 141014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch BufferedZoneList<RegExpTree, 2> alternatives_; 142014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#ifdef DEBUG 143014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch enum { ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM } last_added_; 144014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#define LAST(x) last_added_ = x; 145014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#else 146014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#define LAST(x) 147014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#endif 148014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch}; 149014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 150014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 151014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochclass RegExpParser BASE_EMBEDDED { 152014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch public: 153109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpParser(FlatStringReader* in, Handle<String>* error, 154109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch JSRegExp::Flags flags, Isolate* isolate, Zone* zone); 155014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 156014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input, 157109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch JSRegExp::Flags flags, RegExpCompileData* result); 158014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 159014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpTree* ParsePattern(); 160014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpTree* ParseDisjunction(); 161014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpTree* ParseGroup(); 162014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpTree* ParseCharacterClass(); 163014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 164014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Parses a {...,...} quantifier and stores the range in the given 165014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // out parameters. 166014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool ParseIntervalQuantifier(int* min_out, int* max_out); 167014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 168014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Parses and returns a single escaped character. The character 169014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // must not be 'b' or 'B' since they are usually handle specially. 170014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch uc32 ParseClassCharacterEscape(); 171014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 172014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Checks whether the following is a length-digit hexadecimal number, 173014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // and sets the value if it is. 174014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool ParseHexEscape(int length, uc32* value); 175014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool ParseUnicodeEscape(uc32* value); 176014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool ParseUnlimitedLengthHexNumber(int max_value, uc32* value); 17713e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch bool ParsePropertyClass(ZoneList<CharacterRange>* result, bool negate); 178014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 179014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch uc32 ParseOctalLiteral(); 180014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 181014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Tries to parse the input as a back reference. If successful it 182014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // stores the result in the output parameter and returns true. If 183014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // it fails it will push back the characters read so the same characters 184014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // can be reparsed. 185014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool ParseBackReferenceIndex(int* index_out); 186014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 1873b9bc31999c9787eb726ecdbfd5796bfdec32a18Ben Murdoch bool ParseClassProperty(ZoneList<CharacterRange>* result); 188014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch CharacterRange ParseClassAtom(uc16* char_class); 189014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpTree* ReportError(Vector<const char> message); 190014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch void Advance(); 191014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch void Advance(int dist); 192014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch void Reset(int pos); 193014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 194014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Reports whether the pattern might be used as a literal search string. 195014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Only use if the result of the parse is a single atom node. 196014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool simple(); 197014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool contains_anchor() { return contains_anchor_; } 198014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch void set_contains_anchor() { contains_anchor_ = true; } 199014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int captures_started() { return captures_started_; } 200014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int position() { return next_pos_ - 1; } 201014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool failed() { return failed_; } 202109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch bool ignore_case() const { return ignore_case_; } 203109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch bool multiline() const { return multiline_; } 204109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch bool unicode() const { return unicode_; } 205014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 206109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch static bool IsSyntaxCharacterOrSlash(uc32 c); 207014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 208014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch static const int kMaxCaptures = 1 << 16; 209014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch static const uc32 kEndMarker = (1 << 21); 210014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 211014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch private: 212014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch enum SubexpressionType { 213014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch INITIAL, 214014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch CAPTURE, // All positive values represent captures. 215014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch POSITIVE_LOOKAROUND, 216014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch NEGATIVE_LOOKAROUND, 217014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch GROUPING 218014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch }; 219014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 220014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch class RegExpParserState : public ZoneObject { 221014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch public: 222014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpParserState(RegExpParserState* previous_state, 223014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch SubexpressionType group_type, 224014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpLookaround::Type lookaround_type, 22513e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch int disjunction_capture_index, 22613e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch const ZoneVector<uc16>* capture_name, bool ignore_case, 227109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch bool unicode, Zone* zone) 228014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch : previous_state_(previous_state), 229109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch builder_(new (zone) RegExpBuilder(zone, ignore_case, unicode)), 230014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch group_type_(group_type), 231014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch lookaround_type_(lookaround_type), 23213e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch disjunction_capture_index_(disjunction_capture_index), 23313e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch capture_name_(capture_name) {} 234014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Parser state of containing expression, if any. 235014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpParserState* previous_state() { return previous_state_; } 236014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool IsSubexpression() { return previous_state_ != NULL; } 237014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // RegExpBuilder building this regexp's AST. 238014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpBuilder* builder() { return builder_; } 239014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Type of regexp being parsed (parenthesized group or entire regexp). 240014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch SubexpressionType group_type() { return group_type_; } 241014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Lookahead or Lookbehind. 242014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpLookaround::Type lookaround_type() { return lookaround_type_; } 243014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Index in captures array of first capture in this sub-expression, if any. 244014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Also the capture index of this sub-expression itself, if group_type 245014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // is CAPTURE. 246014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int capture_index() { return disjunction_capture_index_; } 24713e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch // The name of the current sub-expression, if group_type is CAPTURE. Only 24813e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch // used for named captures. 24913e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch const ZoneVector<uc16>* capture_name() { return capture_name_; } 25013e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch 25113e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch bool IsNamedCapture() const { return capture_name_ != nullptr; } 252014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 253014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Check whether the parser is inside a capture group with the given index. 254014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool IsInsideCaptureGroup(int index); 25513e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch // Check whether the parser is inside a capture group with the given name. 25613e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch bool IsInsideCaptureGroup(const ZoneVector<uc16>* name); 257014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 258014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch private: 259014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Linked list implementation of stack of states. 260014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpParserState* previous_state_; 261014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Builder for the stored disjunction. 262014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpBuilder* builder_; 263014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Stored disjunction type (capture, look-ahead or grouping), if any. 264014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch SubexpressionType group_type_; 265014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Stored read direction. 266014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpLookaround::Type lookaround_type_; 267014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Stored disjunction's capture index (if any). 268014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int disjunction_capture_index_; 26913e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch // Stored capture name (if any). 27013e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch const ZoneVector<uc16>* capture_name_; 271014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch }; 272014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 273014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Return the 1-indexed RegExpCapture object, allocate if necessary. 274014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpCapture* GetCapture(int index); 275014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 27613e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch // Creates a new named capture at the specified index. Must be called exactly 27713e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch // once for each named capture. Fails if a capture with the same name is 27813e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch // encountered. 27913e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch bool CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name, int index); 28013e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch 28113e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch // Parses the name of a capture group (?<name>pattern). The name must adhere 28213e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch // to IdentifierName in the ECMAScript standard. 28313e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch const ZoneVector<uc16>* ParseCaptureGroupName(); 28413e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch 28513e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch bool ParseNamedBackReference(RegExpBuilder* builder, 28613e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch RegExpParserState* state); 28713e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch 28813e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch // After the initial parsing pass, patch corresponding RegExpCapture objects 28913e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch // into all RegExpBackReferences. This is done after initial parsing in order 29013e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch // to avoid complicating cases in which references comes before the capture. 29113e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch void PatchNamedBackReferences(); 29213e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch 29313e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch Handle<FixedArray> CreateCaptureNameMap(); 29413e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch 295014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Isolate* isolate() { return isolate_; } 296014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Zone* zone() const { return zone_; } 297014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 298014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch uc32 current() { return current_; } 299014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool has_more() { return has_more_; } 300014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool has_next() { return next_pos_ < in()->length(); } 301014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch uc32 Next(); 302109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch template <bool update_position> 303109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch uc32 ReadNext(); 304014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch FlatStringReader* in() { return in_; } 305014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch void ScanForCaptures(); 306014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 307014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Isolate* isolate_; 308014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Zone* zone_; 309014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Handle<String>* error_; 310014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch ZoneList<RegExpCapture*>* captures_; 31113e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch ZoneList<RegExpCapture*>* named_captures_; 31213e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch ZoneList<RegExpBackReference*>* named_back_references_; 313014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch FlatStringReader* in_; 314014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch uc32 current_; 315109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch bool ignore_case_; 316109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch bool multiline_; 317109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch bool unicode_; 318014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int next_pos_; 319014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int captures_started_; 320014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // The capture count is only valid after we have scanned for captures. 321014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int capture_count_; 322014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool has_more_; 323014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool simple_; 324014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool contains_anchor_; 325014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool is_scanned_for_captures_; 326014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool failed_; 327014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch}; 328014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 329014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch} // namespace internal 330014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch} // namespace v8 331014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 332014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#endif // V8_REGEXP_REGEXP_PARSER_H_ 333