1// Copyright 2016 the V8 project authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "src/ostreams.h" 6#include "src/regexp/regexp-ast.h" 7 8namespace v8 { 9namespace internal { 10 11#define MAKE_ACCEPT(Name) \ 12 void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \ 13 return visitor->Visit##Name(this, data); \ 14 } 15FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT) 16#undef MAKE_ACCEPT 17 18#define MAKE_TYPE_CASE(Name) \ 19 RegExp##Name* RegExpTree::As##Name() { return NULL; } \ 20 bool RegExpTree::Is##Name() { return false; } 21FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) 22#undef MAKE_TYPE_CASE 23 24#define MAKE_TYPE_CASE(Name) \ 25 RegExp##Name* RegExp##Name::As##Name() { return this; } \ 26 bool RegExp##Name::Is##Name() { return true; } 27FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) 28#undef MAKE_TYPE_CASE 29 30 31static Interval ListCaptureRegisters(ZoneList<RegExpTree*>* children) { 32 Interval result = Interval::Empty(); 33 for (int i = 0; i < children->length(); i++) 34 result = result.Union(children->at(i)->CaptureRegisters()); 35 return result; 36} 37 38 39Interval RegExpAlternative::CaptureRegisters() { 40 return ListCaptureRegisters(nodes()); 41} 42 43 44Interval RegExpDisjunction::CaptureRegisters() { 45 return ListCaptureRegisters(alternatives()); 46} 47 48 49Interval RegExpLookaround::CaptureRegisters() { 50 return body()->CaptureRegisters(); 51} 52 53 54Interval RegExpCapture::CaptureRegisters() { 55 Interval self(StartRegister(index()), EndRegister(index())); 56 return self.Union(body()->CaptureRegisters()); 57} 58 59 60Interval RegExpQuantifier::CaptureRegisters() { 61 return body()->CaptureRegisters(); 62} 63 64 65bool RegExpAssertion::IsAnchoredAtStart() { 66 return assertion_type() == RegExpAssertion::START_OF_INPUT; 67} 68 69 70bool RegExpAssertion::IsAnchoredAtEnd() { 71 return assertion_type() == RegExpAssertion::END_OF_INPUT; 72} 73 74 75bool RegExpAlternative::IsAnchoredAtStart() { 76 ZoneList<RegExpTree*>* nodes = this->nodes(); 77 for (int i = 0; i < nodes->length(); i++) { 78 RegExpTree* node = nodes->at(i); 79 if (node->IsAnchoredAtStart()) { 80 return true; 81 } 82 if (node->max_match() > 0) { 83 return false; 84 } 85 } 86 return false; 87} 88 89 90bool RegExpAlternative::IsAnchoredAtEnd() { 91 ZoneList<RegExpTree*>* nodes = this->nodes(); 92 for (int i = nodes->length() - 1; i >= 0; i--) { 93 RegExpTree* node = nodes->at(i); 94 if (node->IsAnchoredAtEnd()) { 95 return true; 96 } 97 if (node->max_match() > 0) { 98 return false; 99 } 100 } 101 return false; 102} 103 104 105bool RegExpDisjunction::IsAnchoredAtStart() { 106 ZoneList<RegExpTree*>* alternatives = this->alternatives(); 107 for (int i = 0; i < alternatives->length(); i++) { 108 if (!alternatives->at(i)->IsAnchoredAtStart()) return false; 109 } 110 return true; 111} 112 113 114bool RegExpDisjunction::IsAnchoredAtEnd() { 115 ZoneList<RegExpTree*>* alternatives = this->alternatives(); 116 for (int i = 0; i < alternatives->length(); i++) { 117 if (!alternatives->at(i)->IsAnchoredAtEnd()) return false; 118 } 119 return true; 120} 121 122 123bool RegExpLookaround::IsAnchoredAtStart() { 124 return is_positive() && type() == LOOKAHEAD && body()->IsAnchoredAtStart(); 125} 126 127 128bool RegExpCapture::IsAnchoredAtStart() { return body()->IsAnchoredAtStart(); } 129 130 131bool RegExpCapture::IsAnchoredAtEnd() { return body()->IsAnchoredAtEnd(); } 132 133 134// Convert regular expression trees to a simple sexp representation. 135// This representation should be different from the input grammar 136// in as many cases as possible, to make it more difficult for incorrect 137// parses to look as correct ones which is likely if the input and 138// output formats are alike. 139class RegExpUnparser final : public RegExpVisitor { 140 public: 141 RegExpUnparser(std::ostream& os, Zone* zone) : os_(os), zone_(zone) {} 142 void VisitCharacterRange(CharacterRange that); 143#define MAKE_CASE(Name) void* Visit##Name(RegExp##Name*, void* data) override; 144 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE) 145#undef MAKE_CASE 146 private: 147 std::ostream& os_; 148 Zone* zone_; 149}; 150 151 152void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) { 153 os_ << "(|"; 154 for (int i = 0; i < that->alternatives()->length(); i++) { 155 os_ << " "; 156 that->alternatives()->at(i)->Accept(this, data); 157 } 158 os_ << ")"; 159 return NULL; 160} 161 162 163void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) { 164 os_ << "(:"; 165 for (int i = 0; i < that->nodes()->length(); i++) { 166 os_ << " "; 167 that->nodes()->at(i)->Accept(this, data); 168 } 169 os_ << ")"; 170 return NULL; 171} 172 173 174void RegExpUnparser::VisitCharacterRange(CharacterRange that) { 175 os_ << AsUC32(that.from()); 176 if (!that.IsSingleton()) { 177 os_ << "-" << AsUC32(that.to()); 178 } 179} 180 181 182void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that, 183 void* data) { 184 if (that->is_negated()) os_ << "^"; 185 os_ << "["; 186 for (int i = 0; i < that->ranges(zone_)->length(); i++) { 187 if (i > 0) os_ << " "; 188 VisitCharacterRange(that->ranges(zone_)->at(i)); 189 } 190 os_ << "]"; 191 return NULL; 192} 193 194 195void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) { 196 switch (that->assertion_type()) { 197 case RegExpAssertion::START_OF_INPUT: 198 os_ << "@^i"; 199 break; 200 case RegExpAssertion::END_OF_INPUT: 201 os_ << "@$i"; 202 break; 203 case RegExpAssertion::START_OF_LINE: 204 os_ << "@^l"; 205 break; 206 case RegExpAssertion::END_OF_LINE: 207 os_ << "@$l"; 208 break; 209 case RegExpAssertion::BOUNDARY: 210 os_ << "@b"; 211 break; 212 case RegExpAssertion::NON_BOUNDARY: 213 os_ << "@B"; 214 break; 215 } 216 return NULL; 217} 218 219 220void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) { 221 os_ << "'"; 222 Vector<const uc16> chardata = that->data(); 223 for (int i = 0; i < chardata.length(); i++) { 224 os_ << AsUC16(chardata[i]); 225 } 226 os_ << "'"; 227 return NULL; 228} 229 230 231void* RegExpUnparser::VisitText(RegExpText* that, void* data) { 232 if (that->elements()->length() == 1) { 233 that->elements()->at(0).tree()->Accept(this, data); 234 } else { 235 os_ << "(!"; 236 for (int i = 0; i < that->elements()->length(); i++) { 237 os_ << " "; 238 that->elements()->at(i).tree()->Accept(this, data); 239 } 240 os_ << ")"; 241 } 242 return NULL; 243} 244 245 246void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) { 247 os_ << "(# " << that->min() << " "; 248 if (that->max() == RegExpTree::kInfinity) { 249 os_ << "- "; 250 } else { 251 os_ << that->max() << " "; 252 } 253 os_ << (that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n "); 254 that->body()->Accept(this, data); 255 os_ << ")"; 256 return NULL; 257} 258 259 260void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) { 261 os_ << "(^ "; 262 that->body()->Accept(this, data); 263 os_ << ")"; 264 return NULL; 265} 266 267void* RegExpUnparser::VisitGroup(RegExpGroup* that, void* data) { 268 os_ << "(?: "; 269 that->body()->Accept(this, data); 270 os_ << ")"; 271 return NULL; 272} 273 274void* RegExpUnparser::VisitLookaround(RegExpLookaround* that, void* data) { 275 os_ << "("; 276 os_ << (that->type() == RegExpLookaround::LOOKAHEAD ? "->" : "<-"); 277 os_ << (that->is_positive() ? " + " : " - "); 278 that->body()->Accept(this, data); 279 os_ << ")"; 280 return NULL; 281} 282 283 284void* RegExpUnparser::VisitBackReference(RegExpBackReference* that, 285 void* data) { 286 os_ << "(<- " << that->index() << ")"; 287 return NULL; 288} 289 290 291void* RegExpUnparser::VisitEmpty(RegExpEmpty* that, void* data) { 292 os_ << '%'; 293 return NULL; 294} 295 296 297std::ostream& RegExpTree::Print(std::ostream& os, Zone* zone) { // NOLINT 298 RegExpUnparser unparser(os, zone); 299 Accept(&unparser, NULL); 300 return os; 301} 302 303 304RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives) 305 : alternatives_(alternatives) { 306 DCHECK(alternatives->length() > 1); 307 RegExpTree* first_alternative = alternatives->at(0); 308 min_match_ = first_alternative->min_match(); 309 max_match_ = first_alternative->max_match(); 310 for (int i = 1; i < alternatives->length(); i++) { 311 RegExpTree* alternative = alternatives->at(i); 312 min_match_ = Min(min_match_, alternative->min_match()); 313 max_match_ = Max(max_match_, alternative->max_match()); 314 } 315} 316 317 318static int IncreaseBy(int previous, int increase) { 319 if (RegExpTree::kInfinity - previous < increase) { 320 return RegExpTree::kInfinity; 321 } else { 322 return previous + increase; 323 } 324} 325 326 327RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes) 328 : nodes_(nodes) { 329 DCHECK(nodes->length() > 1); 330 min_match_ = 0; 331 max_match_ = 0; 332 for (int i = 0; i < nodes->length(); i++) { 333 RegExpTree* node = nodes->at(i); 334 int node_min_match = node->min_match(); 335 min_match_ = IncreaseBy(min_match_, node_min_match); 336 int node_max_match = node->max_match(); 337 max_match_ = IncreaseBy(max_match_, node_max_match); 338 } 339} 340 341 342} // namespace internal 343} // namespace v8 344