12ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Copyright 2009 The RE2 Authors. All Rights Reserved. 22ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Use of this source code is governed by a BSD-style 32ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// license that can be found in the LICENSE file. 42ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 52ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "util/util.h" 62ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/prefilter.h" 72ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/re2.h" 82ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/unicode_casefold.h" 92ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/walker-inl.h" 102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonnamespace re2 { 122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic const int Trace = false; 142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsontypedef set<string>::iterator SSIter; 162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsontypedef set<string>::const_iterator ConstSSIter; 172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic int alloc_id = 100000; // Used for debugging. 192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Initializes a Prefilter, allocating subs_ as necessary. 202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Prefilter(Op op) { 212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson op_ = op; 222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson subs_ = NULL; 232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (op_ == AND || op_ == OR) 242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson subs_ = new vector<Prefilter*>; 252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson alloc_id_ = alloc_id++; 272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson VLOG(10) << "alloc_id: " << alloc_id_; 282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Destroys a Prefilter. 312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::~Prefilter() { 322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson VLOG(10) << "Deleted: " << alloc_id_; 332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (subs_) { 342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (int i = 0; i < subs_->size(); i++) 352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete (*subs_)[i]; 362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete subs_; 372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson subs_ = NULL; 382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Simplify if the node is an empty Or or And. 422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter* Prefilter::Simplify() { 432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (op_ != AND && op_ != OR) { 442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return this; 452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Nothing left in the AND/OR. 482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (subs_->size() == 0) { 492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (op_ == AND) 502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson op_ = ALL; // AND of nothing is true 512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson else 522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson op_ = NONE; // OR of nothing is false 532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return this; 552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Just one subnode: throw away wrapper. 582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (subs_->size() == 1) { 592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prefilter* a = (*subs_)[0]; 602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson subs_->clear(); 612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete this; 622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return a->Simplify(); 632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return this; 662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Combines two Prefilters together to create an "op" (AND or OR). 692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// The passed Prefilters will be part of the returned Prefilter or deleted. 702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Does lots of work to avoid creating unnecessarily complicated structures. 712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter* Prefilter::AndOr(Op op, Prefilter* a, Prefilter* b) { 722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // If a, b can be rewritten as op, do so. 732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson a = a->Simplify(); 742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson b = b->Simplify(); 752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Canonicalize: a->op <= b->op. 772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (a->op() > b->op()) { 782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prefilter* t = a; 792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson a = b; 802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson b = t; 812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Trivial cases. 842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // ALL AND b = b 852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // NONE OR b = b 862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // ALL OR b = ALL 872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // NONE AND b = NONE 882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Don't need to look at b, because of canonicalization above. 892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // ALL and NONE are smallest opcodes. 902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (a->op() == ALL || a->op() == NONE) { 912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if ((a->op() == ALL && op == AND) || 922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson (a->op() == NONE && op == OR)) { 932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete a; 942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return b; 952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete b; 972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return a; 982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // If a and b match op, merge their contents. 1022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (a->op() == op && b->op() == op) { 1032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (int i = 0; i < b->subs()->size(); i++) { 1042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prefilter* bb = (*b->subs())[i]; 1052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson a->subs()->push_back(bb); 1062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson b->subs()->clear(); 1082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete b; 1092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return a; 1102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // If a already has the same op as the op that is under construction 1132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // add in b (similarly if b already has the same op, add in a). 1142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (b->op() == op) { 1152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prefilter* t = a; 1162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson a = b; 1172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson b = t; 1182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (a->op() == op) { 1202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson a->subs()->push_back(b); 1212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return a; 1222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Otherwise just return the op. 1252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prefilter* c = new Prefilter(op); 1262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson c->subs()->push_back(a); 1272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson c->subs()->push_back(b); 1282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return c; 1292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 1302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter* Prefilter::And(Prefilter* a, Prefilter* b) { 1322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return AndOr(AND, a, b); 1332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 1342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter* Prefilter::Or(Prefilter* a, Prefilter* b) { 1362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return AndOr(OR, a, b); 1372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 1382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic void SimplifyStringSet(set<string> *ss) { 1402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Now make sure that the strings aren't redundant. For example, if 1412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // we know "ab" is a required string, then it doesn't help at all to 1422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // know that "abc" is also a required string, so delete "abc". This 1432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // is because, when we are performing a string search to filter 1442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // regexps, matching ab will already allow this regexp to be a 1452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // candidate for match, so further matching abc is redundant. 1462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (SSIter i = ss->begin(); i != ss->end(); ++i) { 1482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson SSIter j = i; 1492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson ++j; 1502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson while (j != ss->end()) { 1512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Increment j early so that we can erase the element it points to. 1522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson SSIter old_j = j; 1532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson ++j; 1542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (old_j->find(*i) != string::npos) 1552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson ss->erase(old_j); 1562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 1592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter* Prefilter::OrStrings(set<string>* ss) { 1612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson SimplifyStringSet(ss); 1622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prefilter* or_prefilter = NULL; 1632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!ss->empty()) { 1642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson or_prefilter = new Prefilter(NONE); 1652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (SSIter i = ss->begin(); i != ss->end(); ++i) 1662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson or_prefilter = Or(or_prefilter, FromString(*i)); 1672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return or_prefilter; 1692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 1702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic Rune ToLowerRune(Rune r) { 1722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (r < Runeself) { 1732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if ('A' <= r && r <= 'Z') 1742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson r += 'a' - 'A'; 1752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return r; 1762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson CaseFold *f = LookupCaseFold(unicode_tolower, num_unicode_tolower, r); 1792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (f == NULL || r < f->lo) 1802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return r; 1812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return ApplyFold(f, r); 1822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 1832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter* Prefilter::FromString(const string& str) { 1852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prefilter* m = new Prefilter(Prefilter::ATOM); 1862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson m->atom_ = str; 1872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return m; 1882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 1892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Information about a regexp used during computation of Prefilter. 1912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Can be thought of as information about the set of strings matching 1922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// the given regular expression. 1932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonclass Prefilter::Info { 1942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson public: 1952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Info(); 1962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson ~Info(); 1972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // More constructors. They delete their Info* arguments. 1992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson static Info* Alt(Info* a, Info* b); 2002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson static Info* Concat(Info* a, Info* b); 2012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson static Info* And(Info* a, Info* b); 2022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson static Info* Star(Info* a); 2032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson static Info* Plus(Info* a); 2042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson static Info* Quest(Info* a); 2052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson static Info* EmptyString(); 2062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson static Info* NoMatch(); 2072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson static Info* AnyChar(); 2082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson static Info* CClass(CharClass* cc); 2092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson static Info* Literal(Rune r); 2102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson static Info* AnyMatch(); 2112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Format Info as a string. 2132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson string ToString(); 2142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Caller takes ownership of the Prefilter. 2162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prefilter* TakeMatch(); 2172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson set<string>& exact() { return exact_; } 2192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson bool is_exact() const { return is_exact_; } 2212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson class Walker; 2232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson private: 2252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson set<string> exact_; 2262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // When is_exact_ is true, the strings that match 2282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // are placed in exact_. When it is no longer an exact 2292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // set of strings that match this RE, then is_exact_ 2302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // is false and the match_ contains the required match 2312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // criteria. 2322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson bool is_exact_; 2332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Accumulated Prefilter query that any 2352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // match for this regexp is guaranteed to match. 2362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prefilter* match_; 2372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}; 2382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info::Info() 2412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson : is_exact_(false), 2422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson match_(NULL) { 2432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 2442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info::~Info() { 2462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete match_; 2472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 2482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter* Prefilter::Info::TakeMatch() { 2502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (is_exact_) { 2512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson match_ = Prefilter::OrStrings(&exact_); 2522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson is_exact_ = false; 2532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 2542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prefilter* m = match_; 2552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson match_ = NULL; 2562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return m; 2572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 2582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Format a Info in string form. 2602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstring Prefilter::Info::ToString() { 2612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (this == NULL) { 2622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Sometimes when iterating on children of a node, 2632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // some children might have NULL Info. Adding 2642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // the check here for NULL to take care of cases where 2652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // the caller is not checking. 2662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return ""; 2672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 2682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (is_exact_) { 2702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n = 0; 2712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson string s; 2722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (set<string>::iterator i = exact_.begin(); i != exact_.end(); ++i) { 2732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (n++ > 0) 2742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson s += ","; 2752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson s += *i; 2762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 2772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return s; 2782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 2792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (match_) 2812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return match_->DebugString(); 2822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return ""; 2842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 2852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Add the strings from src to dst. 2872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic void CopyIn(const set<string>& src, set<string>* dst) { 2882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (ConstSSIter i = src.begin(); i != src.end(); ++i) 2892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson dst->insert(*i); 2902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 2912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 2922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Add the cross-product of a and b to dst. 2932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// (For each string i in a and j in b, add i+j.) 2942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic void CrossProduct(const set<string>& a, 2952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const set<string>& b, 2962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson set<string>* dst) { 2972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (ConstSSIter i = a.begin(); i != a.end(); ++i) 2982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (ConstSSIter j = b.begin(); j != b.end(); ++j) 2992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson dst->insert(*i + *j); 3002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 3012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Concats a and b. Requires that both are exact sets. 3032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Forms an exact set that is a crossproduct of a and b. 3042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::Concat(Info* a, Info* b) { 3052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (a == NULL) 3062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return b; 3072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson DCHECK(a->is_exact_); 3082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson DCHECK(b && b->is_exact_); 3092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Info *ab = new Info(); 3102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson CrossProduct(a->exact_, b->exact_, &ab->exact_); 3122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson ab->is_exact_ = true; 3132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete a; 3152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete b; 3162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return ab; 3172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 3182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Constructs an inexact Info for ab given a and b. 3202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Used only when a or b is not exact or when the 3212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// exact cross product is likely to be too big. 3222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::And(Info* a, Info* b) { 3232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (a == NULL) 3242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return b; 3252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (b == NULL) 3262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return a; 3272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Info *ab = new Info(); 3292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson ab->match_ = Prefilter::And(a->TakeMatch(), b->TakeMatch()); 3312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson ab->is_exact_ = false; 3322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete a; 3332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete b; 3342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return ab; 3352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 3362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Constructs Info for a|b given a and b. 3382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::Alt(Info* a, Info* b) { 3392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Info *ab = new Info(); 3402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (a->is_exact_ && b->is_exact_) { 3422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson CopyIn(a->exact_, &ab->exact_); 3432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson CopyIn(b->exact_, &ab->exact_); 3442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson ab->is_exact_ = true; 3452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 3462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Either a or b has is_exact_ = false. If the other 3472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // one has is_exact_ = true, we move it to match_ and 3482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // then create a OR of a,b. The resulting Info has 3492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // is_exact_ = false. 3502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson ab->match_ = Prefilter::Or(a->TakeMatch(), b->TakeMatch()); 3512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson ab->is_exact_ = false; 3522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 3532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete a; 3552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete b; 3562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return ab; 3572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 3582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Constructs Info for a? given a. 3602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::Quest(Info *a) { 3612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Info *ab = new Info(); 3622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson ab->is_exact_ = false; 3642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson ab->match_ = new Prefilter(ALL); 3652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete a; 3662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return ab; 3672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 3682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Constructs Info for a* given a. 3702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Same as a? -- not much to do. 3712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::Star(Info *a) { 3722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return Quest(a); 3732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 3742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Constructs Info for a+ given a. If a was exact set, it isn't 3762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// anymore. 3772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::Plus(Info *a) { 3782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Info *ab = new Info(); 3792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson ab->match_ = a->TakeMatch(); 3812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson ab->is_exact_ = false; 3822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete a; 3842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return ab; 3852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 3862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic string RuneToString(Rune r) { 3882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson char buf[UTFmax]; 3892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n = runetochar(buf, &r); 3902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return string(buf, n); 3912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 3922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 3932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Constructs Info for literal rune. 3942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::Literal(Rune r) { 3952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Info* info = new Info(); 3962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info->exact_.insert(RuneToString(ToLowerRune(r))); 3972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info->is_exact_ = true; 3982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return info; 3992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 4002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Constructs Info for dot (any character). 4022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::AnyChar() { 4032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prefilter::Info* info = new Prefilter::Info(); 4042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info->match_ = new Prefilter(ALL); 4052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return info; 4062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 4072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Constructs Prefilter::Info for no possible match. 4092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::NoMatch() { 4102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prefilter::Info* info = new Prefilter::Info(); 4112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info->match_ = new Prefilter(NONE); 4122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return info; 4132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 4142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Constructs Prefilter::Info for any possible match. 4162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// This Prefilter::Info is valid for any regular expression, 4172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// since it makes no assertions whatsoever about the 4182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// strings being matched. 4192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::AnyMatch() { 4202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prefilter::Info *info = new Prefilter::Info(); 4212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info->match_ = new Prefilter(ALL); 4222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return info; 4232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 4242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Constructs Prefilter::Info for just the empty string. 4262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::EmptyString() { 4272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prefilter::Info* info = new Prefilter::Info(); 4282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info->is_exact_ = true; 4292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info->exact_.insert(""); 4302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return info; 4312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 4322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Constructs Prefilter::Info for a character class. 4342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsontypedef CharClass::iterator CCIter; 4352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::CClass(CharClass *cc) { 4362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (Trace) { 4372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson VLOG(0) << "CharClassInfo:"; 4382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (CCIter i = cc->begin(); i != cc->end(); ++i) 4392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson VLOG(0) << " " << i->lo << "-" << i->hi; 4402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 4412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // If the class is too large, it's okay to overestimate. 4432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (cc->size() > 10) 4442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return AnyChar(); 4452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prefilter::Info *a = new Prefilter::Info(); 4472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (CCIter i = cc->begin(); i != cc->end(); ++i) 4482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (Rune r = i->lo; r <= i->hi; r++) 4492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson a->exact_.insert(RuneToString(ToLowerRune(r))); 4502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson a->is_exact_ = true; 4522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (Trace) { 4542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson VLOG(0) << " = " << a->ToString(); 4552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 4562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return a; 4582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 4592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonclass Prefilter::Info::Walker : public Regexp::Walker<Prefilter::Info*> { 4612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson public: 4622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Walker() {} 4632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson virtual Info* PostVisit( 4652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Regexp* re, Info* parent_arg, 4662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Info* pre_arg, 4672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Info** child_args, int nchild_args); 4682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson virtual Info* ShortVisit( 4702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Regexp* re, 4712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Info* parent_arg); 4722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson private: 4742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson DISALLOW_EVIL_CONSTRUCTORS(Walker); 4752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}; 4762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::BuildInfo(Regexp* re) { 4782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (Trace) { 4792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(INFO) << "BuildPrefilter::Info: " << re->ToString(); 4802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 4812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prefilter::Info::Walker w; 4822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prefilter::Info* info = w.WalkExponential(re, NULL, 100000); 4832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (w.stopped_early()) { 4852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete info; 4862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return NULL; 4872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 4882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return info; 4902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 4912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::Walker::ShortVisit( 4932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Regexp* re, Prefilter::Info* parent_arg) { 4942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return AnyMatch(); 4952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 4962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 4972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Constructs the Prefilter::Info for the given regular expression. 4982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Assumes re is simplified. 4992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::Walker::PostVisit( 5002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Regexp* re, Prefilter::Info* parent_arg, 5012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prefilter::Info* pre_arg, Prefilter::Info** child_args, 5022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int nchild_args) { 5032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prefilter::Info *info; 5042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson switch (re->op()) { 5052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson default: 5062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpRepeat: 5072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(DFATAL) << "Bad regexp op " << re->op(); 5082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info = EmptyString(); 5092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 5102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpNoMatch: 5122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info = NoMatch(); 5132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 5142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // These ops match the empty string: 5162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpEmptyMatch: // anywhere 5172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpBeginLine: // at beginning of line 5182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpEndLine: // at end of line 5192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpBeginText: // at beginning of text 5202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpEndText: // at end of text 5212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpWordBoundary: // at word boundary 5222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpNoWordBoundary: // not at word boundary 5232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info = EmptyString(); 5242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 5252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpLiteral: 5272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info = Literal(re->rune()); 5282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 5292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpLiteralString: 5312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (re->nrunes() == 0) { 5322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info = NoMatch(); 5332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 5342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 5352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info = Literal(re->runes()[0]); 5362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (int i = 1; i < re->nrunes(); i++) 5372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info = Concat(info, Literal(re->runes()[i])); 5382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 5392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpConcat: { 5412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Accumulate in info. 5422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Exact is concat of recent contiguous exact nodes. 5432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info = NULL; 5442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Info* exact = NULL; 5452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (int i = 0; i < nchild_args; i++) { 5462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Info* ci = child_args[i]; // child info 5472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!ci->is_exact() || 5482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson (exact && ci->exact().size() * exact->exact().size() > 16)) { 5492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Exact run is over. 5502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info = And(info, exact); 5512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson exact = NULL; 5522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Add this child's info. 5532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info = And(info, ci); 5542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 5552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Append to exact run. 5562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson exact = Concat(exact, ci); 5572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 5582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 5592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info = And(info, exact); 5602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 5612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 5622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpAlternate: 5642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info = child_args[0]; 5652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (int i = 1; i < nchild_args; i++) 5662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info = Alt(info, child_args[i]); 5672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson VLOG(10) << "Alt: " << info->ToString(); 5682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 5692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpStar: 5712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info = Star(child_args[0]); 5722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 5732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpQuest: 5752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info = Quest(child_args[0]); 5762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 5772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpPlus: 5792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info = Plus(child_args[0]); 5802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 5812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpAnyChar: 5832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Claim nothing, except that it's not empty. 5842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info = AnyChar(); 5852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 5862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpCharClass: 5882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info = CClass(re->cc()); 5892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 5902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case kRegexpCapture: 5922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // These don't affect the set of matching strings. 5932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson info = child_args[0]; 5942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson break; 5952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 5962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 5972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (Trace) { 5982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson VLOG(0) << "BuildInfo " << re->ToString() 5992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson << ": " << info->ToString(); 6002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return info; 6032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 6042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter* Prefilter::FromRegexp(Regexp* re) { 6072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (re == NULL) 6082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return NULL; 6092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Regexp* simple = re->Simplify(); 6112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prefilter::Info *info = BuildInfo(simple); 6122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson simple->Decref(); 6142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (info == NULL) 6152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return NULL; 6162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prefilter* m = info->TakeMatch(); 6182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete info; 6202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return m; 6212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 6222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstring Prefilter::DebugString() const { 6242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (this == NULL) 6252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return "<nil>"; 6262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson switch (op_) { 6282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson default: 6292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_; 6302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return StringPrintf("op%d", op_); 6312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case NONE: 6322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return "*no-matches*"; 6332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case ATOM: 6342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return atom_; 6352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case ALL: 6362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return ""; 6372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case AND: { 6382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson string s = ""; 6392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (int i = 0; i < subs_->size(); i++) { 6402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (i > 0) 6412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson s += " "; 6422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson s += (*subs_)[i]->DebugString(); 6432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return s; 6452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson case OR: { 6472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson string s = "("; 6482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (int i = 0; i < subs_->size(); i++) { 6492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (i > 0) 6502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson s += "|"; 6512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson s += (*subs_)[i]->DebugString(); 6522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson s += ")"; 6542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return s; 6552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 6572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 6582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter* Prefilter::FromRE2(const RE2* re2) { 6602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (re2 == NULL) 6612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return NULL; 6622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Regexp* regexp = re2->Regexp(); 6642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (regexp == NULL) 6652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return NULL; 6662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return FromRegexp(regexp); 6682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 6692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 6712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} // namespace re2 672