12ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Copyright 2009 The RE2 Authors.  All Rights Reserved.
22ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Use of this source code is governed by a BSD-style
32ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// license that can be found in the LICENSE file.
42ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
52ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "util/util.h"
62ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/prefilter.h"
72ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/re2.h"
82ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/unicode_casefold.h"
92ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/walker-inl.h"
102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonnamespace re2 {
122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic const int Trace = false;
142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsontypedef set<string>::iterator SSIter;
162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsontypedef set<string>::const_iterator ConstSSIter;
172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic int alloc_id = 100000;  // Used for debugging.
192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Initializes a Prefilter, allocating subs_ as necessary.
202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Prefilter(Op op) {
212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  op_ = op;
222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  subs_ = NULL;
232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (op_ == AND || op_ == OR)
242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    subs_ = new vector<Prefilter*>;
252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  alloc_id_ = alloc_id++;
272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  VLOG(10) << "alloc_id: " << alloc_id_;
282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Destroys a Prefilter.
312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::~Prefilter() {
322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  VLOG(10) << "Deleted: " << alloc_id_;
332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (subs_) {
342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    for (int i = 0; i < subs_->size(); i++)
352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      delete (*subs_)[i];
362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    delete subs_;
372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    subs_ = NULL;
382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Simplify if the node is an empty Or or And.
422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter* Prefilter::Simplify() {
432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (op_ != AND && op_ != OR) {
442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return this;
452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // Nothing left in the AND/OR.
482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (subs_->size() == 0) {
492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    if (op_ == AND)
502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      op_ = ALL;  // AND of nothing is true
512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    else
522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      op_ = NONE;  // OR of nothing is false
532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return this;
552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // Just one subnode: throw away wrapper.
582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (subs_->size() == 1) {
592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    Prefilter* a = (*subs_)[0];
602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    subs_->clear();
612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    delete this;
622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return a->Simplify();
632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return this;
662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Combines two Prefilters together to create an "op" (AND or OR).
692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// The passed Prefilters will be part of the returned Prefilter or deleted.
702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Does lots of work to avoid creating unnecessarily complicated structures.
712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter* Prefilter::AndOr(Op op, Prefilter* a, Prefilter* b) {
722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // If a, b can be rewritten as op, do so.
732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  a = a->Simplify();
742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  b = b->Simplify();
752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // Canonicalize: a->op <= b->op.
772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (a->op() > b->op()) {
782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    Prefilter* t = a;
792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    a = b;
802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    b = t;
812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // Trivial cases.
842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  //    ALL AND b = b
852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  //    NONE OR b = b
862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  //    ALL OR b   = ALL
872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  //    NONE AND b = NONE
882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // Don't need to look at b, because of canonicalization above.
892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // ALL and NONE are smallest opcodes.
902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (a->op() == ALL || a->op() == NONE) {
912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    if ((a->op() == ALL && op == AND) ||
922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        (a->op() == NONE && op == OR)) {
932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      delete a;
942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      return b;
952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    } else {
962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      delete b;
972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      return a;
982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    }
992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
1002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // If a and b match op, merge their contents.
1022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (a->op() == op && b->op() == op) {
1032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    for (int i = 0; i < b->subs()->size(); i++) {
1042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      Prefilter* bb = (*b->subs())[i];
1052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      a->subs()->push_back(bb);
1062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    }
1072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    b->subs()->clear();
1082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    delete b;
1092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return a;
1102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
1112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // If a already has the same op as the op that is under construction
1132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // add in b (similarly if b already has the same op, add in a).
1142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (b->op() == op) {
1152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    Prefilter* t = a;
1162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    a = b;
1172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    b = t;
1182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
1192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (a->op() == op) {
1202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    a->subs()->push_back(b);
1212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return a;
1222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
1232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // Otherwise just return the op.
1252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Prefilter* c = new Prefilter(op);
1262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  c->subs()->push_back(a);
1272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  c->subs()->push_back(b);
1282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return c;
1292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
1302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter* Prefilter::And(Prefilter* a, Prefilter* b) {
1322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return AndOr(AND, a, b);
1332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
1342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter* Prefilter::Or(Prefilter* a, Prefilter* b) {
1362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return AndOr(OR, a, b);
1372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
1382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic void SimplifyStringSet(set<string> *ss) {
1402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // Now make sure that the strings aren't redundant.  For example, if
1412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // we know "ab" is a required string, then it doesn't help at all to
1422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // know that "abc" is also a required string, so delete "abc". This
1432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // is because, when we are performing a string search to filter
1442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // regexps, matching ab will already allow this regexp to be a
1452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // candidate for match, so further matching abc is redundant.
1462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  for (SSIter i = ss->begin(); i != ss->end(); ++i) {
1482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    SSIter j = i;
1492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    ++j;
1502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    while (j != ss->end()) {
1512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      // Increment j early so that we can erase the element it points to.
1522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      SSIter old_j = j;
1532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      ++j;
1542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (old_j->find(*i) != string::npos)
1552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        ss->erase(old_j);
1562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    }
1572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
1582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
1592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter* Prefilter::OrStrings(set<string>* ss) {
1612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  SimplifyStringSet(ss);
1622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Prefilter* or_prefilter = NULL;
1632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (!ss->empty()) {
1642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    or_prefilter = new Prefilter(NONE);
1652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    for (SSIter i = ss->begin(); i != ss->end(); ++i)
1662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      or_prefilter = Or(or_prefilter, FromString(*i));
1672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
1682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return or_prefilter;
1692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
1702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic Rune ToLowerRune(Rune r) {
1722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (r < Runeself) {
1732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    if ('A' <= r && r <= 'Z')
1742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      r += 'a' - 'A';
1752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return r;
1762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
1772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  CaseFold *f = LookupCaseFold(unicode_tolower, num_unicode_tolower, r);
1792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (f == NULL || r < f->lo)
1802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return r;
1812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return ApplyFold(f, r);
1822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
1832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter* Prefilter::FromString(const string& str) {
1852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Prefilter* m = new Prefilter(Prefilter::ATOM);
1862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  m->atom_ = str;
1872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return m;
1882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
1892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Information about a regexp used during computation of Prefilter.
1912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Can be thought of as information about the set of strings matching
1922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// the given regular expression.
1932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonclass Prefilter::Info {
1942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson public:
1952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Info();
1962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  ~Info();
1972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // More constructors.  They delete their Info* arguments.
1992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  static Info* Alt(Info* a, Info* b);
2002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  static Info* Concat(Info* a, Info* b);
2012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  static Info* And(Info* a, Info* b);
2022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  static Info* Star(Info* a);
2032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  static Info* Plus(Info* a);
2042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  static Info* Quest(Info* a);
2052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  static Info* EmptyString();
2062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  static Info* NoMatch();
2072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  static Info* AnyChar();
2082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  static Info* CClass(CharClass* cc);
2092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  static Info* Literal(Rune r);
2102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  static Info* AnyMatch();
2112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // Format Info as a string.
2132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  string ToString();
2142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // Caller takes ownership of the Prefilter.
2162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Prefilter* TakeMatch();
2172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  set<string>& exact() { return exact_; }
2192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  bool is_exact() const { return is_exact_; }
2212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  class Walker;
2232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson private:
2252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  set<string> exact_;
2262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // When is_exact_ is true, the strings that match
2282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // are placed in exact_. When it is no longer an exact
2292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // set of strings that match this RE, then is_exact_
2302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // is false and the match_ contains the required match
2312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // criteria.
2322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  bool is_exact_;
2332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // Accumulated Prefilter query that any
2352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // match for this regexp is guaranteed to match.
2362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Prefilter* match_;
2372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson};
2382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info::Info()
2412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  : is_exact_(false),
2422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    match_(NULL) {
2432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
2442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info::~Info() {
2462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  delete match_;
2472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
2482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter* Prefilter::Info::TakeMatch() {
2502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (is_exact_) {
2512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    match_ = Prefilter::OrStrings(&exact_);
2522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    is_exact_ = false;
2532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
2542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Prefilter* m = match_;
2552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  match_ = NULL;
2562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return m;
2572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
2582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Format a Info in string form.
2602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstring Prefilter::Info::ToString() {
2612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (this == NULL) {
2622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    // Sometimes when iterating on children of a node,
2632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    // some children might have NULL Info. Adding
2642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    // the check here for NULL to take care of cases where
2652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    // the caller is not checking.
2662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return "";
2672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
2682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (is_exact_) {
2702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    int n = 0;
2712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    string s;
2722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    for (set<string>::iterator i = exact_.begin(); i != exact_.end(); ++i) {
2732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (n++ > 0)
2742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        s += ",";
2752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      s += *i;
2762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    }
2772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return s;
2782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
2792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (match_)
2812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return match_->DebugString();
2822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return "";
2842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
2852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Add the strings from src to dst.
2872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic void CopyIn(const set<string>& src, set<string>* dst) {
2882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  for (ConstSSIter i = src.begin(); i != src.end(); ++i)
2892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    dst->insert(*i);
2902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
2912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
2922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Add the cross-product of a and b to dst.
2932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// (For each string i in a and j in b, add i+j.)
2942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic void CrossProduct(const set<string>& a,
2952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson                         const set<string>& b,
2962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson                         set<string>* dst) {
2972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  for (ConstSSIter i = a.begin(); i != a.end(); ++i)
2982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    for (ConstSSIter j = b.begin(); j != b.end(); ++j)
2992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      dst->insert(*i + *j);
3002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
3012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
3022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Concats a and b. Requires that both are exact sets.
3032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Forms an exact set that is a crossproduct of a and b.
3042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::Concat(Info* a, Info* b) {
3052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (a == NULL)
3062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return b;
3072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  DCHECK(a->is_exact_);
3082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  DCHECK(b && b->is_exact_);
3092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Info *ab = new Info();
3102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
3112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  CrossProduct(a->exact_, b->exact_, &ab->exact_);
3122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  ab->is_exact_ = true;
3132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
3142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  delete a;
3152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  delete b;
3162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return ab;
3172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
3182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
3192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Constructs an inexact Info for ab given a and b.
3202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Used only when a or b is not exact or when the
3212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// exact cross product is likely to be too big.
3222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::And(Info* a, Info* b) {
3232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (a == NULL)
3242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return b;
3252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (b == NULL)
3262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return a;
3272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
3282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Info *ab = new Info();
3292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
3302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  ab->match_ = Prefilter::And(a->TakeMatch(), b->TakeMatch());
3312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  ab->is_exact_ = false;
3322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  delete a;
3332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  delete b;
3342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return ab;
3352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
3362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
3372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Constructs Info for a|b given a and b.
3382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::Alt(Info* a, Info* b) {
3392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Info *ab = new Info();
3402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
3412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (a->is_exact_ && b->is_exact_) {
3422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    CopyIn(a->exact_, &ab->exact_);
3432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    CopyIn(b->exact_, &ab->exact_);
3442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    ab->is_exact_ = true;
3452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  } else {
3462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    // Either a or b has is_exact_ = false. If the other
3472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    // one has is_exact_ = true, we move it to match_ and
3482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    // then create a OR of a,b. The resulting Info has
3492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    // is_exact_ = false.
3502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    ab->match_ = Prefilter::Or(a->TakeMatch(), b->TakeMatch());
3512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    ab->is_exact_ = false;
3522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
3532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
3542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  delete a;
3552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  delete b;
3562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return ab;
3572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
3582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
3592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Constructs Info for a? given a.
3602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::Quest(Info *a) {
3612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Info *ab = new Info();
3622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
3632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  ab->is_exact_ = false;
3642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  ab->match_ = new Prefilter(ALL);
3652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  delete a;
3662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return ab;
3672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
3682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
3692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Constructs Info for a* given a.
3702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Same as a? -- not much to do.
3712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::Star(Info *a) {
3722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return Quest(a);
3732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
3742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
3752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Constructs Info for a+ given a. If a was exact set, it isn't
3762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// anymore.
3772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::Plus(Info *a) {
3782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Info *ab = new Info();
3792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
3802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  ab->match_ = a->TakeMatch();
3812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  ab->is_exact_ = false;
3822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
3832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  delete a;
3842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return ab;
3852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
3862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
3872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstatic string RuneToString(Rune r) {
3882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  char buf[UTFmax];
3892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  int n = runetochar(buf, &r);
3902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return string(buf, n);
3912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
3922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
3932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Constructs Info for literal rune.
3942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::Literal(Rune r) {
3952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Info* info = new Info();
3962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  info->exact_.insert(RuneToString(ToLowerRune(r)));
3972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  info->is_exact_ = true;
3982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return info;
3992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
4002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
4012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Constructs Info for dot (any character).
4022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::AnyChar() {
4032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Prefilter::Info* info = new Prefilter::Info();
4042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  info->match_ = new Prefilter(ALL);
4052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return info;
4062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
4072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
4082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Constructs Prefilter::Info for no possible match.
4092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::NoMatch() {
4102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Prefilter::Info* info = new Prefilter::Info();
4112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  info->match_ = new Prefilter(NONE);
4122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return info;
4132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
4142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
4152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Constructs Prefilter::Info for any possible match.
4162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// This Prefilter::Info is valid for any regular expression,
4172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// since it makes no assertions whatsoever about the
4182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// strings being matched.
4192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::AnyMatch() {
4202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Prefilter::Info *info = new Prefilter::Info();
4212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  info->match_ = new Prefilter(ALL);
4222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return info;
4232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
4242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
4252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Constructs Prefilter::Info for just the empty string.
4262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::EmptyString() {
4272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Prefilter::Info* info = new Prefilter::Info();
4282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  info->is_exact_ = true;
4292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  info->exact_.insert("");
4302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return info;
4312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
4322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
4332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Constructs Prefilter::Info for a character class.
4342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsontypedef CharClass::iterator CCIter;
4352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::CClass(CharClass *cc) {
4362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (Trace) {
4372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    VLOG(0) << "CharClassInfo:";
4382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    for (CCIter i = cc->begin(); i != cc->end(); ++i)
4392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      VLOG(0) << "  " << i->lo << "-" << i->hi;
4402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
4412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
4422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // If the class is too large, it's okay to overestimate.
4432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (cc->size() > 10)
4442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return AnyChar();
4452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
4462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Prefilter::Info *a = new Prefilter::Info();
4472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  for (CCIter i = cc->begin(); i != cc->end(); ++i)
4482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    for (Rune r = i->lo; r <= i->hi; r++)
4492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      a->exact_.insert(RuneToString(ToLowerRune(r)));
4502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
4512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  a->is_exact_ = true;
4522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
4532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (Trace) {
4542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    VLOG(0) << " = " << a->ToString();
4552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
4562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
4572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return a;
4582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
4592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
4602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonclass Prefilter::Info::Walker : public Regexp::Walker<Prefilter::Info*> {
4612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson public:
4622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Walker() {}
4632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
4642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  virtual Info* PostVisit(
4652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      Regexp* re, Info* parent_arg,
4662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      Info* pre_arg,
4672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      Info** child_args, int nchild_args);
4682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
4692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  virtual Info* ShortVisit(
4702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      Regexp* re,
4712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      Info* parent_arg);
4722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
4732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson private:
4742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  DISALLOW_EVIL_CONSTRUCTORS(Walker);
4752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson};
4762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
4772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::BuildInfo(Regexp* re) {
4782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (Trace) {
4792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    LOG(INFO) << "BuildPrefilter::Info: " << re->ToString();
4802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
4812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Prefilter::Info::Walker w;
4822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Prefilter::Info* info = w.WalkExponential(re, NULL, 100000);
4832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
4842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (w.stopped_early()) {
4852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    delete info;
4862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return NULL;
4872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
4882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
4892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return info;
4902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
4912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
4922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::Walker::ShortVisit(
4932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    Regexp* re, Prefilter::Info* parent_arg) {
4942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return AnyMatch();
4952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
4962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
4972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Constructs the Prefilter::Info for the given regular expression.
4982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Assumes re is simplified.
4992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter::Info* Prefilter::Info::Walker::PostVisit(
5002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    Regexp* re, Prefilter::Info* parent_arg,
5012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    Prefilter::Info* pre_arg, Prefilter::Info** child_args,
5022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    int nchild_args) {
5032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Prefilter::Info *info;
5042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  switch (re->op()) {
5052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    default:
5062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpRepeat:
5072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      LOG(DFATAL) << "Bad regexp op " << re->op();
5082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      info = EmptyString();
5092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
5102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
5112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpNoMatch:
5122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      info = NoMatch();
5132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
5142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
5152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    // These ops match the empty string:
5162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpEmptyMatch:      // anywhere
5172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpBeginLine:       // at beginning of line
5182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpEndLine:         // at end of line
5192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpBeginText:       // at beginning of text
5202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpEndText:         // at end of text
5212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpWordBoundary:    // at word boundary
5222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpNoWordBoundary:  // not at word boundary
5232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      info = EmptyString();
5242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
5252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
5262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpLiteral:
5272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      info = Literal(re->rune());
5282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
5292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
5302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpLiteralString:
5312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (re->nrunes() == 0) {
5322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        info = NoMatch();
5332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        break;
5342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      }
5352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      info = Literal(re->runes()[0]);
5362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      for (int i = 1; i < re->nrunes(); i++)
5372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        info = Concat(info, Literal(re->runes()[i]));
5382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
5392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
5402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpConcat: {
5412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      // Accumulate in info.
5422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      // Exact is concat of recent contiguous exact nodes.
5432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      info = NULL;
5442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      Info* exact = NULL;
5452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      for (int i = 0; i < nchild_args; i++) {
5462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        Info* ci = child_args[i];  // child info
5472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        if (!ci->is_exact() ||
5482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson            (exact && ci->exact().size() * exact->exact().size() > 16)) {
5492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson          // Exact run is over.
5502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson          info = And(info, exact);
5512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson          exact = NULL;
5522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson          // Add this child's info.
5532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson          info = And(info, ci);
5542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        } else {
5552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson          // Append to exact run.
5562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson          exact = Concat(exact, ci);
5572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        }
5582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      }
5592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      info = And(info, exact);
5602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    }
5612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
5622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
5632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpAlternate:
5642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      info = child_args[0];
5652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      for (int i = 1; i < nchild_args; i++)
5662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        info = Alt(info, child_args[i]);
5672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      VLOG(10) << "Alt: " << info->ToString();
5682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
5692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
5702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpStar:
5712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      info = Star(child_args[0]);
5722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
5732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
5742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpQuest:
5752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      info = Quest(child_args[0]);
5762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
5772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
5782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpPlus:
5792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      info = Plus(child_args[0]);
5802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
5812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
5822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpAnyChar:
5832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      // Claim nothing, except that it's not empty.
5842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      info = AnyChar();
5852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
5862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
5872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpCharClass:
5882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      info = CClass(re->cc());
5892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
5902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
5912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case kRegexpCapture:
5922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      // These don't affect the set of matching strings.
5932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      info = child_args[0];
5942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      break;
5952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
5962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
5972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (Trace) {
5982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    VLOG(0) << "BuildInfo " << re->ToString()
5992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson            << ": " << info->ToString();
6002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
6012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
6022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return info;
6032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
6042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
6052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
6062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter* Prefilter::FromRegexp(Regexp* re) {
6072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (re == NULL)
6082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return NULL;
6092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
6102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Regexp* simple = re->Simplify();
6112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Prefilter::Info *info = BuildInfo(simple);
6122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
6132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  simple->Decref();
6142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (info == NULL)
6152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return NULL;
6162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
6172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Prefilter* m = info->TakeMatch();
6182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
6192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  delete info;
6202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return m;
6212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
6222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
6232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonstring Prefilter::DebugString() const {
6242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (this == NULL)
6252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return "<nil>";
6262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
6272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  switch (op_) {
6282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    default:
6292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_;
6302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      return StringPrintf("op%d", op_);
6312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case NONE:
6322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      return "*no-matches*";
6332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case ATOM:
6342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      return atom_;
6352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case ALL:
6362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      return "";
6372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case AND: {
6382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      string s = "";
6392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      for (int i = 0; i < subs_->size(); i++) {
6402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        if (i > 0)
6412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson          s += " ";
6422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        s += (*subs_)[i]->DebugString();
6432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      }
6442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      return s;
6452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    }
6462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    case OR: {
6472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      string s = "(";
6482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      for (int i = 0; i < subs_->size(); i++) {
6492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        if (i > 0)
6502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson          s += "|";
6512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        s += (*subs_)[i]->DebugString();
6522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      }
6532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      s += ")";
6542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      return s;
6552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    }
6562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
6572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
6582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
6592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonPrefilter* Prefilter::FromRE2(const RE2* re2) {
6602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (re2 == NULL)
6612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return NULL;
6622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
6632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  Regexp* regexp = re2->Regexp();
6642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  if (regexp == NULL)
6652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return NULL;
6662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
6672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return FromRegexp(regexp);
6682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
6692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
6702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
6712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}  // namespace re2
672